PATH = .:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin LD_LIBRARY_PATH = /opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64 SLURM_JOB_NODELIST = n[001-002] num_proc = 2 host: n001 host: n002 ================================================================================================== mpiexec options: ---------------- Base path: /opt/mpich/bin/ Launcher: (null) Debug level: 1 Enable X: 1 Global environment: ------------------- SLURM_MPI_TYPE=pmi2 LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64 LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36: ODBG= SLURM_TASK_PID=36228 NEEDLES_NUM_MANAGERS=2 SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22 SLURM_PRIO_PROCESS=0 n_vars=-n_vars 54 CDC_PREW2KHOST=rocci all_orders=-all_orders 0 MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD LANG=en_US.UTF-8 SLURM_SUBMIT_DIR=/home/kmccall/Needles2 HISTCONTROL=ignoredups NEEDLES_INPUT_FILE=haystack_out.bin DISPLAY=localhost:20.0 HOSTNAME=n001 OLDPWD=/home/kmccall/Needles2 NEEDLES_OUTPUT_FILE=needles_out.txt ENVIRONMENT=BATCH 
PATH_modshare=/usr/sbin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:1:/usr/bin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:1:/usr/local/sbin:1:/opt/pssc/bin:1:/usr/local/cuda/bin:1:/usr/share/Modules/bin:1:/usr/local/bin:1 LOADEDMODULES_modshare=intel/intelmpi:1 CDC_JOINED_ZONE=CN=MSMCS,CN=NASA,CN=Zones,CN=Centrify,CN=Program Data,DC=ndc,DC=nasa,DC=gov SLURM_JOB_GID=513 CDC_LOCALHOST=rocci.ndc.nasa.gov work_dir=/home/kmccall/Needles2 SLURMD_NODENAME=n001 FI_PROVIDER_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib/prov met_name=-metric m_disp_contin which_declare=declare -f LD_LIBRARY_PATH_modshare=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:1:/usr/local/cuda/lib64:1 CLASSPATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/mpi.jar XDG_SESSION_ID=187440 MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl USER=kmccall XTERM_SHELL=/bin/bash man_mas_cmd=./NeedlesMpiMM job_id_cmd=-job_id 065 PWD=/home/kmccall/Needles2 SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass SLURM_JOB_NODELIST=n[001-002] HOME=/home/kmccall SLURM_CLUSTER_NAME=cluster SSH_CLIENT=156.68.206.153 63270 22 SLURM_NTASKS=2 SLURM_JOB_CPUS_PER_NODE=24(x2) XDG_DATA_DIRS=/home/kmccall/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share SLURM_TOPOLOGY_ADDR=n001 _LMFILES__modshare=/opt/modulefiles/intel/intelmpi:1 debug_args=xterm -hold -display localhost:20.0 -e gdb -x /home/kmccall/Needles2/gdb_cmds -args SLURM_WORKING_CLUSTER=cluster:rocci.ndc.nasa.gov:6817:8960:101 SLURM_JOB_NAME=RunNeedles.rocci.bash TMPDIR=/tmp LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib NEEDLES_METRIC_NAME=m_disp_contin SLURM_CONF=/var/spool/slurm/conf-cache/slurm.conf n_ndxs=-n_ndxs 1 LOADEDMODULES=intel/intelmpi XTERM_VERSION=XTerm(331) SLURM_NODE_ALIASES=(null) SLURM_JOB_QOS=normal SLURM_TOPOLOGY_ADDR_PATTERN=node DA_SESSION_ID_AUTH=92b69ce4-0909-ec4a-8e6d-7c48125b3947 MAIL=/var/spool/mail/kmccall SLURM_JOB_NUM_NODES=2 epoch_arg=-epoch 1643395222 SHELL=/bin/bash TERM=xterm EPOCH=1643395222 SLURM_JOB_UID=384580260 TC_LIB_DIR=/usr/lib64/tc NEEDLES_NUM_NDXS=1 STRACE= output_file=-output_file needles_out.txt SLURM_JOB_PARTITION=normal num_proc=2 NEEDLES_NUM_WORKERS=23 job_id=065 SLURM_JOB_USER=kmccall CDC_JOINED_DC=ndmsadc11.ndc.nasa.gov NEEDLES_ALL_ORDERS=0 SHLVL=5 SLURM_SUBMIT_HOST=rocci.ndc.nasa.gov SLURM_JOB_ACCOUNT=users CDC_JOINED_DOMAIN=ndc.nasa.gov MANPATH=:: worker_cmd=-worker_cmd NeedlesMpiWork WINDOWID=56623138 workers_per_manager=-works_per_man 23 manager_cmd=-manager_cmd NeedlesMpiMM GDK_BACKEND=x11 MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles:/opt/modulefiles:/opt/modulefiles SLURM_GTIDS=0 LOGNAME=kmccall DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-T1aaj4EzvL,guid=27fd49c3e19718eb96fef2db61f41756 XDG_RUNTIME_DIR=/run/user/384580260 MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1 
PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin SLURM_JOB_ID=37065 _LMFILES_=/opt/modulefiles/intel/intelmpi in_file=-input_file haystack_out.bin MODULESHOME=/usr/share/Modules I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi HISTSIZE=1000 USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV NEEDLES_NUM_VARS=54 XTERM_LOCALE=en_US.UTF-8 CDC_JOINED_SITE=MSFCPrivate LESSOPEN=||/usr/bin/lesspipe.sh %s BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" } BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 } BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus } BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi } BASH_FUNC_ml%%=() { module ml "$@" } _=/opt/mpich/bin/mpiexec ZES_ENABLE_SYSMAN=1 Hydra internal environment: --------------------------- GFORTRAN_UNBUFFERED_PRECONNECTED=y Proxy information: ********************* [1] proxy: n001 (1 cores) Exec list: ./NeedlesMpiMM (1 processes); [2] proxy: n002 (1 cores) Exec list: ./NeedlesMpiMM (1 processes); ================================================================================================== [mpiexec@n001.cluster.pssclabs.com] Timeout set to -1 (-1 means infinite) [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:39095 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:39095 --debug --rmk user --launcher slurm --demux poll --pgid 0 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname 
n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 2 --auto-cleanup 1 --pmi-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,2,1)) --global-inherited-env 117 [117 quoted environment variables, identical to the "Global environment" listing above, omitted] --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'DISPLAY=localhost:10.0' --exec-wdir /home/kmccall/Needles2 --exec-args 23 ./NeedlesMpiMM -input_file haystack_out.bin -works_per_man 23 -manager_cmd NeedlesMpiMM -worker_cmd NeedlesMpiWork -n_vars 54 -n_ndxs 1 -metric m_disp_contin -all_orders 0 -output_file needles_out.txt -epoch 1643395222 -job_id 065
Arguments being passed to proxy 1:
--version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 0,1,2 --pmi-id-map 0,1 --global-process-count 2 --auto-cleanup 1 --pmi-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,2,1)) --global-inherited-env 117 [117 quoted environment variables, identical to the "Global environment" listing above, omitted] --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'DISPLAY=localhost:10.0' --exec-wdir /home/kmccall/Needles2 --exec-args 23 ./NeedlesMpiMM -input_file haystack_out.bin -works_per_man 23 -manager_cmd NeedlesMpiMM -worker_cmd NeedlesMpiWork -n_vars 54 -n_ndxs 1 -metric m_disp_contin -all_orders 0 -output_file needles_out.txt -epoch 1643395222 -job_id 065
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001,n002 -N 2 -n 2 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:39095 --debug --rmk user --launcher slurm --demux poll --pgid 0 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0
[proxy:0:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0
PMI sending: 20 cmd=job-getid;
PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;rc=0;
PMI sending: 50 cmd=info-getjobattr;key=PMI_process_mapping;
PMI received (cmdlen 68): cmd=info-getjobattr-response;found=TRUE;value=(vector,(0,2,1));rc=0;
PMI sending: 20 cmd=job-getid;
PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;rc=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#54399$ifname#172.16.56.1$;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 7 pid 4: cmd=kvs-put-response;rc=0;
PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#54399$ifname#172.16.56.1$;
PMI received (cmdlen 26): cmd=kvs-put-response;rc=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 20 cmd=kvs-fence;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 7 pid 4: cmd=kvs-fence-response;rc=0;
PMI sending: 20 cmd=kvs-fence;
PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0;
MASTER ON NODE n001.cluster.pssclabs.com 36249
[proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0
[proxy:0:1@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0
PMI sending: 599
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#54399$ifname#172.16.56.1$;subcmd=NeedlesMpiMM;maxprocs=1;argc=26;argv0=./NeedlesMpiMM;argv1=-input_file;argv2=haystack_out.bin;argv3=-works_per_man;argv4=22;argv5=-manager_cmd;argv6=NeedlesMpiMM;argv7=-worker_cmd;argv8=NeedlesMpiWork;argv9=-n_vars;argv10=54;argv11=-n_ndxs;argv12=1;argv13=-metric;argv14=m_disp_contin;argv15=-all_orders;argv16=0;argv17=-output_file;argv18=needles_out.txt;argv19=-epoch;argv20=1643395222;argv21=-job_id;argv22=065;argv23=-n_managers;argv24=2;argv25=-spawn_manager;alu
PMI sending: 20 cmd=job-getid;
PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;rc=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 599 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#54399$ifname#172.16.56.1$;subcmd=NeedlesMpiMM;maxprocs=1;argc=26;argv0=./NeedlesMpiMM;argv1=-input_file;argv2=haystack_out.bin;argv3=-works_per_man;argv4=22;argv5=-manager_cmd;argv6=NeedlesMpiMM;argv7=-worker_cmd;argv8=NeedlesMpiWork;argv9=-n_vars;argv10=54;argv11=-n_ndxs;argv12=1;argv13=-metric;argv14=m_disp_contin;argv15=-all_orders;argv16=0;argv17=-output_file;argv18=needles_out.txt;argv19=-epoch;argv20=1643395222;argv21=-job_id;argv22=065;argv23=-n_managers;argv24=2;argv25=-spawn_manager;alu
[mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:37223
Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37223 --debug --rmk user --launcher slurm --demux poll --pgid 1 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id
Arguments being passed to proxy 0:
--version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 [117 quoted environment variables, identical to the "Global environment" listing above, omitted] --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 27 NeedlesMpiMM ./NeedlesMpiMM -input_file haystack_out.bin -works_per_man 22 -manager_cmd NeedlesMpiMM -worker_cmd NeedlesMpiWork -n_vars 54 -n_ndxs 1 -metric m_disp_contin -all_orders 0 -output_file needles_out.txt -epoch 1643395222 -job_id 065 -n_managers 2 -spawn_manager
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37223 --debug --rmk user --launcher slurm --demux poll --pgid 1 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 7 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;nerrs=0;
PMI sending: 50 cmd=info-getjobattr;key=PMI_process_mapping;
PMI received (cmdlen 68): cmd=info-getjobattr-response;found=TRUE;value=(vector,(0,2,1));rc=0;
PMI sending: 20 cmd=job-getid;
PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;rc=0;
PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;nerrs=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 92 cmd=kvs-put;key=P1-businesscard;value=description#n002$port#51287$ifname#172.16.56.2$;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-put-response;rc=0;
PMI sending: 92 cmd=kvs-put;key=P1-businesscard;value=description#n002$port#51287$ifname#172.16.56.2$;
PMI received (cmdlen 26): cmd=kvs-put-response;rc=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 20 cmd=kvs-fence;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-fence-response;rc=0;
PMI sending: 20 cmd=kvs-fence;
PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0;
MANAGER ON NODE n002.cluster.pssclabs.com 340368
MANAGER n002.cluster.pssclabs.com 340368 ARGS passed to NeedlesMpiManager = ./NeedlesMpiMM -input_file haystack_out.bin -works_per_man 23 -manager_cmd NeedlesMpiMM -worker_cmd NeedlesMpiWork -n_vars 54 -n_ndxs 1 -metric m_disp_contin -all_orders 0 -output_file needles_out.txt -epoch 1643395222 -job_id 065
PMI sending: 102
cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.0 pmirank=1 threaded=FALSE [proxy:0:1@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=1;size=2;appnum=0;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:0:1@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): info-getjobattr key=PMI_process_mapping [proxy:0:1@n002.cluster.pssclabs.com] PMI response: cmd=info-getjobattr-response;found=TRUE;value=(vector,(0,2,1));rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:0:1@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P1-businesscard value=description#n002$port#51287$ifname#172.16.56.2$ > [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.0 pmirank=0 threaded=FALSE [proxy:0:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=2;appnum=0;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:0:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;rc=0; [proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): info-getjobattr key=PMI_process_mapping [proxy:0:0@n001.cluster.pssclabs.com] PMI response: cmd=info-getjobattr-response;found=TRUE;value=(vector,(0,2,1));rc=0; [proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:0:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;rc=0; [proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#54399$ifname#172.16.56.1$ ® [proxy:0:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream 
[proxy:0:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:0:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:0:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#54399$ifname#172.16.56.1$ subcmd=NeedlesMpiMM maxprocs=1 argc=26 argv0=./NeedlesMpiMM argv1=-input_file argv2=haystack_out.bin argv3=-works_per_man argv4=22 argv5=-manager_cmd argv6=NeedlesMpiMM argv7=-worker_cmd argv8=NeedlesMpiWork argv9=-n_vars argv10=54 argv11=-n_ndxs argv12=1 argv13=-metric argv14=m_disp_contin argv15=-all_orders argv16=0 argv17=-output_file argv18=needles_out.txt argv19=-epoch argv20=1643395222 argv21=-job_id argv22=065 argv23=-n_managers argv24=2 argv25=-spawn_manager alu [proxy:0:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:0:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:1:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#59293$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#59293$ifname#172.16.56.1$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#54399$ifname#172.16.56.1$;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#54399$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received 
(cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 7 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 7 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MASTER 36249 n001.cluster.pssclabs.com 0 t = 0.454812: spawn new manager MPI_Comm_spawn success MASTER 36249 n001.cluster.pssclabs.com MANAGER ON NODE n001.cluster.pssclabs.com 0 t = 0.454844: spawned new manager on mother superior 36266 MANAGER n001.cluster.pssclabs.com 36266 ARGS passed to NeedlesMpiManager = NeedlesMpiMM ./NeedlesMpiMM -input_file haystack_out.bin -works_per_man 22 -manager_cmd NeedlesMpiMM -worker_cmd NeedlesMpiWork -n_vars 54 -n_ndxs 1 -metric m_disp_contin -all_orders 0 -output_file needles_out.txt -epoch 1643395222 -job_id 065 -n_managers 2 -spawn_manager [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.1 pmirank=0 threaded=FALSE [proxy:1:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:1:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:1:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#59293$ifname#172.16.56.1$ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream 
[proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream CREATED SHARED MEMORY 340368 CREATED SHARED MEMORY 36266 setTupleSeqParams: setting for n_ndxs = 1 HERE: short host name = n002 SPAWNED NEW WORKER 340368 [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=0; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:39789 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:39789 --debug --rmk user --launcher slurm --demux poll --pgid 2 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_2_1739082981_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remainder of the inherited-environment listing (--global-inherited-env 117; identical to the global environment shown earlier in this log) omitted ...] --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 0 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:39789 --debug --rmk user --launcher slurm --demux poll --pgid 2 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=0  [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=0; PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:2:0@n002.cluster.pssclabs.com] got pmi
command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:2:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 2] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#35151$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 16 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#35151$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 2] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 16 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 2] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 16 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 2] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 16 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#35151$ifname#172.16.56.2$;rc=0; [proxy:2:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.2 pmirank=0 threaded=FALSE [proxy:2:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:2:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:2:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;rc=0; [proxy:2:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:2:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com;rc=0; [proxy:2:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#35151$ifname#172.16.56.2$ [proxy:2:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:2:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:2:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:2:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:2:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream 
[proxy:2:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:2:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:2:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:2:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_2_1739082981_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:2:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#35151$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream MANAGER 340368 n002.cluster.pssclabs.comARGS passed to NeedlesMpiWorker (pid = 1 t = 340386) NeedlesMpiWork 5.59591: NeedlesMpiWork 1643395222 340368spawned new worker 1 of 23 sub_id = 0 -job_id 065 -n_managers SPAWNED NEW WORKER 340368 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 0 HERE worker pid =340386 manager_pid = 340368 OPENED SHARED MEMORY 340386 PMI sending: 557 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=1; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=1; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:37057 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37057 --debug --rmk user --launcher slurm --demux poll --pgid 3 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_3_176028381_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remainder of the inherited-environment listing (--global-inherited-env 117; identical to the listings above) omitted ...] --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 1 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37057 --debug --rmk user --launcher slurm --demux poll --pgid 3 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=1  [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI received (cmdlen 86): cmd=spawn-response;rc=0;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:2:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:3:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:3:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;rc=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 3] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#45861$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 20 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#45861$ifname#172.16.56.1$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 3] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 20 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 107 cmd=kvs-get;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 3] got PMI command: 107 cmd=kvs-get;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 20 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 101 cmd=kvs-get;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 3] got PMI command: 101 cmd=kvs-get;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 20 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#45861$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#45861$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): 
cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = ARGS passed to NeedlesMpiWorker (pid = 36323) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 5.729: spawned new worker -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 2 of 23 sub_id = -metric 59 -manager_rank 1 -worker_rank 1 SPAWNED NEW WORKER 3403681 HERE worker pid =36323 manager_pid = 340368 OPENED SHARED MEMORY 36323 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=2; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=2; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:46133 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46133 --debug --rmk user --launcher slurm --demux poll --pgid 4 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_4_1205859381_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remainder of the inherited-environment listing (--global-inherited-env 117; identical to the listings above) omitted ...] --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 2 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46133 --debug --rmk user --launcher slurm --demux poll --pgid 4 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=2  [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream WORKER 36323 n001.cluster.pssclabs.com 0 t = 5.7305: suicide due to failed manager: pid = 340368 PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:3:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.3 pmirank=0 threaded=FALSE [proxy:3:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:3:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:3:0@n001.cluster.pssclabs.com] PMI response:
cmd=job-getid-response;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;rc=0; [proxy:3:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:3:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com;rc=0; [proxy:3:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#45861$ifname#172.16.56.1$ [proxy:3:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:3:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:3:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:3:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:3:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:3:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:3:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:3:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:3:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_3_176028381_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:3:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:3:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:4:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:4:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 4] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#51631$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 24 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#51631$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 4] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 24 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 4] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 24 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; 
[mpiexec@n001.cluster.pssclabs.com] [pgid: 4] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 24 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51631$ifname#172.16.56.2$;rc=0; [proxy:4:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.4 pmirank=0 threaded=FALSE [proxy:4:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:4:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:4:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;rc=0; [proxy:4:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:4:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com;rc=0; [proxy:4:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#51631$ifname#172.16.56.2$ [proxy:4:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:4:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:4:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:4:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:4:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:4:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:4:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:4:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:4:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_4_1205859381_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:4:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51631$ifname#172.16.56.2$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 
cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream MANAGER 340368 n002.cluster.pssclabs.com 1 t = 5.8589: spawned new worker ARGS passed to NeedlesMpiWorker (pid = 340396) 3 of 23 sub_id = NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man2 SPAWNED NEW WORKER 340368 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 2 HERE worker pid =340396 manager_pid = 340368 OPENED SHARED MEMORY 340396 [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=3; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:46581 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46581 --debug --rmk user --launcher slurm --demux poll --pgid 5 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_5_52802906_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 'NEEDLES_INPUT_FILE=haystack_out.bin' 'DISPLAY=localhost:20.0' 'HOSTNAME=n001' 'OLDPWD=/home/kmccall/Needles2' 'NEEDLES_OUTPUT_FILE=needles_out.txt' 'ENVIRONMENT=BATCH' 'PATH_modshare=/usr/sbin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:1:/usr/bin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:1:/usr/local/sbin:1:/opt/pssc/bin:1:/usr/local/cuda/bin:1:/usr/share/Modules/bin:1:/usr/local/bin:1' 'LOADEDMODULES_modshare=intel/intelmpi:1' 'CDC_JOINED_ZONE=CN=MSMCS,CN=NASA,CN=Zones,CN=Centrify,CN=Program Data,DC=ndc,DC=nasa,DC=gov' 'SLURM_JOB_GID=513' 'CDC_LOCALHOST=rocci.ndc.nasa.gov' 'work_dir=/home/kmccall/Needles2' 'SLURMD_NODENAME=n001' 'FI_PROVIDER_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib/prov' 'met_name=-metric m_disp_contin' 'which_declare=declare -f' 'LD_LIBRARY_PATH_modshare=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:1:/usr/local/cuda/lib64:1' 'CLASSPATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/mpi.jar' 'XDG_SESSION_ID=187440' 'MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl' 'USER=kmccall' 'XTERM_SHELL=/bin/bash' 'man_mas_cmd=./NeedlesMpiMM' 'job_id_cmd=-job_id 065' 'PWD=/home/kmccall/Needles2' 'SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass' 'SLURM_JOB_NODELIST=n[001-002]' 'HOME=/home/kmccall' 'SLURM_CLUSTER_NAME=cluster' 'SSH_CLIENT=156.68.206.153 63270 22' 'SLURM_NTASKS=2' 'SLURM_JOB_CPUS_PER_NODE=24(x2)' 
'XDG_DATA_DIRS=/home/kmccall/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share' 'SLURM_TOPOLOGY_ADDR=n001' '_LMFILES__modshare=/opt/modulefiles/intel/intelmpi:1' 'debug_args=xterm -hold -display localhost:20.0 -e gdb -x /home/kmccall/Needles2/gdb_cmds -args ' 'SLURM_WORKING_CLUSTER=cluster:rocci.ndc.nasa.gov:6817:8960:101' 'SLURM_JOB_NAME=RunNeedles.rocci.bash' 'TMPDIR=/tmp' 'LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib' 'NEEDLES_METRIC_NAME=m_disp_contin' 'SLURM_CONF=/var/spool/slurm/conf-cache/slurm.conf' 'n_ndxs=-n_ndxs 1' 'LOADEDMODULES=intel/intelmpi' 'XTERM_VERSION=XTerm(331)' 'SLURM_NODE_ALIASES=(null)' 'SLURM_JOB_QOS=normal' 'SLURM_TOPOLOGY_ADDR_PATTERN=node' 'DA_SESSION_ID_AUTH=92b69ce4-0909-ec4a-8e6d-7c48125b3947' 'MAIL=/var/spool/mail/kmccall' 'SLURM_JOB_NUM_NODES=2' 'epoch_arg=-epoch 1643395222' 'SHELL=/bin/bash' 'TERM=xterm' 'EPOCH=1643395222' 'SLURM_JOB_UID=384580260' 'TC_LIB_DIR=/usr/lib64/tc' 'NEEDLES_NUM_NDXS=1' 'STRACE=' 'output_file=-output_file needles_out.txt' 'SLURM_JOB_PARTITION=normal' 'num_proc=2' 'NEEDLES_NUM_WORKERS=23' 'job_id=065' 'SLURM_JOB_USER=kmccall' 'CDC_JOINED_DC=ndmsadc11.ndc.nasa.gov' 'NEEDLES_ALL_ORDERS=0' 'SHLVL=5' 'SLURM_SUBMIT_HOST=rocci.ndc.nasa.gov' 'SLURM_JOB_ACCOUNT=users' 'CDC_JOINED_DOMAIN=ndc.nasa.gov' 'MANPATH=::' 'worker_cmd=-worker_cmd NeedlesMpiWork' 'WINDOWID=56623138' 'workers_per_manager=-works_per_man 23' 'manager_cmd=-manager_cmd NeedlesMpiMM' 'GDK_BACKEND=x11' 'MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles:/opt/modulefiles:/opt/modulefiles' 'SLURM_GTIDS=0' 'LOGNAME=kmccall' 'DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-T1aaj4EzvL,guid=27fd49c3e19718eb96fef2db61f41756' 'XDG_RUNTIME_DIR=/run/user/384580260' 'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; 
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 3 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46581 --debug --rmk user --launcher slurm --demux poll --pgid 5 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=3  [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=3; PMI received (cmdlen 85): cmd=spawn-response;rc=0;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:4:0@n002.cluster.pssclabs.com] we don't 
understand the response kvs-get-response; forwarding downstream [proxy:5:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:5:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 81): cmd=job-getid-response;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 81): cmd=job-getid-response;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 5] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#48713$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 28 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#48713$ifname#172.16.56.1$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 5] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 28 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 106 cmd=kvs-get;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 5] got PMI command: 106 cmd=kvs-get;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 28 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 100 cmd=kvs-get;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 5] got PMI command: 100 cmd=kvs-get;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 28 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#48713$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#48713$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: 
cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream ARGS passed to NeedlesMpiWorker (pid = MANAGER 36347) NeedlesMpiWork NeedlesMpiWork340368 n002.cluster.pssclabs.com 1 t = 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 5.98897: spawned new worker -metric 59 -manager_rank 1 -worker_rank 4 of 23 sub_id = 33 SPAWNED NEW WORKER 340368 HERE worker pid =36347 manager_pid = 340368 OPENED SHARED MEMORY 36347 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=4; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=4  [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=4; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:34267 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:34267 --debug --rmk user --launcher slurm --demux poll --pgid 6 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 
--auto-cleanup 1 --pmi-kvsname kvs_36231_6_99919134_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117
[... the 117 inherited environment variables and exported bash functions are omitted here; they are identical to the environment shown in the pgid 5 proxy arguments above ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 4
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:34267 --debug --rmk user --launcher slurm --demux poll --pgid 6 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;nerrs=0;
WORKER 36347 n001.cluster.pssclabs.com 0 t = 5.99043: suicide due to failed manager: pid = 340368
PMI received (cmdlen 85): cmd=spawn-response;rc=0;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;nerrs=0;
[proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream
[proxy:5:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.5 pmirank=0 threaded=FALSE
[proxy:5:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0;
[proxy:5:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid
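To make the repeated spawn/kvs traffic in this trace easier to follow: each "cmd=spawn" PMI command corresponds to one MPI_Comm_spawn call from a manager (one NeedlesMpiWork process per call, each getting its own pgid and hydra_pmi_proxy), and each spawned worker looks up PARENT_ROOT_PORT_NAME through PMI so it can connect back to its spawner. The sketch below is not the Needles2 source; it is a minimal, hypothetical reconstruction assuming the manager spawns workers one at a time and each worker polls its manager's pid, which would produce the "spawned new worker" and "suicide due to failed manager" lines seen above. The names spawn_one_worker and worker_main are made up, and the argv list is trimmed to the pieces that matter here.

#include <mpi.h>
#include <signal.h>     // kill()
#include <unistd.h>     // getpid(), sleep()
#include <sys/types.h>  // pid_t
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

// Hypothetical manager side: one MPI_Comm_spawn per worker.  Each call shows up
// in the hydra trace as a "cmd=spawn" PMI command and a new pgid/proxy.
static void spawn_one_worker(int worker_rank, int manager_rank)
{
    std::vector<std::string> args = {
        std::to_string(getpid()),                 // manager pid, read back by the worker
        "-manager_rank", std::to_string(manager_rank),
        "-worker_rank",  std::to_string(worker_rank),
    };
    std::vector<char*> argv;
    for (auto& a : args) argv.push_back(const_cast<char*>(a.c_str()));
    argv.push_back(nullptr);                      // MPI_Comm_spawn expects a NULL-terminated argv

    MPI_Comm worker;                              // intercommunicator to the spawned process
    MPI_Comm_spawn("NeedlesMpiWork", argv.data(), /*maxprocs=*/1,
                   MPI_INFO_NULL, /*root=*/0, MPI_COMM_SELF,
                   &worker, MPI_ERRCODES_IGNORE);
}

// Hypothetical worker side: the PARENT_ROOT_PORT_NAME kvs-get in the trace is issued
// internally by the MPI library so the child can reach the parent obtained here.
static void worker_main(pid_t manager_pid)
{
    MPI_Comm parent;
    MPI_Comm_get_parent(&parent);
    if (parent == MPI_COMM_NULL) return;          // not started via MPI_Comm_spawn

    for (;;) {
        // One way to get "suicide due to failed manager": poll the manager pid.
        // Note kill(pid, 0) only checks that the pid exists on *this* host.
        if (kill(manager_pid, 0) == -1 && errno == ESRCH) {
            std::fprintf(stderr, "manager %ld gone, exiting\n", (long)manager_pid);
            MPI_Abort(MPI_COMM_SELF, 1);
        }
        sleep(1);                                 // ... real work would go here ...
    }
}

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    MPI_Comm parent;
    MPI_Comm_get_parent(&parent);
    if (parent == MPI_COMM_NULL) {
        for (int w = 1; w <= 23; ++w)             // cf. -works_per_man 23 in the trace
            spawn_one_worker(w, /*manager_rank=*/1);
    } else {
        worker_main(argc > 1 ? (pid_t)std::atol(argv[1]) : 0);  // the real trace passes more args
    }
    MPI_Finalize();
    return 0;
}

If the liveness check really is a local kill(pid, 0) like this, a worker that lands on a different node than its manager (WORKER 36347 runs on n001 while manager 340368 runs on n002) would always conclude the manager has died, which matches the immediate suicide messages in this trace; that is only a guess from the log, not a confirmed diagnosis.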
[proxy:5:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;rc=0; [proxy:5:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:5:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com;rc=0; [proxy:5:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#48713$ifname#172.16.56.1$ [proxy:5:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:5:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:5:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:5:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:5:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:5:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:5:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:5:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:5:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_5_52802906_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:5:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:5:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:6:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:6:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 81): cmd=job-getid-response;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 81): cmd=job-getid-response;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 6] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#48991$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 32 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#48991$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 6] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 32 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 6] got PMI command: 106 cmd=kvs-get;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 32 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 106 cmd=kvs-get;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 100 
cmd=kvs-get;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 6] got PMI command: 100 cmd=kvs-get;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 32 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#48991$ifname#172.16.56.2$;rc=0; [proxy:6:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.6 pmirank=0 threaded=FALSE [proxy:6:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:6:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:6:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;rc=0; [proxy:6:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:6:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com;rc=0; [proxy:6:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#48991$ifname#172.16.56.2$ [proxy:6:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:6:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:6:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:6:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:6:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:6:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:6:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:6:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:6:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_6_99919134_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:6:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#48991$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get 
[mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 6.09973: spawned new worker 5 of 23 sub_id = 4 ARGS passed to NeedlesMpiWorker (pid = 340406) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx SPAWNED NEW WORKER 340368 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 4 HERE worker pid =340406 manager_pid = 340368 OPENED SHARED MEMORY 340406 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=5; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=5; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:38575 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38575 --debug --rmk user --launcher slurm --demux poll --pgid 7 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_7_930150963_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 
[... remaining inherited environment variables and exported bash functions omitted; identical to the pgid 5 listing above ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 5
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38575 --debug --rmk user --launcher slurm --demux poll --pgid 7 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;nerrs=0;
ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=5
[proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream
PMI received (cmdlen 86): cmd=spawn-response;rc=0;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;nerrs=0;
[proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream
[proxy:6:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream
[proxy:7:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0
[proxy:7:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0
PMI sending: 20
cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 7] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#59531$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 36 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#59531$ifname#172.16.56.1$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 7] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 36 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 107 cmd=kvs-get;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 7] got PMI command: 107 cmd=kvs-get;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 36 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 101 cmd=kvs-get;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 7] got PMI command: 101 cmd=kvs-get;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 36 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59531$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59531$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI 
sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream ARGS passed to NeedlesMpiWorker (pid = MANAGER 34036836371) NeedlesMpiWork n002.cluster.pssclabs.com 1 t = NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 06.22002: spawned new worker 6 -features_ndx 4 -metrics_ndx 54 -metric of 23 sub_id = 5 59 -manager_rank 1 -worker_rank 5 SPAWNED NEW WORKER 340368 HERE worker pid =36371 manager_pid = 340368 OPENED SHARED MEMORY 36371 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=6; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=6; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:37257 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37257 --debug --rmk user --launcher slurm --demux poll --pgid 8 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_8_145676096_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... inherited environment listing omitted; identical to the Global environment shown earlier in this log ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 6
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37257 --debug --rmk user --launcher slurm --demux poll --pgid 8 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=6  [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream
WORKER 36371 n001.cluster.pssclabs.com 0 t = 6.22146: suicide due to failed manager: pid = 340368
PMI received (cmdlen 86): cmd=spawn-response;rc=0;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:7:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.7 pmirank=0 threaded=FALSE [proxy:7:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:7:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:7:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;rc=0; [proxy:7:0@n001.cluster.pssclabs.com]
got pmi command (from 4): job-getid [proxy:7:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com;rc=0; [proxy:7:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#59531$ifname#172.16.56.1$ [proxy:7:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:7:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:7:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:7:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:7:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:7:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:7:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:7:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:7:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_7_930150963_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:7:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:7:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:8:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:8:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 8] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#57953$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 40 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#57953$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 8] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 40 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 8] got PMI command: 107 cmd=kvs-get;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 40 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 107 cmd=kvs-get;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 101 cmd=kvs-get;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 8] got PMI command: 101 
cmd=kvs-get;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 40 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#57953$ifname#172.16.56.2$;rc=0; [proxy:8:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.8 pmirank=0 threaded=FALSE [proxy:8:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:8:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:8:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;rc=0; [proxy:8:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:8:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com;rc=0; [proxy:8:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#57953$ifname#172.16.56.2$ [proxy:8:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:8:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:8:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:8:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:8:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:8:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:8:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:8:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:8:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_8_145676096_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:8:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#57953$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 
cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 6.34466: spawned new worker 7 of 23 sub_id = 6 SPAWNED NEW WORKER 340368 ARGS passed to NeedlesMpiWorker (pid = 340416) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 6 HERE worker pid =340416 manager_pid = 340368 OPENED SHARED MEMORY 340416 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=7; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=7; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:35027 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35027 --debug --rmk user --launcher slurm --demux poll --pgid 9 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_9_846862981_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 
[... remaining inherited environment listing omitted; identical to the Global environment shown earlier in this log ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 7
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35027 --debug --rmk user --launcher slurm --demux poll --pgid 9 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=7  [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI received (cmdlen 86): cmd=spawn-response;rc=0;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:8:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:9:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:9:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20
cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 9] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#55237$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 44 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#55237$ifname#172.16.56.1$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 9] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 44 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 107 cmd=kvs-get;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 9] got PMI command: 107 cmd=kvs-get;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 44 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 101 cmd=kvs-get;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 9] got PMI command: 101 cmd=kvs-get;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 44 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#55237$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#55237$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI 
sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0;
MANAGER 340368 n002.cluster.pssclabs.com 1 t = 6.44457: spawned new worker 8 of 23 sub_id = 7 SPAWNED NEW WORKER 340368 ARGS passed to NeedlesMpiWorker (pid = 36395) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 7 HERE worker pid =36395 manager_pid = 340368 OPENED SHARED MEMORY 36395
PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=8;
[proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=8  [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream
[mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=8;
[mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:38667 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38667 --debug --rmk user --launcher slurm --demux poll --pgid 10 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_10_1729864246_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117
[... inherited environment listing omitted; identical to the Global environment shown earlier in this log ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 8
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38667 --debug --rmk user --launcher slurm --demux poll --pgid 10 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;nerrs=0;
WORKER 36395 n001.cluster.pssclabs.com 0 t = 6.44602: suicide due to failed manager: pid = 340368
[proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;nerrs=0;
HERE: short host name = n001 SPAWNED NEW WORKER 36266
[mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=0;ÿÿ
[mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:34375 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:34375 --debug
--rmk user --launcher slurm --demux poll --pgid 11 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_11_771604342_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117
[... inherited environment listing omitted; identical to the Global environment shown earlier in this log ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 0
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:34375 --debug --rmk user --launcher slurm --demux poll --pgid 11 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;nerrs=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0
argv25=-worker_rank argv26=0 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI sending: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=0;ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;nerrs=0; [proxy:9:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.9 pmirank=0 threaded=FALSE [proxy:9:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:9:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:9:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;rc=0; [proxy:9:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:9:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com;rc=0; [proxy:9:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#55237$ifname#172.16.56.1$ [proxy:9:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:9:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:9:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:9:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:9:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:9:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:9:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:9:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:9:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_9_846862981_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:9:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:9:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:10:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:10:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): 
cmd=job-getid-response;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#41947$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 10] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#41947$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 51 pid 4: cmd=kvs-put-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 10] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 51 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 10] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 51 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 10] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 51 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#41947$ifname#172.16.56.2$;rc=0; [proxy:10:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.10 pmirank=0 threaded=FALSE [proxy:10:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:10:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:10:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;rc=0; [proxy:10:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:10:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com;rc=0; [proxy:10:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#41947$ifname#172.16.56.2$ [proxy:10:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:10:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:10:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:10:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:10:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:10:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:10:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:10:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; 
forwarding downstream [proxy:10:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_10_1729864246_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:10:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#41947$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 6.56903: spawned new worker 9 of 23 sub_id = 8 SPAWNED NEW WORKER 340368 ARGS passed to NeedlesMpiWorker (pid = 340426) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 8 HERE worker pid =340426 manager_pid = 340368 OPENED SHARED MEMORY 340426 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=9; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 557 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=9; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:38327 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38327 --debug --rmk user --launcher slurm --demux poll --pgid 12 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_12_1188737661_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 
[... environment entries omitted here; this --global-inherited-env listing repeats the earlier one verbatim ...]
'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 9 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38327 --debug --rmk user --launcher slurm --demux poll --pgid 12 --retries 10 --usize -2 --pmi-port 0 
--gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=9  [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:11:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:11:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#39253$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 11] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#39253$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 53 pid 4: cmd=kvs-put-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 11] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 53 pid 4: cmd=kvs-fence-response;rc=0; [proxy:10:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 11] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 53 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 11] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 53 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#39253$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#39253$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 
cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; ARGS passed to NeedlesMpiWorker (pid = MANAGER 36419) NeedlesMpiWork NeedlesMpiWork36266 n001.cluster.pssclabs.com 0 t = 1643395222 36266 -job_id 065 -n_managers6.61236: 2 -works_per_man 22 -n_vars_pm 61spawned new worker 1 of 22 sub_id = 0 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx SPAWNED NEW WORKER 36266 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 0 HERE worker pid =36419 manager_pid = 36266 OPENED SHARED MEMORY 36419 PMI sending: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=1;ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=1;ÿÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:33821 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:33821 --debug --rmk user --launcher slurm --demux poll --pgid 13 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_13_1964306575_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 
[... environment entries omitted here; this --global-inherited-env listing repeats the earlier one verbatim ...]
${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 1 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:33821 --debug --rmk user --launcher slurm --demux poll --pgid 13 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=1 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;nerrs=0; [proxy:11:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.11 pmirank=0 threaded=FALSE [proxy:11:0@n001.cluster.pssclabs.com] PMI response: 
cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:11:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:11:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;rc=0; [proxy:11:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:11:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com;rc=0; [proxy:11:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#39253$ifname#172.16.56.1$ [proxy:11:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:11:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:11:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:11:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:11:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:11:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:11:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:11:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:11:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_11_771604342_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:11:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:11:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:12:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:12:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 12] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#57721$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 59 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#57721$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 12] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 59 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 12] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 59 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 109 
cmd=kvs-get;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 12] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 59 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#57721$ifname#172.16.56.2$;rc=0; [proxy:12:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.12 pmirank=0 threaded=FALSE [proxy:12:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:12:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:12:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;rc=0; [proxy:12:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:12:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com;rc=0; [proxy:12:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#57721$ifname#172.16.56.2$ [proxy:12:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:12:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:12:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:12:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:12:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:12:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:12:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:12:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:12:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_12_1188737661_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:12:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#57721$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; 
[proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 6.72363: spawned new worker 10 of 23 sub_id = 9 SPAWNED NEW WORKER 340368 ARGS passed to NeedlesMpiWorker (pid = 340436) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 9 HERE worker pid =340436 manager_pid = 340368 OPENED SHARED MEMORY 340436 [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=10; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:42787 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:42787 --debug --rmk user --launcher slurm --demux poll --pgid 14 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_14_1651429872_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... environment entries omitted here; this --global-inherited-env listing repeats the earlier one verbatim ...]
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 10 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:42787 --debug --rmk user --launcher slurm --demux poll --pgid 14 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=10 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=10; PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:13:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 
[proxy:13:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 13] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#56339$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 61 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#56339$ifname#172.16.56.1$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 13] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 61 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 13] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 61 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 13] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 61 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#56339$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#56339$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream MANAGER ARGS passed to NeedlesMpiWorker (pid = 36266 n001.cluster.pssclabs.com 0 t = 36443) NeedlesMpiWork NeedlesMpiWork 1643395222 6.73694: spawned new worker 2 of 36266 -job_id 065 -n_managers 2 22 sub_id = 1 SPAWNED NEW WORKER 36266 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 1 HERE worker pid =36443 manager_pid = 36266 OPENED 
SHARED MEMORY 36443 PMI sending: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=2;ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=2;ÿÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:39747 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:39747 --debug --rmk user --launcher slurm --demux poll --pgid 15 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_15_1028448283_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited environment variables (including the BASH_FUNC_* module shell functions) elided; identical to the environment dump for the earlier proxy launches ...]
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 2 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:39747 --debug --rmk user --launcher slurm --demux poll --pgid 15 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=2 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;nerrs=0; [proxy:12:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:13:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.13 pmirank=0 threaded=FALSE [proxy:13:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:13:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:13:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;rc=0; 
[proxy:13:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:13:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com;rc=0; [proxy:13:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#56339$ifname#172.16.56.1$ [proxy:13:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:13:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:13:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:13:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:13:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:13:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:13:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:13:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:13:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_13_1964306575_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:13:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:13:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:14:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:14:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 14] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#50459$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 67 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#50459$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 14] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 67 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 14] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 67 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 14] got PMI command: 103 
cmd=kvs-get;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 67 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#50459$ifname#172.16.56.2$;rc=0; [proxy:14:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.14 pmirank=0 threaded=FALSE [proxy:14:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:14:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:14:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;rc=0; [proxy:14:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:14:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com;rc=0; [proxy:14:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#50459$ifname#172.16.56.2$ [proxy:14:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:14:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:14:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:14:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:14:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:14:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:14:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:14:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:14:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_14_1651429872_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:14:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#50459$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 
cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = ARGS passed to NeedlesMpiWorker (pid = 6.84615: spawned new worker 340446) NeedlesMpiWork 11 of 23 sub_id = NeedlesMpiWork 1643395222 34036810 SPAWNED NEW WORKER 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 10 HERE worker pid =340446 manager_pid = 340368 OPENED SHARED MEMORY 340446 PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=11; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=11; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:42729 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:42729 --debug --rmk user --launcher slurm --demux poll --pgid 16 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_16_1624375669_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 
[... remaining inherited environment variables (including the BASH_FUNC_* module shell functions) elided; identical to the environment dump for the earlier proxy launches ...]
${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 11 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:42729 --debug --rmk user --launcher slurm --demux poll --pgid 16 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=11 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:15:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:15:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): 
cmd=job-getid-response;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 15] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#36333$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 69 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#36333$ifname#172.16.56.1$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 15] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 69 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 15] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 69 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 15] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 69 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#36333$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#36333$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream ARGS passed to NeedlesMpiWorker (pid = MANAGER 36467) NeedlesMpiWork NeedlesMpiWork 36266 n001.cluster.pssclabs.com 0 t = 1643395222 36266 -job_id 065 -n_managers 26.85825: spawned new worker -works_per_man 22 -n_vars_pm 61 -n_seeds3 3 of 22 sub_id = 2 SPAWNED NEW WORKER 2003 -n_ndxs 1 -dispersions_ndx 036266 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 2 HERE worker pid =36467 manager_pid = 36266 OPENED SHARED MEMORY 36467 PMI sending: 556 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=3;ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=3;ÿÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:42645 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:42645 --debug --rmk user --launcher slurm --demux poll --pgid 17 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_17_1127052873_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited environment variables (including the BASH_FUNC_* module shell functions) elided; identical to the environment dump for the earlier proxy launches ...]
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 3 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:42645 --debug --rmk user --launcher slurm --demux poll --pgid 17 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=3 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;nerrs=0; [proxy:14:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:15:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.15 pmirank=0 threaded=FALSE [proxy:15:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:15:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:15:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;rc=0; 
[proxy:15:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:15:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com;rc=0; [proxy:15:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#36333$ifname#172.16.56.1$ [proxy:15:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:15:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:15:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:15:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:15:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:15:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:15:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:15:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:15:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_15_1028448283_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:15:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:15:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:16:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:16:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#33375$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 16] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#33375$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 75 pid 4: cmd=kvs-put-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 16] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 75 pid 4: cmd=kvs-fence-response;rc=0; [proxy:17:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 16] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 75 pid 4: 
cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 17] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#37957$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 77 pid 4: cmd=kvs-put-response;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#37957$ifname#172.16.56.1$; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 16] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 75 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#33375$ifname#172.16.56.2$;rc=0; [proxy:16:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.16 pmirank=0 threaded=FALSE [proxy:16:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:16:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:16:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;rc=0; [proxy:16:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:16:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com;rc=0; [proxy:16:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#33375$ifname#172.16.56.2$ [proxy:16:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:16:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:16:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:16:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:16:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:16:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:16:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:16:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:16:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 17] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 77 pid 4: cmd=kvs-fence-response;rc=0; jobid=kvs_36231_16_1624375669_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:16:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#33375$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI 
command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = ARGS passed to NeedlesMpiWorker (pid = 6.98213: spawned new worker 12 of 23 sub_id = 11 SPAWNED NEW WORKER 340456) NeedlesMpiWork NeedlesMpiWork340368 PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 11 [mpiexec@n001.cluster.pssclabs.com] [pgid: 17] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 77 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=12; HERE worker pid =340456 manager_pid = 340368 OPENED SHARED MEMORY 340456 [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=12; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:45475 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:45475 --debug --rmk user --launcher slurm --demux poll --pgid 18 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_18_1356942276_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 
'NEEDLES_INPUT_FILE=haystack_out.bin' 'DISPLAY=localhost:20.0' 'HOSTNAME=n001' 'OLDPWD=/home/kmccall/Needles2' 'NEEDLES_OUTPUT_FILE=needles_out.txt' 'ENVIRONMENT=BATCH' 'PATH_modshare=/usr/sbin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:1:/usr/bin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:1:/usr/local/sbin:1:/opt/pssc/bin:1:/usr/local/cuda/bin:1:/usr/share/Modules/bin:1:/usr/local/bin:1' 'LOADEDMODULES_modshare=intel/intelmpi:1' 'CDC_JOINED_ZONE=CN=MSMCS,CN=NASA,CN=Zones,CN=Centrify,CN=Program Data,DC=ndc,DC=nasa,DC=gov' 'SLURM_JOB_GID=513' 'CDC_LOCALHOST=rocci.ndc.nasa.gov' 'work_dir=/home/kmccall/Needles2' 'SLURMD_NODENAME=n001' 'FI_PROVIDER_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib/prov' 'met_name=-metric m_disp_contin' 'which_declare=declare -f' 'LD_LIBRARY_PATH_modshare=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:1:/usr/local/cuda/lib64:1' 'CLASSPATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/mpi.jar' 'XDG_SESSION_ID=187440' 'MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl' 'USER=kmccall' 'XTERM_SHELL=/bin/bash' 'man_mas_cmd=./NeedlesMpiMM' 'job_id_cmd=-job_id 065' 'PWD=/home/kmccall/Needles2' 'SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass' 'SLURM_JOB_NODELIST=n[001-002]' 'HOME=/home/kmccall' 'SLURM_CLUSTER_NAME=cluster' 'SSH_CLIENT=156.68.206.153 63270 22' 'SLURM_NTASKS=2' 'SLURM_JOB_CPUS_PER_NODE=24(x2)' 'XDG_DATA_DIRS=/home/kmccall/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share' 'SLURM_TOPOLOGY_ADDR=n001' '_LMFILES__modshare=/opt/modulefiles/intel/intelmpi:1' 'debug_args=xterm -hold -display localhost:20.0 -e gdb -x /home/kmccall/Needles2/gdb_cmds -args ' 'SLURM_WORKING_CLUSTER=cluster:rocci.ndc.nasa.gov:6817:8960:101' 'SLURM_JOB_NAME=RunNeedles.rocci.bash' 'TMPDIR=/tmp' 'LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib' 'NEEDLES_METRIC_NAME=m_disp_contin' 'SLURM_CONF=/var/spool/slurm/conf-cache/slurm.conf' 'n_ndxs=-n_ndxs 1' 'LOADEDMODULES=intel/intelmpi' 'XTERM_VERSION=XTerm(331)' 'SLURM_NODE_ALIASES=(null)' 'SLURM_JOB_QOS=normal' 'SLURM_TOPOLOGY_ADDR_PATTERN=node' 'DA_SESSION_ID_AUTH=92b69ce4-0909-ec4a-8e6d-7c48125b3947' 'MAIL=/var/spool/mail/kmccall' 'SLURM_JOB_NUM_NODES=2' 'epoch_arg=-epoch 1643395222' 'SHELL=/bin/bash' 'TERM=xterm' 'EPOCH=1643395222' 'SLURM_JOB_UID=384580260' 'TC_LIB_DIR=/usr/lib64/tc' 'NEEDLES_NUM_NDXS=1' 'STRACE=' 'output_file=-output_file needles_out.txt' 'SLURM_JOB_PARTITION=normal' 'num_proc=2' 'NEEDLES_NUM_WORKERS=23' 'job_id=065' 'SLURM_JOB_USER=kmccall' 'CDC_JOINED_DC=ndmsadc11.ndc.nasa.gov' 'NEEDLES_ALL_ORDERS=0' 'SHLVL=5' 'SLURM_SUBMIT_HOST=rocci.ndc.nasa.gov' 'SLURM_JOB_ACCOUNT=users' 'CDC_JOINED_DOMAIN=ndc.nasa.gov' 'MANPATH=::' 'worker_cmd=-worker_cmd NeedlesMpiWork' 'WINDOWID=56623138' 'workers_per_manager=-works_per_man 23' 'manager_cmd=-manager_cmd NeedlesMpiMM' 'GDK_BACKEND=x11' 'MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles:/opt/modulefiles:/opt/modulefiles' 'SLURM_GTIDS=0' 'LOGNAME=kmccall' 'DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-T1aaj4EzvL,guid=27fd49c3e19718eb96fef2db61f41756' 'XDG_RUNTIME_DIR=/run/user/384580260' 
'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 12 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:45475 --debug --rmk user --launcher slurm --demux poll --pgid 18 --retries 10 --usize -2 --pmi-port 0 
--gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=12 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;nerrs=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 17] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 77 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#37957$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#37957$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER ARGS passed to NeedlesMpiWorker (pid = 36266 n001.cluster.pssclabs.com 0 t = 36491) NeedlesMpiWork NeedlesMpiWork6.98645: spawned new worker 1643395222 36266 -job_id 065 -n_managers4 of 22 sub_id = 3 SPAWNED NEW WORKER 2 -works_per_man 22 -n_vars_pm 36266 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 3 HERE worker pid =36491 manager_pid = 36266 OPENED SHARED MEMORY 36491PMI sending: 556 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=4;ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=4;ÿÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:44029 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:44029 --debug --rmk user --launcher slurm --demux poll --pgid 19 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_19_1796030946_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... bulk of the --global-inherited-env listing elided: the 117 variables passed to this proxy are identical to those shown for the pgid 18 proxy launch above ...]
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 4 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:44029 --debug --rmk user --launcher slurm --demux poll --pgid 19 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=4 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;nerrs=0; [proxy:17:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 [proxy:17:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.17 pmirank=0 threaded=FALSE [proxy:17:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:17:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:17:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;rc=0; 
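Note on the repeated cmd=spawn requests above: each one (ncmds=1, maxprocs=1, a preput PARENT_ROOT_PORT_NAME key, and an argv list naming NeedlesMpiWork) is the PMI-level counterpart of a dynamic-process-creation call in the application, i.e. what Hydra receives when a process calls MPI_Comm_spawn. The NeedlesMpiMM / NeedlesMpiWork sources are not part of this log, so the following is only a minimal sketch of the calling pattern those requests imply; the argument strings, the use of MPI_COMM_SELF, and the one-worker-at-a-time spawning are assumptions read off the log, not the application's actual code.

    /* Minimal illustrative sketch (assumptions noted above), not the NeedlesMpi sources. */
    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char *argv[])
    {
        MPI_Init(&argc, &argv);

        MPI_Comm parent = MPI_COMM_NULL;
        MPI_Comm_get_parent(&parent);      /* stays MPI_COMM_NULL unless this process was spawned */

        if (parent == MPI_COMM_NULL) {
            /* Manager side: every cmd=spawn above carries maxprocs=1, so the manager
             * appears to spawn its workers one at a time.  These argv strings are
             * placeholders patterned after the argv0..argv26 list in the log. */
            char *worker_argv[] = { (char *)"-job_id", (char *)"065",
                                    (char *)"-manager_rank", (char *)"1",
                                    (char *)"-worker_rank", (char *)"12", NULL };
            MPI_Comm worker_intercomm;
            MPI_Comm_spawn("NeedlesMpiWork", worker_argv, 1, MPI_INFO_NULL,
                           0, MPI_COMM_SELF, &worker_intercomm, MPI_ERRCODES_IGNORE);
        } else {
            /* Worker side: the spawned NeedlesMpiWork process reaches its manager through
             * the parent intercommunicator; the kvs-get on PARENT_ROOT_PORT_NAME seen in
             * the trace is how MPICH wires that up underneath. */
            std::printf("worker connected to parent manager\n");
        }

        MPI_Finalize();
        return 0;
    }

Whether the real manager spawns over MPI_COMM_SELF or another communicator, and whether it sets any MPI_Info keys (for example a host key) to steer placement, cannot be determined from the log alone.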
[proxy:17:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:17:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com;rc=0; [proxy:17:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#37957$ifname#172.16.56.1$ [proxy:17:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:17:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:17:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:17:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:17:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:17:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:17:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:17:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:17:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_17_1127052873_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:17:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:17:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:16:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:18:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:18:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#54987$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 18] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#54987$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 83 pid 4: cmd=kvs-put-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 18] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 83 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 18] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 83 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 
cmd=kvs-get;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 18] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 83 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#54987$ifname#172.16.56.2$;rc=0; [proxy:18:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.18 pmirank=0 threaded=FALSE [proxy:18:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:18:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:18:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;rc=0; [proxy:18:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:18:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com;rc=0; [proxy:18:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#54987$ifname#172.16.56.2$ [proxy:18:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:18:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:18:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:18:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:18:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:18:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:18:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:18:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:18:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_18_1356942276_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:18:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#54987$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] 
[pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 7.09354: spawned new worker 13 of 23 sub_id = 12 SPAWNED NEW WORKER 340368 ARGS passed to NeedlesMpiWorker (pid = 340466) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 12 HERE worker pid =340466 manager_pid = 340368 OPENED SHARED MEMORY 340466 PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=13; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=13; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:35695 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35695 --debug --rmk user --launcher slurm --demux poll --pgid 20 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_20_1750759282_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 
[... bulk of the --global-inherited-env listing elided: the 117 variables passed to this proxy are identical to those shown for the pgid 18 proxy launch above ...]
${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 13 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35695 --debug --rmk user --launcher slurm --demux poll --pgid 20 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=13 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;nerrs=0; [proxy:19:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:19:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): 
cmd=job-getid-response;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#50983$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 19] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#50983$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 85 pid 4: cmd=kvs-put-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 19] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 85 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 19] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 85 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 19] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 85 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#50983$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#50983$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream ARGS passed to NeedlesMpiWorker (pid = MANAGER 3626636515) NeedlesMpiWork NeedlesMpiWork 1643395222 n001.cluster.pssclabs.com 0 t = 7.10575 36266 -job_id 065 -n_managers : spawned new worker 5 of 222 -works_per_man 22 -n_vars_pm 61 sub_id = 4 SPAWNED NEW WORKER 36266 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 4 HERE worker pid =36515 manager_pid = 36266 OPENED SHARED MEMORY 36515 PMI sending: 556 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=5;ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=5;ÿÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:33203 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:33203 --debug --rmk user --launcher slurm --demux poll --pgid 21 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_21_1204692929_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 'NEEDLES_INPUT_FILE=haystack_out.bin' 'DISPLAY=localhost:20.0' 'HOSTNAME=n001' 'OLDPWD=/home/kmccall/Needles2' 'NEEDLES_OUTPUT_FILE=needles_out.txt' 'ENVIRONMENT=BATCH' 'PATH_modshare=/usr/sbin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:1:/usr/bin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:1:/usr/local/sbin:1:/opt/pssc/bin:1:/usr/local/cuda/bin:1:/usr/share/Modules/bin:1:/usr/local/bin:1' 'LOADEDMODULES_modshare=intel/intelmpi:1' 'CDC_JOINED_ZONE=CN=MSMCS,CN=NASA,CN=Zones,CN=Centrify,CN=Program Data,DC=ndc,DC=nasa,DC=gov' 'SLURM_JOB_GID=513' 'CDC_LOCALHOST=rocci.ndc.nasa.gov' 'work_dir=/home/kmccall/Needles2' 'SLURMD_NODENAME=n001' 'FI_PROVIDER_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib/prov' 'met_name=-metric m_disp_contin' 'which_declare=declare -f' 'LD_LIBRARY_PATH_modshare=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:1:/usr/local/cuda/lib64:1' 'CLASSPATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/mpi.jar' 'XDG_SESSION_ID=187440' 'MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl' 'USER=kmccall' 'XTERM_SHELL=/bin/bash' 'man_mas_cmd=./NeedlesMpiMM' 'job_id_cmd=-job_id 065' 'PWD=/home/kmccall/Needles2' 'SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass' 'SLURM_JOB_NODELIST=n[001-002]' 'HOME=/home/kmccall' 'SLURM_CLUSTER_NAME=cluster' 'SSH_CLIENT=156.68.206.153 63270 22' 'SLURM_NTASKS=2' 'SLURM_JOB_CPUS_PER_NODE=24(x2)' 
'XDG_DATA_DIRS=/home/kmccall/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share' 'SLURM_TOPOLOGY_ADDR=n001' '_LMFILES__modshare=/opt/modulefiles/intel/intelmpi:1' 'debug_args=xterm -hold -display localhost:20.0 -e gdb -x /home/kmccall/Needles2/gdb_cmds -args ' 'SLURM_WORKING_CLUSTER=cluster:rocci.ndc.nasa.gov:6817:8960:101' 'SLURM_JOB_NAME=RunNeedles.rocci.bash' 'TMPDIR=/tmp' 'LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib' 'NEEDLES_METRIC_NAME=m_disp_contin' 'SLURM_CONF=/var/spool/slurm/conf-cache/slurm.conf' 'n_ndxs=-n_ndxs 1' 'LOADEDMODULES=intel/intelmpi' 'XTERM_VERSION=XTerm(331)' 'SLURM_NODE_ALIASES=(null)' 'SLURM_JOB_QOS=normal' 'SLURM_TOPOLOGY_ADDR_PATTERN=node' 'DA_SESSION_ID_AUTH=92b69ce4-0909-ec4a-8e6d-7c48125b3947' 'MAIL=/var/spool/mail/kmccall' 'SLURM_JOB_NUM_NODES=2' 'epoch_arg=-epoch 1643395222' 'SHELL=/bin/bash' 'TERM=xterm' 'EPOCH=1643395222' 'SLURM_JOB_UID=384580260' 'TC_LIB_DIR=/usr/lib64/tc' 'NEEDLES_NUM_NDXS=1' 'STRACE=' 'output_file=-output_file needles_out.txt' 'SLURM_JOB_PARTITION=normal' 'num_proc=2' 'NEEDLES_NUM_WORKERS=23' 'job_id=065' 'SLURM_JOB_USER=kmccall' 'CDC_JOINED_DC=ndmsadc11.ndc.nasa.gov' 'NEEDLES_ALL_ORDERS=0' 'SHLVL=5' 'SLURM_SUBMIT_HOST=rocci.ndc.nasa.gov' 'SLURM_JOB_ACCOUNT=users' 'CDC_JOINED_DOMAIN=ndc.nasa.gov' 'MANPATH=::' 'worker_cmd=-worker_cmd NeedlesMpiWork' 'WINDOWID=56623138' 'workers_per_manager=-works_per_man 23' 'manager_cmd=-manager_cmd NeedlesMpiMM' 'GDK_BACKEND=x11' 'MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles:/opt/modulefiles:/opt/modulefiles' 'SLURM_GTIDS=0' 'LOGNAME=kmccall' 'DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-T1aaj4EzvL,guid=27fd49c3e19718eb96fef2db61f41756' 'XDG_RUNTIME_DIR=/run/user/384580260' 'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; 
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 5 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:33203 --debug --rmk user --launcher slurm --demux poll --pgid 21 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=5 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;nerrs=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:18:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:19:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.19 pmirank=0 threaded=FALSE [proxy:19:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:19:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:19:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;rc=0; 
[proxy:19:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:19:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com;rc=0; [proxy:19:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#50983$ifname#172.16.56.1$ [proxy:19:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:19:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:19:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:19:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:19:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:19:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:19:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:19:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:19:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_19_1796030946_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:19:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:19:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:20:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:20:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 20] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#34819$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 91 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#34819$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 20] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 91 pid 4: cmd=kvs-fence-response;rc=0; [proxy:21:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:21:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 20] got PMI command: 109 
cmd=kvs-get;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 91 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 21] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#59405$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 93 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#59405$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 20] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 91 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#34819$ifname#172.16.56.2$;rc=0; [proxy:20:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.20 pmirank=0 threaded=FALSE [proxy:20:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:20:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:20:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;rc=0; [proxy:20:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:20:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com;rc=0; [proxy:20:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#34819$ifname#172.16.56.2$ [proxy:20:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:20:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:20:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:20:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:20:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:20:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:20:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:20:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:20:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_20_1750759282_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:20:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 21] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 93 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 91): 
cmd=kvs-get-response;found=TRUE;value=description#n002$port#34819$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = ARGS passed to NeedlesMpiWorker (pid = 7.21604: spawned new worker 14 of 340476) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 06523 sub_id = 13 -n_managers 2 -works_per_man 23SPAWNED NEW WORKER 340368 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 13 HERE worker pid =340476 manager_pid = 340368 OPENED SHARED MEMORY 340476 PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=14; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=14; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:33651 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:33651 --debug --rmk user --launcher slurm --demux poll --pgid 22 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_22_1068065457_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 
[... remaining --global-inherited-env entries for this proxy omitted; the listing is identical to the 117-entry environment shown above for pgid 21 ...]
'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 14 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:33651 --debug --rmk user --launcher slurm --demux poll --pgid 22 --retries 10 --usize -2 --pmi-port 0 
--gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=14 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 21] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 93 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;nerrs=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 21] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 93 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59405$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59405$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER ARGS passed to NeedlesMpiWorker (pid = 36266 n001.cluster.pssclabs.com 0 t = 36539) NeedlesMpiWork NeedlesMpiWork 16433952227.22095: spawned new worker 36266 -job_id 065 -n_managers 26 of 22 sub_id = 5 SPAWNED NEW WORKER -works_per_man 22 -n_vars_pm 61 -n_seeds336266 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 5 HERE worker pid =36539 manager_pid = 36266 OPENED SHARED MEMORY 
36539 PMI sending: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=6;ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=6;ÿÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:36811 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:36811 --debug --rmk user --launcher slurm --demux poll --pgid 23 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_23_1086250924_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... --global-inherited-env entries omitted; identical to the 117-entry listing shown above for pgid 21 ...]
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 6 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:36811 --debug --rmk user --launcher slurm --demux poll --pgid 23 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=6 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;nerrs=0; [proxy:21:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.21 pmirank=0 threaded=FALSE [proxy:21:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:21:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:21:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;rc=0; [proxy:21:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:21:0@n001.cluster.pssclabs.com] PMI 
response: cmd=job-getid-response;jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com;rc=0; [proxy:21:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#59405$ifname#172.16.56.1$ [proxy:21:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:21:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:21:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:21:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:21:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:21:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:21:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:21:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:21:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_21_1204692929_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:21:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:21:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:20:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:22:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:22:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 22] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#37161$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 99 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#37161$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 22] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 99 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 22] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 99 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 22] got PMI command: 103 
cmd=kvs-get;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 99 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#37161$ifname#172.16.56.2$;rc=0; [proxy:22:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.22 pmirank=0 threaded=FALSE [proxy:22:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:22:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:22:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;rc=0; [proxy:22:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:22:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com;rc=0; [proxy:22:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#37161$ifname#172.16.56.2$ [proxy:22:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:22:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:22:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:22:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:22:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:22:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:22:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:22:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:22:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#37161$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 
cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 ARGS passed to NeedlesMpiWorker (pid = t = 7.33607340486) NeedlesMpiWork : spawned new worker 15NeedlesMpiWork 1643395222 340368 of 23 sub_id = 14 -job_id 065 -n_managers 2 SPAWNED NEW WORKER 340368 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 14 HERE worker pid =340486 manager_pid = 340368 OPENED SHARED MEMORY 340486 PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=15; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=15; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:38265 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38265 --debug --rmk user --launcher slurm --demux poll --pgid 24 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_24_2008169530_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... --global-inherited-env entries omitted; identical to the 117-entry listing shown above for pgid 21 ...]
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 15 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38265 --debug --rmk user --launcher slurm --demux poll --pgid 24 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=15 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:23:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:23:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#34201$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 23] got PMI command: 92 
cmd=kvs-put;key=P0-businesscard;value=description#n001$port#34201$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 101 pid 4: cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 23] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 101 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 23] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 101 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 23] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 101 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#34201$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#34201$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 7.34586: spawned new worker 7 of 22 sub_id = 6 SPAWNED NEW WORKER 36266 ARGS passed to NeedlesMpiWorker (pid = 36563) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 6 HERE worker pid =36563 manager_pid = 36266 OPENED SHARED MEMORY 36563 PMI sending: 556 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=7;ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=7 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=7;ÿÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:41189 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:41189 --debug --rmk user --launcher slurm --demux poll --pgid 25 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_25_1414957313_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remainder of the inherited-environment listing (117 variables, identical to the earlier proxy launches) trimmed ...]
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 7 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:41189 --debug --rmk user --launcher slurm --demux poll --pgid 25 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;nerrs=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;nerrs=0; jobid=kvs_36231_22_1068065457_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:22:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:22:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:23:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.23 pmirank=0 threaded=FALSE [proxy:23:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:23:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:23:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;rc=0; [proxy:23:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:23:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com;rc=0; [proxy:23:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#34201$ifname#172.16.56.1$ [proxy:23:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream 
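
For orientation, each "PMI sending: ... cmd=spawn;..." request in this trace is what MPICH's PMI layer emits when a manager process calls MPI_Comm_spawn for a single worker (maxprocs=1, with the worker's command-line options passed as argv). A minimal manager-side sketch that would generate a request of this shape is below; the helper name and argument list are illustrative assumptions, not the actual NeedlesMpiMM source.

#include <mpi.h>
#include <stdio.h>

/* Illustrative sketch: spawn one worker at a time, the way this trace shows
 * (one cmd=spawn per worker with maxprocs=1).  The argv entries stand in for
 * the -job_id / -manager_rank / -worker_rank style options seen in the log. */
static MPI_Comm spawn_one_worker(int manager_rank, int worker_rank)
{
    char mrank[16], wrank[16];
    snprintf(mrank, sizeof mrank, "%d", manager_rank);
    snprintf(wrank, sizeof wrank, "%d", worker_rank);

    char *argv[] = { "-manager_rank", mrank,
                     "-worker_rank",  wrank,
                     NULL };                     /* argv must be NULL-terminated */

    MPI_Comm intercomm;
    int errcode;
    MPI_Comm_spawn("NeedlesMpiWork", argv, 1 /* maxprocs */, MPI_INFO_NULL,
                   0 /* root */, MPI_COMM_SELF, &intercomm, &errcode);
    return intercomm;                            /* intercommunicator to the new worker */
}

Each such call is what drives mpiexec to assemble a fresh proxy ("Proxy launch args: ... --pgid N") and start it through srun, as the "Launch arguments:" lines in this trace show.
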
[proxy:23:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:23:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:23:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:23:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:23:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:23:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:23:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:23:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_23_1086250924_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:23:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:23:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:24:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:24:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 24] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#52921$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 107 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#52921$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 24] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 107 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 24] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 107 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 24] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 107 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#52921$ifname#172.16.56.2$;rc=0; [proxy:24:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.24 pmirank=0 threaded=FALSE [proxy:24:0@n002.cluster.pssclabs.com] PMI response: 
cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:24:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:24:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;rc=0; [proxy:24:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:24:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com;rc=0; [proxy:24:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#52921$ifname#172.16.56.2$ [proxy:24:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:24:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:24:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:24:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:24:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:24:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:24:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:24:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:24:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_24_2008169530_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:24:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#52921$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 
cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 7.46228: spawned new worker 16 of 23 sub_id = 15 SPAWNED NEW WORKER 340368 ARGS passed to NeedlesMpiWorker (pid = 340496) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 15 HERE worker pid =340496 manager_pid = 340368 OPENED SHARED MEMORY 340496 PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=16; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=16; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:37457 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37457 --debug --rmk user --launcher slurm --demux poll --pgid 26 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_26_163651608_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64'
[... remainder of the inherited-environment listing (117 variables, identical to the earlier proxy launches) trimmed ...]
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 16 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37457 --debug --rmk user --launcher slurm --demux poll --pgid 26 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=16 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:25:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:25:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;nerrs=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#40783$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 25] got PMI command: 92 
cmd=kvs-put;key=P0-businesscard;value=description#n001$port#40783$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 109 pid 4: cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 25] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 109 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 25] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 109 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 25] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 109 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#40783$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#40783$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 7.47089: spawned new worker 8 of 22 sub_id = 7 SPAWNED NEW WORKER 36266 ARGS passed to NeedlesMpiWorker (pid = 36587) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 7 PMI sending: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=8;ÿÿ HERE worker pid =36587 manager_pid = 36266
OPENED SHARED MEMORY 36587 [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=8;ÿÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:37595 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37595 --debug --rmk user --launcher slurm --demux poll --pgid 27 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_27_869128119_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64'
[... remainder of the inherited-environment listing (117 variables, identical to the earlier proxy launches) trimmed ...]
'XDG_RUNTIME_DIR=/run/user/384580260' 'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 8 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37595 --debug --rmk user --launcher slurm --demux poll --pgid 27 
--retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;nerrs=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=8 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;nerrs=0; [proxy:24:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:25:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.25 pmirank=0 threaded=FALSE [proxy:25:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:25:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:25:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;rc=0; [proxy:25:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:25:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com;rc=0; [proxy:25:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#40783$ifname#172.16.56.1$ [proxy:25:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:25:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:25:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:25:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:25:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:25:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:25:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:25:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:25:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_25_1414957313_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:25:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:25:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:26:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 
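For reference, the repeated cmd=spawn records above (ncmds=1, maxprocs=1, a 27-element argv, and a preput PARENT_ROOT_PORT_NAME key) are what Hydra emits each time the manager calls MPI_Comm_spawn for a single worker. Below is a minimal sketch of what that manager-side call presumably looks like; the command name and argv values are copied from argv0..argv26 of the spawn record, while the surrounding program structure, the use of MPI_COMM_SELF as the spawning communicator, and all variable names are assumptions, not the actual NeedlesMpiMM source.

    // Hedged sketch only: reproduces the spawn record seen in the trace,
    // not the real NeedlesMpiMM implementation.
    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv) {
        MPI_Init(&argc, &argv);

        // Child argv copied from argv0..argv26 of the spawn command above
        // (the trace shows the worker name repeated as the first argument).
        char* child_argv[] = {
            (char*)"NeedlesMpiWork", (char*)"1643395222", (char*)"36266",
            (char*)"-job_id",          (char*)"065",
            (char*)"-n_managers",      (char*)"2",
            (char*)"-works_per_man",   (char*)"22",
            (char*)"-n_vars_pm",       (char*)"61",
            (char*)"-n_seeds3",        (char*)"2003",
            (char*)"-n_ndxs",          (char*)"1",
            (char*)"-dispersions_ndx", (char*)"0",
            (char*)"-features_ndx",    (char*)"4",
            (char*)"-metrics_ndx",     (char*)"54",
            (char*)"-metric",          (char*)"59",
            (char*)"-manager_rank",    (char*)"0",
            (char*)"-worker_rank",     (char*)"8",
            nullptr
        };

        MPI_Comm intercomm = MPI_COMM_NULL;
        int errcode = MPI_SUCCESS;

        // maxprocs = 1 matches "maxprocs=1" in the trace; Hydra itself preputs
        // PARENT_ROOT_PORT_NAME into the new process group's KVS.
        MPI_Comm_spawn("NeedlesMpiWork", child_argv, 1, MPI_INFO_NULL,
                       /*root=*/0, MPI_COMM_SELF, &intercomm, &errcode);

        std::printf("spawn returned, errcode = %d\n", errcode);

        MPI_Comm_disconnect(&intercomm);
        MPI_Finalize();
        return 0;
    }

Each such call produces one new Hydra process group (the pgid numbers climbing 27, 28, 29, 30 in the trace) and one srun launch of hydra_pmi_proxy, which is why the full inherited-environment listing is reprinted for every spawn.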
[proxy:26:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#47401$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 26] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#47401$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 115 pid 4: cmd=kvs-put-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 26] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 115 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 26] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 115 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 26] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 115 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#47401$ifname#172.16.56.2$;rc=0; [proxy:26:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.26 pmirank=0 threaded=FALSE [proxy:26:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:26:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:26:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;rc=0; [proxy:26:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:26:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com;rc=0; [proxy:26:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#47401$ifname#172.16.56.2$ [proxy:26:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:26:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:26:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:26:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:26:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:26:0@n002.cluster.pssclabs.com] got pmi command 
(from 4): kvs-get jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:26:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:26:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:26:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_26_163651608_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:26:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#47401$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = ARGS passed to NeedlesMpiWorker (pid = 7.59331: spawned new worker 17 of 23 sub_id = 16 SPAWNED NEW WORKER 340506) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065340368 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 16 HERE worker pid =340506 manager_pid = 340368 OPENED SHARED MEMORY 340506 PMI sending: 558 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=17; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=17; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:34625 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:34625 --debug --rmk user --launcher slurm --demux poll --pgid 28 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_28_775591275_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 'NEEDLES_INPUT_FILE=haystack_out.bin' 'DISPLAY=localhost:20.0' 'HOSTNAME=n001' 'OLDPWD=/home/kmccall/Needles2' 'NEEDLES_OUTPUT_FILE=needles_out.txt' 'ENVIRONMENT=BATCH' 'PATH_modshare=/usr/sbin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:1:/usr/bin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:1:/usr/local/sbin:1:/opt/pssc/bin:1:/usr/local/cuda/bin:1:/usr/share/Modules/bin:1:/usr/local/bin:1' 'LOADEDMODULES_modshare=intel/intelmpi:1' 'CDC_JOINED_ZONE=CN=MSMCS,CN=NASA,CN=Zones,CN=Centrify,CN=Program Data,DC=ndc,DC=nasa,DC=gov' 'SLURM_JOB_GID=513' 'CDC_LOCALHOST=rocci.ndc.nasa.gov' 'work_dir=/home/kmccall/Needles2' 'SLURMD_NODENAME=n001' 'FI_PROVIDER_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib/prov' 'met_name=-metric m_disp_contin' 'which_declare=declare -f' 'LD_LIBRARY_PATH_modshare=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:1:/usr/local/cuda/lib64:1' 'CLASSPATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/mpi.jar' 'XDG_SESSION_ID=187440' 'MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl' 'USER=kmccall' 'XTERM_SHELL=/bin/bash' 'man_mas_cmd=./NeedlesMpiMM' 'job_id_cmd=-job_id 065' 'PWD=/home/kmccall/Needles2' 'SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass' 'SLURM_JOB_NODELIST=n[001-002]' 'HOME=/home/kmccall' 'SLURM_CLUSTER_NAME=cluster' 'SSH_CLIENT=156.68.206.153 63270 22' 'SLURM_NTASKS=2' 'SLURM_JOB_CPUS_PER_NODE=24(x2)' 
'XDG_DATA_DIRS=/home/kmccall/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share' 'SLURM_TOPOLOGY_ADDR=n001' '_LMFILES__modshare=/opt/modulefiles/intel/intelmpi:1' 'debug_args=xterm -hold -display localhost:20.0 -e gdb -x /home/kmccall/Needles2/gdb_cmds -args ' 'SLURM_WORKING_CLUSTER=cluster:rocci.ndc.nasa.gov:6817:8960:101' 'SLURM_JOB_NAME=RunNeedles.rocci.bash' 'TMPDIR=/tmp' 'LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib' 'NEEDLES_METRIC_NAME=m_disp_contin' 'SLURM_CONF=/var/spool/slurm/conf-cache/slurm.conf' 'n_ndxs=-n_ndxs 1' 'LOADEDMODULES=intel/intelmpi' 'XTERM_VERSION=XTerm(331)' 'SLURM_NODE_ALIASES=(null)' 'SLURM_JOB_QOS=normal' 'SLURM_TOPOLOGY_ADDR_PATTERN=node' 'DA_SESSION_ID_AUTH=92b69ce4-0909-ec4a-8e6d-7c48125b3947' 'MAIL=/var/spool/mail/kmccall' 'SLURM_JOB_NUM_NODES=2' 'epoch_arg=-epoch 1643395222' 'SHELL=/bin/bash' 'TERM=xterm' 'EPOCH=1643395222' 'SLURM_JOB_UID=384580260' 'TC_LIB_DIR=/usr/lib64/tc' 'NEEDLES_NUM_NDXS=1' 'STRACE=' 'output_file=-output_file needles_out.txt' 'SLURM_JOB_PARTITION=normal' 'num_proc=2' 'NEEDLES_NUM_WORKERS=23' 'job_id=065' 'SLURM_JOB_USER=kmccall' 'CDC_JOINED_DC=ndmsadc11.ndc.nasa.gov' 'NEEDLES_ALL_ORDERS=0' 'SHLVL=5' 'SLURM_SUBMIT_HOST=rocci.ndc.nasa.gov' 'SLURM_JOB_ACCOUNT=users' 'CDC_JOINED_DOMAIN=ndc.nasa.gov' 'MANPATH=::' 'worker_cmd=-worker_cmd NeedlesMpiWork' 'WINDOWID=56623138' 'workers_per_manager=-works_per_man 23' 'manager_cmd=-manager_cmd NeedlesMpiMM' 'GDK_BACKEND=x11' 'MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles:/opt/modulefiles:/opt/modulefiles' 'SLURM_GTIDS=0' 'LOGNAME=kmccall' 'DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-T1aaj4EzvL,guid=27fd49c3e19718eb96fef2db61f41756' 'XDG_RUNTIME_DIR=/run/user/384580260' 'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; 
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 17 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:34625 --debug --rmk user --launcher slurm --demux poll --pgid 28 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=17 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream [proxy:27:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:27:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#58447$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 27] got PMI command: 92 
cmd=kvs-put;key=P0-businesscard;value=description#n001$port#58447$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 117 pid 4: cmd=kvs-put-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 27] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 117 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 27] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 117 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 27] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 117 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#58447$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#58447$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream MANAGER ARGS passed to NeedlesMpiWorker (pid = 36266 n001.cluster.pssclabs.com 0 t = 36611) NeedlesMpiWork NeedlesMpiWork 16433952227.62725: spawned new worker 9 of 22 sub_id = 36266 -job_id 065 -n_managers 28 SPAWNED NEW WORKER 36266 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 8 HERE worker pid =36611 manager_pid = 36266 OPENED SHARED MEMORY 36611 PMI sending: 556 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=9;ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 556 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=9;ÿÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:38551 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38551 --debug --rmk user --launcher slurm --demux poll --pgid 29 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_29_1721096767_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 'NEEDLES_INPUT_FILE=haystack_out.bin' 'DISPLAY=localhost:20.0' 'HOSTNAME=n001' 'OLDPWD=/home/kmccall/Needles2' 'NEEDLES_OUTPUT_FILE=needles_out.txt' 'ENVIRONMENT=BATCH' 'PATH_modshare=/usr/sbin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:1:/usr/bin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:1:/usr/local/sbin:1:/opt/pssc/bin:1:/usr/local/cuda/bin:1:/usr/share/Modules/bin:1:/usr/local/bin:1' 'LOADEDMODULES_modshare=intel/intelmpi:1' 'CDC_JOINED_ZONE=CN=MSMCS,CN=NASA,CN=Zones,CN=Centrify,CN=Program Data,DC=ndc,DC=nasa,DC=gov' 'SLURM_JOB_GID=513' 'CDC_LOCALHOST=rocci.ndc.nasa.gov' 'work_dir=/home/kmccall/Needles2' 'SLURMD_NODENAME=n001' 'FI_PROVIDER_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib/prov' 'met_name=-metric m_disp_contin' 'which_declare=declare -f' 'LD_LIBRARY_PATH_modshare=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:1:/usr/local/cuda/lib64:1' 'CLASSPATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/mpi.jar' 'XDG_SESSION_ID=187440' 'MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl' 'USER=kmccall' 'XTERM_SHELL=/bin/bash' 'man_mas_cmd=./NeedlesMpiMM' 'job_id_cmd=-job_id 065' 'PWD=/home/kmccall/Needles2' 'SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass' 'SLURM_JOB_NODELIST=n[001-002]' 'HOME=/home/kmccall' 'SLURM_CLUSTER_NAME=cluster' 'SSH_CLIENT=156.68.206.153 63270 22' 'SLURM_NTASKS=2' 'SLURM_JOB_CPUS_PER_NODE=24(x2)' 
'XDG_DATA_DIRS=/home/kmccall/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share' 'SLURM_TOPOLOGY_ADDR=n001' '_LMFILES__modshare=/opt/modulefiles/intel/intelmpi:1' 'debug_args=xterm -hold -display localhost:20.0 -e gdb -x /home/kmccall/Needles2/gdb_cmds -args ' 'SLURM_WORKING_CLUSTER=cluster:rocci.ndc.nasa.gov:6817:8960:101' 'SLURM_JOB_NAME=RunNeedles.rocci.bash' 'TMPDIR=/tmp' 'LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib' 'NEEDLES_METRIC_NAME=m_disp_contin' 'SLURM_CONF=/var/spool/slurm/conf-cache/slurm.conf' 'n_ndxs=-n_ndxs 1' 'LOADEDMODULES=intel/intelmpi' 'XTERM_VERSION=XTerm(331)' 'SLURM_NODE_ALIASES=(null)' 'SLURM_JOB_QOS=normal' 'SLURM_TOPOLOGY_ADDR_PATTERN=node' 'DA_SESSION_ID_AUTH=92b69ce4-0909-ec4a-8e6d-7c48125b3947' 'MAIL=/var/spool/mail/kmccall' 'SLURM_JOB_NUM_NODES=2' 'epoch_arg=-epoch 1643395222' 'SHELL=/bin/bash' 'TERM=xterm' 'EPOCH=1643395222' 'SLURM_JOB_UID=384580260' 'TC_LIB_DIR=/usr/lib64/tc' 'NEEDLES_NUM_NDXS=1' 'STRACE=' 'output_file=-output_file needles_out.txt' 'SLURM_JOB_PARTITION=normal' 'num_proc=2' 'NEEDLES_NUM_WORKERS=23' 'job_id=065' 'SLURM_JOB_USER=kmccall' 'CDC_JOINED_DC=ndmsadc11.ndc.nasa.gov' 'NEEDLES_ALL_ORDERS=0' 'SHLVL=5' 'SLURM_SUBMIT_HOST=rocci.ndc.nasa.gov' 'SLURM_JOB_ACCOUNT=users' 'CDC_JOINED_DOMAIN=ndc.nasa.gov' 'MANPATH=::' 'worker_cmd=-worker_cmd NeedlesMpiWork' 'WINDOWID=56623138' 'workers_per_manager=-works_per_man 23' 'manager_cmd=-manager_cmd NeedlesMpiMM' 'GDK_BACKEND=x11' 'MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles:/opt/modulefiles:/opt/modulefiles' 'SLURM_GTIDS=0' 'LOGNAME=kmccall' 'DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-T1aaj4EzvL,guid=27fd49c3e19718eb96fef2db61f41756' 'XDG_RUNTIME_DIR=/run/user/384580260' 'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; 
then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 9 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38551 --debug --rmk user --launcher slurm --demux poll --pgid 29 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=9 ÿÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;nerrs=0; [proxy:26:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:27:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.27 pmirank=0 threaded=FALSE [proxy:27:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:27:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:27:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;rc=0; 
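The fullinit / job-getid / kvs-get PARENT_ROOT_PORT_NAME sequence that each proxy reports above is what the spawned worker's MPI_Init generates while it connects back to the manager that spawned it (the spawner-jobid in the fullinit-response); the worker then reaches that manager through MPI_Comm_get_parent. A minimal sketch of that worker-side startup follows, with illustrative names only, not the actual NeedlesMpiWork source.

    // Hedged sketch of the spawned worker's startup, matching the fullinit /
    // PARENT_ROOT_PORT_NAME exchanges in the trace.
    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv) {
        // MPI_Init in the spawned process drives the fullinit and the
        // kvs-get of PARENT_ROOT_PORT_NAME recorded above.
        MPI_Init(&argc, &argv);

        MPI_Comm parent = MPI_COMM_NULL;
        MPI_Comm_get_parent(&parent);

        if (parent == MPI_COMM_NULL) {
            std::fprintf(stderr, "started directly, not via MPI_Comm_spawn\n");
        } else {
            int remote_size = 0;
            MPI_Comm_remote_size(parent, &remote_size);  // size of the spawning group
            std::printf("connected to parent intercommunicator, remote size = %d\n",
                        remote_size);
            MPI_Comm_disconnect(&parent);
        }

        MPI_Finalize();
        return 0;
    }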
[proxy:27:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:27:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com;rc=0; [proxy:27:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#58447$ifname#172.16.56.1$ [proxy:27:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:27:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:27:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:27:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:27:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:27:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:27:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:27:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:27:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_27_869128119_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:27:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:27:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:28:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:28:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 28] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#37253$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 123 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#37253$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 28] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 123 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 28] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 123 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 28] got PMI command: 102 
cmd=kvs-get;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 123 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#37253$ifname#172.16.56.2$;rc=0; [proxy:28:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.28 pmirank=0 threaded=FALSE [proxy:28:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:28:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:28:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;rc=0; [proxy:28:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:28:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com;rc=0; [proxy:28:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#37253$ifname#172.16.56.2$ [proxy:28:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:28:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:28:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:28:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:28:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:28:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:28:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:28:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:28:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_28_775591275_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:28:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#37253$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 
cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream ARGS passed to NeedlesMpiWorker (pid = 340516) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managersPMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 7.72443: spawned new worker 18 of 23 sub_id = 17 SPAWNED NEW WORKER 340368 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 17 HERE worker pid =340516 manager_pid = 340368 OPENED SHARED MEMORY 340516 PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=18; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=18; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:46107 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46107 --debug --rmk user --launcher slurm --demux poll --pgid 30 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_30_1555946411_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 
'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 'NEEDLES_INPUT_FILE=haystack_out.bin' 'DISPLAY=localhost:20.0' 'HOSTNAME=n001' 'OLDPWD=/home/kmccall/Needles2' 'NEEDLES_OUTPUT_FILE=needles_out.txt' 'ENVIRONMENT=BATCH' 'PATH_modshare=/usr/sbin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:1:/usr/bin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:1:/usr/local/sbin:1:/opt/pssc/bin:1:/usr/local/cuda/bin:1:/usr/share/Modules/bin:1:/usr/local/bin:1' 'LOADEDMODULES_modshare=intel/intelmpi:1' 'CDC_JOINED_ZONE=CN=MSMCS,CN=NASA,CN=Zones,CN=Centrify,CN=Program Data,DC=ndc,DC=nasa,DC=gov' 'SLURM_JOB_GID=513' 'CDC_LOCALHOST=rocci.ndc.nasa.gov' 'work_dir=/home/kmccall/Needles2' 'SLURMD_NODENAME=n001' 'FI_PROVIDER_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib/prov' 'met_name=-metric m_disp_contin' 'which_declare=declare -f' 'LD_LIBRARY_PATH_modshare=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:1:/usr/local/cuda/lib64:1' 'CLASSPATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/mpi.jar' 'XDG_SESSION_ID=187440' 'MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl' 
'USER=kmccall' 'XTERM_SHELL=/bin/bash' 'man_mas_cmd=./NeedlesMpiMM' 'job_id_cmd=-job_id 065' 'PWD=/home/kmccall/Needles2' 'SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass' 'SLURM_JOB_NODELIST=n[001-002]' 'HOME=/home/kmccall' 'SLURM_CLUSTER_NAME=cluster' 'SSH_CLIENT=156.68.206.153 63270 22' 'SLURM_NTASKS=2' 'SLURM_JOB_CPUS_PER_NODE=24(x2)' 'XDG_DATA_DIRS=/home/kmccall/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share' 'SLURM_TOPOLOGY_ADDR=n001' '_LMFILES__modshare=/opt/modulefiles/intel/intelmpi:1' 'debug_args=xterm -hold -display localhost:20.0 -e gdb -x /home/kmccall/Needles2/gdb_cmds -args ' 'SLURM_WORKING_CLUSTER=cluster:rocci.ndc.nasa.gov:6817:8960:101' 'SLURM_JOB_NAME=RunNeedles.rocci.bash' 'TMPDIR=/tmp' 'LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib' 'NEEDLES_METRIC_NAME=m_disp_contin' 'SLURM_CONF=/var/spool/slurm/conf-cache/slurm.conf' 'n_ndxs=-n_ndxs 1' 'LOADEDMODULES=intel/intelmpi' 'XTERM_VERSION=XTerm(331)' 'SLURM_NODE_ALIASES=(null)' 'SLURM_JOB_QOS=normal' 'SLURM_TOPOLOGY_ADDR_PATTERN=node' 'DA_SESSION_ID_AUTH=92b69ce4-0909-ec4a-8e6d-7c48125b3947' 'MAIL=/var/spool/mail/kmccall' 'SLURM_JOB_NUM_NODES=2' 'epoch_arg=-epoch 1643395222' 'SHELL=/bin/bash' 'TERM=xterm' 'EPOCH=1643395222' 'SLURM_JOB_UID=384580260' 'TC_LIB_DIR=/usr/lib64/tc' 'NEEDLES_NUM_NDXS=1' 'STRACE=' 'output_file=-output_file needles_out.txt' 'SLURM_JOB_PARTITION=normal' 'num_proc=2' 'NEEDLES_NUM_WORKERS=23' 'job_id=065' 'SLURM_JOB_USER=kmccall' 'CDC_JOINED_DC=ndmsadc11.ndc.nasa.gov' 'NEEDLES_ALL_ORDERS=0' 'SHLVL=5' 'SLURM_SUBMIT_HOST=rocci.ndc.nasa.gov' 'SLURM_JOB_ACCOUNT=users' 'CDC_JOINED_DOMAIN=ndc.nasa.gov' 'MANPATH=::' 'worker_cmd=-worker_cmd NeedlesMpiWork' 'WINDOWID=56623138' 'workers_per_manager=-works_per_man 23' 'manager_cmd=-manager_cmd NeedlesMpiMM' 'GDK_BACKEND=x11' 'MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles:/opt/modulefiles:/opt/modulefiles' 'SLURM_GTIDS=0' 'LOGNAME=kmccall' 'DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-T1aaj4EzvL,guid=27fd49c3e19718eb96fef2db61f41756' 'XDG_RUNTIME_DIR=/run/user/384580260' 'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in 
${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 18 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46107 --debug --rmk user --launcher slurm --demux poll --pgid 30 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=18 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;nerrs=0; [proxy:29:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:29:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): 
cmd=job-getid-response;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; [proxy:28:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 29] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#52309$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 125 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#52309$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 29] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 125 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 29] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 125 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 29] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 125 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#52309$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#52309$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream MANAGER ARGS passed to NeedlesMpiWorker (pid = 36266 n001.cluster.pssclabs.com 0 t = 7.76898: 36635) NeedlesMpiWork NeedlesMpiWork spawned new worker 10 of 22 sub_id = 91643395222 36266 -job_id 065 -n_managers SPAWNED NEW WORKER 36266 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 9 HERE worker pid =36635 manager_pid = 36266 OPENED SHARED MEMORY 36635 PMI sending: 557 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=10;ÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=10;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:41145 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:41145 --debug --rmk user --launcher slurm --demux poll --pgid 31 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_31_24448425_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited environment variables (identical to the environment dump shown earlier in this log) elided ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 10 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:41145 --debug --rmk user --launcher slurm --demux poll --pgid 31 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=10 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 86): cmd=spawn-response;rc=0;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;nerrs=0; [proxy:29:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.29 pmirank=0 threaded=FALSE [proxy:29:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:29:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:29:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;rc=0; [proxy:29:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:29:0@n001.cluster.pssclabs.com] PMI
response: cmd=job-getid-response;jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com;rc=0; [proxy:29:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#52309$ifname#172.16.56.1$ [proxy:29:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:29:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:29:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:29:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:29:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:29:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:29:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:29:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:29:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_29_1721096767_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:29:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:29:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:30:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:30:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 30] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#52793$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 131 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#52793$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 30] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 131 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 30] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 131 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 30] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] 
PMI response to fd 131 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#52793$ifname#172.16.56.2$;rc=0; [proxy:30:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.30 pmirank=0 threaded=FALSE [proxy:30:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:30:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:30:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;rc=0; [proxy:30:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:30:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com;rc=0; [proxy:30:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#52793$ifname#172.16.56.2$ [proxy:30:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:30:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:30:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:30:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:30:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:30:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:30:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:30:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:30:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_30_1555946411_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:30:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#52793$ifname#172.16.56.2$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com 
srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream ARGS passed to NeedlesMpiWorker (pid = 340526) NeedlesMpiWork NeedlesMpiWorkPMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 7.87346: spawned new worker 19 of 23 sub_id = 18 SPAWNED NEW WORKER 340368 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 18 HERE worker pid =340526 manager_pid = 340368 OPENED SHARED MEMORY 340526 PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=19; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=19; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:35577 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35577 --debug --rmk user --launcher slurm --demux poll --pgid 32 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_32_1761375713_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited environment variables (identical to the environment dump shown earlier in this log) elided ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 19 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35577 --debug --rmk user --launcher slurm --demux poll --pgid 32 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;nerrs=0; ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=19 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;nerrs=0; [proxy:30:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:31:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:31:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 82): cmd=job-getid-response;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 31] got PMI command: 92
cmd=kvs-put;key=P0-businesscard;value=description#n001$port#37341$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 133 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#37341$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 31] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 133 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 31] got PMI command: 107 cmd=kvs-get;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 133 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 107 cmd=kvs-get;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 31] got PMI command: 101 cmd=kvs-get;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 133 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#37341$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 101 cmd=kvs-get;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#37341$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 7.92492: spawned new worker 11 of 22 sub_id = 10 SPAWNED NEW WORKER 36266 ARGS passed to NeedlesMpiWorker (pid = 36659) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 10 HERE worker pid =36659 manager_pid = 36266 [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 
argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=11 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=11;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:35609 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35609 --debug --rmk user --launcher slurm --demux poll --pgid 33 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_33_513435412_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117
[... 117 inherited environment variables (identical to the environment dump shown earlier in this log) elided ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0
-worker_rank 11 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35609 --debug --rmk user --launcher slurm --demux poll --pgid 33 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;nerrs=0; PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=11;ÿ OPENED SHARED MEMORY 36659 [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;nerrs=0; [proxy:31:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.31 pmirank=0 threaded=FALSE [proxy:31:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:31:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:31:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;rc=0; [proxy:31:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:31:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com;rc=0; [proxy:31:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#37341$ifname#172.16.56.1$ [proxy:31:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:31:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:31:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:31:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:31:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:31:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:31:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:31:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:31:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_31_24448425_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:31:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:31:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:32:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 
pmi_subversion=0 [proxy:32:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 32] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#45231$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 139 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#45231$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 32] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 139 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 32] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 139 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 32] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 139 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#45231$ifname#172.16.56.2$;rc=0; [proxy:32:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.32 pmirank=0 threaded=FALSE [proxy:32:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:32:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:32:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;rc=0; [proxy:32:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:32:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;rc=0; [proxy:32:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#45231$ifname#172.16.56.2$ [proxy:32:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:32:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:32:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:32:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:32:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:32:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:32:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream 
[proxy:32:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:32:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:32:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_32_1761375713_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#45231$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 8.02052: spawned new worker 20 of 23 sub_id = 19 SPAWNED NEW WORKER 340368 [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=20; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:38491 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38491 --debug --rmk user --launcher slurm --demux poll 
--pgid 34 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_34_1269418524_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 'NEEDLES_INPUT_FILE=haystack_out.bin' 'DISPLAY=localhost:20.0' 'HOSTNAME=n001' 'OLDPWD=/home/kmccall/Needles2' 'NEEDLES_OUTPUT_FILE=needles_out.txt' 'ENVIRONMENT=BATCH' 'PATH_modshare=/usr/sbin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:1:/usr/bin:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:1:/usr/local/sbin:1:/opt/pssc/bin:1:/usr/local/cuda/bin:1:/usr/share/Modules/bin:1:/usr/local/bin:1' 'LOADEDMODULES_modshare=intel/intelmpi:1' 'CDC_JOINED_ZONE=CN=MSMCS,CN=NASA,CN=Zones,CN=Centrify,CN=Program Data,DC=ndc,DC=nasa,DC=gov' 'SLURM_JOB_GID=513' 'CDC_LOCALHOST=rocci.ndc.nasa.gov' 'work_dir=/home/kmccall/Needles2' 'SLURMD_NODENAME=n001' 'FI_PROVIDER_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib/prov' 
'met_name=-metric m_disp_contin' 'which_declare=declare -f' 'LD_LIBRARY_PATH_modshare=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:1:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:1:/usr/local/cuda/lib64:1' 'CLASSPATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/mpi.jar' 'XDG_SESSION_ID=187440' 'MODULES_CMD=/usr/share/Modules/libexec/modulecmd.tcl' 'USER=kmccall' 'XTERM_SHELL=/bin/bash' 'man_mas_cmd=./NeedlesMpiMM' 'job_id_cmd=-job_id 065' 'PWD=/home/kmccall/Needles2' 'SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass' 'SLURM_JOB_NODELIST=n[001-002]' 'HOME=/home/kmccall' 'SLURM_CLUSTER_NAME=cluster' 'SSH_CLIENT=156.68.206.153 63270 22' 'SLURM_NTASKS=2' 'SLURM_JOB_CPUS_PER_NODE=24(x2)' 'XDG_DATA_DIRS=/home/kmccall/.local/share/flatpak/exports/share:/var/lib/flatpak/exports/share:/usr/local/share:/usr/share' 'SLURM_TOPOLOGY_ADDR=n001' '_LMFILES__modshare=/opt/modulefiles/intel/intelmpi:1' 'debug_args=xterm -hold -display localhost:20.0 -e gdb -x /home/kmccall/Needles2/gdb_cmds -args ' 'SLURM_WORKING_CLUSTER=cluster:rocci.ndc.nasa.gov:6817:8960:101' 'SLURM_JOB_NAME=RunNeedles.rocci.bash' 'TMPDIR=/tmp' 'LIBRARY_PATH=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib' 'NEEDLES_METRIC_NAME=m_disp_contin' 'SLURM_CONF=/var/spool/slurm/conf-cache/slurm.conf' 'n_ndxs=-n_ndxs 1' 'LOADEDMODULES=intel/intelmpi' 'XTERM_VERSION=XTerm(331)' 'SLURM_NODE_ALIASES=(null)' 'SLURM_JOB_QOS=normal' 'SLURM_TOPOLOGY_ADDR_PATTERN=node' 'DA_SESSION_ID_AUTH=92b69ce4-0909-ec4a-8e6d-7c48125b3947' 'MAIL=/var/spool/mail/kmccall' 'SLURM_JOB_NUM_NODES=2' 'epoch_arg=-epoch 1643395222' 'SHELL=/bin/bash' 'TERM=xterm' 'EPOCH=1643395222' 'SLURM_JOB_UID=384580260' 'TC_LIB_DIR=/usr/lib64/tc' 'NEEDLES_NUM_NDXS=1' 'STRACE=' 'output_file=-output_file needles_out.txt' 'SLURM_JOB_PARTITION=normal' 'num_proc=2' 'NEEDLES_NUM_WORKERS=23' 'job_id=065' 'SLURM_JOB_USER=kmccall' 'CDC_JOINED_DC=ndmsadc11.ndc.nasa.gov' 'NEEDLES_ALL_ORDERS=0' 'SHLVL=5' 'SLURM_SUBMIT_HOST=rocci.ndc.nasa.gov' 'SLURM_JOB_ACCOUNT=users' 'CDC_JOINED_DOMAIN=ndc.nasa.gov' 'MANPATH=::' 'worker_cmd=-worker_cmd NeedlesMpiWork' 'WINDOWID=56623138' 'workers_per_manager=-works_per_man 23' 'manager_cmd=-manager_cmd NeedlesMpiMM' 'GDK_BACKEND=x11' 'MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles:/opt/modulefiles:/opt/modulefiles' 'SLURM_GTIDS=0' 'LOGNAME=kmccall' 'DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-T1aaj4EzvL,guid=27fd49c3e19718eb96fef2db61f41756' 'XDG_RUNTIME_DIR=/run/user/384580260' 'MODULEPATH_modshare=/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1' 'PATH=.:/opt/mpich/bin:/opt/mpich/bin:/usr/local/cuda/bin:.:/home/kmccall/.local/bin:/home/kmccall/bin:/opt/mpich/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/bin:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/bin:/usr/local/cuda/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/pssc/bin:/opt/pssc/bin' 'SLURM_JOB_ID=37065' '_LMFILES_=/opt/modulefiles/intel/intelmpi' 'in_file=-input_file haystack_out.bin' 'MODULESHOME=/usr/share/Modules' 'I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi' 'HISTSIZE=1000' 'USER_PRINCIPAL_NAME=kmccall@NDC.NASA.GOV' 'NEEDLES_NUM_VARS=54' 'XTERM_LOCALE=en_US.UTF-8' 'CDC_JOINED_SITE=MSFCPrivate' 'LESSOPEN=||/usr/bin/lesspipe.sh %s' 
'BASH_FUNC_which%%=() { ( alias; eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@" }' 'BASH_FUNC_module%%=() { _module_raw "$@" 2>&1 }' 'BASH_FUNC__module_raw%%=() { unset _mlshdbg; if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then case "$-" in *v*x*) set +vx; _mlshdbg='vx' ;; *v*) set +v; _mlshdbg='v' ;; *x*) set +x; _mlshdbg='x' ;; *) _mlshdbg='' ;; esac; fi; unset _mlre _mlIFS; if [ -n "${IFS+x}" ]; then _mlIFS=$IFS; fi; IFS=' '; for _mlv in ${MODULES_RUN_QUARANTINE:-}; do if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' "; fi; _mlrv="MODULES_RUNENV_${_mlv}"; _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' "; fi; done; if [ -n "${_mlre:-}" ]; then eval `eval ${_mlre} /usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash '"$@"'`; else eval `/usr/bin/tclsh /usr/share/Modules/libexec/modulecmd.tcl bash "$@"`; fi; _mlstatus=$?; if [ -n "${_mlIFS+x}" ]; then IFS=$_mlIFS; else unset IFS; fi; unset _mlre _mlv _mlrv _mlIFS; if [ -n "${_mlshdbg:-}" ]; then set -$_mlshdbg; fi; unset _mlshdbg; return $_mlstatus }' 'BASH_FUNC_switchml%%=() { typeset swfound=1; if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then typeset swname='main'; if [ -e /usr/share/Modules/libexec/modulecmd.tcl ]; then typeset swfound=0; unset MODULES_USE_COMPAT_VERSION; fi; else typeset swname='compatibility'; if [ -e /usr/share/Modules/libexec/modulecmd-compat ]; then typeset swfound=0; MODULES_USE_COMPAT_VERSION=1; export MODULES_USE_COMPAT_VERSION; fi; fi; if [ $swfound -eq 0 ]; then echo "Switching to Modules $swname version"; source /usr/share/Modules/init/bash; else echo "Cannot switch to Modules $swname version, command not found"; return 1; fi }' 'BASH_FUNC_ml%%=() { module ml "$@" }' '_=/opt/mpich/bin/mpiexec' 'ZES_ENABLE_SYSMAN=1' --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 20 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38491 --debug --rmk user --launcher slurm --demux poll --pgid 34 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=20 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command 
spawn; forwarding upstream PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=20; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;nerrs=0; ARGS passed to NeedlesMpiWorker (pid = 340536) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 19 HERE worker pid =340536 manager_pid = 340368 OPENED SHARED MEMORY 340536 [proxy:32:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:33:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:33:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 33] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#51489$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 141 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#51489$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 33] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 141 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 33] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 141 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 33] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 141 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#51489$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI sending: 102 
cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 8.08221: spawned new worker 12 of 22 sub_id = 11 SPAWNED NEW WORKER 36266 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=12;ÿ [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=12;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:38909 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38909 --debug --rmk user --launcher slurm --demux poll --pgid 35 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_35_1546617336_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited environment variables omitted; identical to the environment listing shown for the previous proxy launch above ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 12 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:38909 --debug --rmk user --launcher slurm --demux poll --pgid 35 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;nerrs=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=12 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;nerrs=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#51489$ifname#172.16.56.1$;rc=0; ARGS passed to NeedlesMpiWorker (pid = 36683) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 11 HERE worker pid =36683 manager_pid = 36266 OPENED SHARED MEMORY 36683 [proxy:33:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.33 pmirank=0 threaded=FALSE
[proxy:33:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:33:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:33:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;rc=0; [proxy:33:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:33:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com;rc=0; [proxy:33:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#51489$ifname#172.16.56.1$ [proxy:33:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:33:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:33:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:33:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:33:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:33:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:33:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:33:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:33:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_33_513435412_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:33:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:33:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:34:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:34:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 34] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#32911$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 144 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#32911$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 34] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 144 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 34] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 144 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI 
sending: 109 cmd=kvs-get;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 34] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 144 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#32911$ifname#172.16.56.2$;rc=0; [proxy:34:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.34 pmirank=0 threaded=FALSE [proxy:34:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:34:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:34:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;rc=0; [proxy:34:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:34:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;rc=0; [proxy:34:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#32911$ifname#172.16.56.2$ [proxy:34:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:34:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:34:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:34:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:34:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:34:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:34:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:34:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:34:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:34:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_34_1269418524_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream 
[mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 8.17191: spawned new worker 21 of 23 sub_id = 20 SPAWNED NEW WORKER 340368 PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#32911$ifname#172.16.56.2$;rc=0; ARGS passed to NeedlesMpiWorker (pid = 340546) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 20 HERE worker pid =340546 manager_pid = 340368 OPENED SHARED MEMORY 340546 [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=21; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:36713 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:36713 --debug --rmk user --launcher slurm --demux poll --pgid 36 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_36_1670409615_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited environment variables omitted; identical to the environment listing shown for the previous proxy launches above ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 21 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:36713 --debug --rmk user --launcher slurm --demux poll --pgid 36 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=21 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=21; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;nerrs=0; [proxy:35:0@n001.cluster.pssclabs.com] got
pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:35:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 [proxy:34:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 35] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#52683$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 148 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#52683$ifname#172.16.56.1$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 35] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 148 pid 4: cmd=kvs-fence-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 35] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 148 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 35] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 148 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#52683$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#52683$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream ARGS passed to NeedlesMpiWorker (pid = 36707) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 8.21823: spawned new worker 13 of 22 sub_id = 12 SPAWNED NEW WORKER 36266 22 
-n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 12 HERE worker pid =36707 manager_pid = 36266 OPENED SHARED MEMORY 36707 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=13;ÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=13 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=13;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:46139 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46139 --debug --rmk user --launcher slurm --demux poll --pgid 37 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_37_1767134967_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited-environment entries elided; identical for every proxy launch in this log ...] --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 13 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46139 --debug --rmk user --launcher slurm --demux poll --pgid 37 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;nerrs=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;nerrs=0; [proxy:35:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.35 pmirank=0 threaded=FALSE [proxy:35:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:35:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:35:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;rc=0; [proxy:35:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:35:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com;rc=0; [proxy:35:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#52683$ifname#172.16.56.1$ [proxy:35:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:35:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:35:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:35:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream
[proxy:35:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:35:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:35:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:35:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:35:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_35_1546617336_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:35:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:35:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:36:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:36:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 36] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#43687$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 155 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#43687$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 36] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 155 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 36] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 155 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 36] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 155 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#43687$ifname#172.16.56.2$;rc=0; [proxy:36:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.36 pmirank=0 threaded=FALSE [proxy:36:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:36:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:36:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;rc=0; [proxy:36:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:36:0@n002.cluster.pssclabs.com] PMI response: 
cmd=job-getid-response;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;rc=0; [proxy:36:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#43687$ifname#172.16.56.2$ [proxy:36:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:36:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:36:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:36:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:36:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:36:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:36:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:36:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:36:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:36:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_36_1670409615_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#43687$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): 
cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 8.3147: spawned new worker 22 of 23 sub_id = 21 SPAWNED NEW WORKER 340368 [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=22; [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:45827 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:45827 --debug --rmk user --launcher slurm --demux poll --pgid 38 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_38_563385838_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_0_1116582517_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 
[... remaining inherited-environment entries elided; identical for every proxy launch in this log ...] --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1
-worker_rank 22 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:45827 --debug --rmk user --launcher slurm --demux poll --pgid 38 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;nerrs=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=340368 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=23 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=1 argv25=-worker_rank argv26=22 [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI sending: 558 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n002$port#51287$ifname#172.16.56.2$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=340368;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=23;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=1;argv25=-worker_rank;argv26=22; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;nerrs=0; ARGS passed to NeedlesMpiWorker (pid = 340556) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 21 HERE worker pid =340556 manager_pid = 340368 OPENED SHARED MEMORY 340556 [proxy:37:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:37:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 37] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#42871$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 157 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#42871$ifname#172.16.56.1$; [proxy:36:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 37] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 157 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI 
received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 37] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 157 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 37] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 157 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#42871$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 8.35825: spawned new worker 14 of 22 sub_id = 13 SPAWNED NEW WORKER 36266 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=14;ÿ PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#42871$ifname#172.16.56.1$;rc=0; ARGS passed to NeedlesMpiWorker (pid = 36731) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 13 HERE worker pid =36731 manager_pid = 36266 OPENED SHARED MEMORY 36731 [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=14;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:37861 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37861 --debug --rmk user --launcher slurm --demux poll --pgid 39 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_39_1104837659_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 
[... remaining inherited-environment entries elided; identical for every proxy launch in this log ...] --global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 14 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:37861 --debug --rmk user --launcher slurm --demux poll --pgid 39 --retries 10 --usize -2 --pmi-port 0
--gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;nerrs=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=14 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;nerrs=0; [proxy:37:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.37 pmirank=0 threaded=FALSE [proxy:37:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:37:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:37:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;rc=0; [proxy:37:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:37:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com;rc=0; [proxy:37:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#42871$ifname#172.16.56.1$ [proxy:37:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:37:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:37:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:37:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:37:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:37:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:37:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:37:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:37:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_37_1767134967_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:37:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:37:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:38:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:38:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received 
(cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 38] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#46115$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 163 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#46115$ifname#172.16.56.2$; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 38] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 163 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 38] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 163 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n002$port#51287$ifname#172.16.56.2$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 38] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 163 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#46115$ifname#172.16.56.2$;rc=0; [proxy:38:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.38 pmirank=0 threaded=FALSE [proxy:38:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:38:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:38:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;rc=0; [proxy:38:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:38:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com;rc=0; [proxy:38:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#46115$ifname#172.16.56.2$ [proxy:38:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:38:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:38:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:38:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:38:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:38:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:38:0@n002.cluster.pssclabs.com] we don't 
understand this command kvs-get; forwarding upstream [proxy:38:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:38:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_38_563385838_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:38:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#46115$ifname#172.16.56.2$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 0] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 9 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; [proxy:0:1@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com srcid=-1 key=P1-businesscard [proxy:0:1@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#54399$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_0_1116582517_n001.cluster.pssclabs.com;srcid=-1;key=P1-businesscard; [proxy:0:1@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream ARGS passed to NeedlesMpiWorker (pid = 340566) NeedlesMpiWork NeedlesMpiWork 1643395222 340368 -job_id 065 -n_managers 2 -works_per_man 23 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 1 -worker_rank 22 HERE worker pid =340566 manager_pid = 340368 OPENED SHARED MEMORY 340566 PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#51287$ifname#172.16.56.2$;rc=0; MANAGER 340368 n002.cluster.pssclabs.com 1 t = 8.48824: spawned new worker 23 of 23 sub_id = 22 [proxy:39:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:39:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 39] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#33309$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 165 pid 4: 
cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#33309$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 39] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 165 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 39] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 165 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 39] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 165 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#33309$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#33309$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 8.51012: spawned new worker 15 of 22 sub_id = 14 SPAWNED NEW WORKER 36266 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=15;ÿ ARGS passed to NeedlesMpiWorker (pid = 36755) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 14 HERE worker pid =36755 manager_pid = 36266 OPENED SHARED MEMORY 36755 
[proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=15 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=15;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:46077 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46077 --debug --rmk user --launcher slurm --demux poll --pgid 40 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_40_1067059121_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited environment variables omitted; they repeat the global environment printed at the start of this log ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 15 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46077 --debug --rmk user --launcher slurm --demux poll --pgid 40 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;nerrs=0; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;nerrs=0; [proxy:38:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:39:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.39 pmirank=0 threaded=FALSE [proxy:39:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:39:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:39:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;rc=0; [proxy:39:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:39:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com;rc=0; [proxy:39:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#33309$ifname#172.16.56.1$ [proxy:39:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:39:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:39:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence
[proxy:39:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:39:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:39:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:39:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:39:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:39:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_39_1104837659_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:39:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:39:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:40:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:40:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 40] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#50087$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 168 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#50087$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 40] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 168 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 40] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 168 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 40] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 168 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#50087$ifname#172.16.56.2$;rc=0; [proxy:40:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.40 pmirank=0 threaded=FALSE [proxy:40:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:40:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:40:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;rc=0; [proxy:40:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid 
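
Note: every PMI-2 message in this trace is a single line of ';'-separated key=value fields (cmd=..., jobid=..., key=..., value=..., rc=...), and the business-card values themselves pack host, port and interface as description#<host>$port#<port>$ifname#<ip>$. A small stand-alone helper like the sketch below (not part of MPICH; the function name is made up) is enough to split one of these messages into its fields when reading the trace programmatically.

    /* Stand-alone helper (made up for this note, not part of MPICH): split one
     * PMI-2 wire message into its key=value fields and print them. */
    #include <stdio.h>
    #include <string.h>

    static void dump_pmi_fields(char *msg)
    {
        /* strtok() modifies the buffer, so the caller must pass a writable copy. */
        for (char *field = strtok(msg, ";"); field != NULL; field = strtok(NULL, ";")) {
            char *eq = strchr(field, '=');
            if (eq == NULL)
                continue;                 /* skip anything without an '=' */
            *eq = '\0';
            printf("%-8s = %s\n", field, eq + 1);
        }
    }

    int main(void)
    {
        /* Sample message copied from the trace; the value field is a business
         * card of the form description#<host>$port#<port>$ifname#<ip>$. */
        char msg[] = "cmd=kvs-get-response;found=TRUE;"
                     "value=description#n002$port#46115$ifname#172.16.56.2$;rc=0;";
        dump_pmi_fields(msg);
        return 0;
    }
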
[proxy:40:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;rc=0; [proxy:40:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#50087$ifname#172.16.56.2$ [proxy:40:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:40:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:40:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:40:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:40:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:40:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:40:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:40:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:40:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:40:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_40_1067059121_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 8.64237: spawned new worker 16 of 22 sub_id = 15 SPAWNED NEW WORKER 36266 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=16;ÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 
argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=16 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=16;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:46693 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46693 --debug --rmk user --launcher slurm --demux poll --pgid 41 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_41_1785028386_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited environment variables omitted; they repeat the global environment printed at the start of this log ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 16 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:46693 --debug --rmk user --launcher slurm --demux poll --pgid 41 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;nerrs=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#50087$ifname#172.16.56.2$;rc=0; ARGS passed to NeedlesMpiWorker (pid = 340576) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 15 HERE worker pid =340576 manager_pid = 36266 OPENED SHARED MEMORY 340576 WORKER 340576 n002.cluster.pssclabs.com 0 t = 8.6441: suicide due to failed manager: pid = 36266 [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;nerrs=0; [proxy:40:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:41:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:41:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 41] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#36283$ifname#172.16.56.1$;
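
Note: the "suicide due to failed manager" line above comes from a worker running on n002 (pid 340576) that was told its manager's pid is 36266, yet pid 36266 belongs to the manager process on n001. One plausible mechanism behind that message, and it is only a guess since the Needles source is not in this log, is a purely node-local liveness test of the manager pid such as kill(pid, 0); a test like the sketch below can only succeed on the node where the manager actually runs, so a worker spawned onto the other node would always conclude that its manager had died.

    /* Hypothetical reconstruction (not the Needles source) of a node-local
     * manager-liveness test.  kill(pid, 0) delivers no signal; it only reports
     * whether a process with that pid exists on *this* node, so it cannot see
     * a manager that lives on a different node. */
    #include <errno.h>
    #include <signal.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>

    static bool manager_alive(pid_t manager_pid)
    {
        if (kill(manager_pid, 0) == 0)
            return true;                /* pid exists on this node */
        return errno == EPERM;          /* pid exists but belongs to another user */
    }

    int main(void)
    {
        pid_t manager_pid = 36266;      /* value taken from the log line above */

        if (!manager_alive(manager_pid)) {
            fprintf(stderr, "suicide due to failed manager: pid = %d\n",
                    (int)manager_pid);
            exit(EXIT_FAILURE);
        }
        return 0;
    }
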
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 172 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#36283$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 41] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 172 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 41] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 172 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 41] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 172 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#36283$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 8.79281: spawned new worker 17 of 22 sub_id = 16 SPAWNED NEW WORKER 36266 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=17;ÿ [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=17;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:35589 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35589 --debug --rmk user --launcher slurm --demux poll --pgid 42 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_42_1155189409_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 
[... remaining inherited environment variables omitted; they repeat the global environment printed at the start of this log ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 17 [mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35589 --debug --rmk user --launcher slurm --demux poll --pgid 42 --retries 10 --usize -2 --pmi-port 0
--gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;nerrs=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=17 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;nerrs=0; PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#36283$ifname#172.16.56.1$;rc=0; ARGS passed to NeedlesMpiWorker (pid = 36779) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 16 HERE worker pid =36779 manager_pid = 36266 OPENED SHARED MEMORY 36779 [proxy:41:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.41 pmirank=0 threaded=FALSE [proxy:41:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:41:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:41:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;rc=0; [proxy:41:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:41:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com;rc=0; [proxy:41:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#36283$ifname#172.16.56.1$ [proxy:41:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:41:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:41:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:41:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:41:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:41:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:41:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:41:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:41:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_41_1785028386_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard 
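
Note: the PARENT_ROOT_PORT_NAME lookups above are how MPICH wires a freshly spawned worker back to the manager that spawned it: the value (tag#0$description#n001$port#59293$ifname#172.16.56.1$) is the port the spawner opened, and the library consumes it internally so that the child's MPI_Comm_get_parent() returns an intercommunicator to the spawner. A minimal worker-side sketch, which is not the Needles source, looks like this:

    /* Minimal worker-side sketch (not the Needles source): recover the
     * intercommunicator to the spawning manager after MPI_Comm_spawn. */
    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char **argv)
    {
        MPI_Init(&argc, &argv);

        MPI_Comm parent;
        MPI_Comm_get_parent(&parent);   /* MPI_COMM_NULL if not spawned */

        if (parent == MPI_COMM_NULL) {
            fprintf(stderr, "not started via MPI_Comm_spawn\n");
        } else {
            int spawners;
            MPI_Comm_remote_size(parent, &spawners);  /* size of the manager side */
            printf("connected back to %d spawning process(es)\n", spawners);
        }

        MPI_Finalize();
        return 0;
    }
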
[proxy:41:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:41:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:42:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:42:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 42] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#59693$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 176 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#59693$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 42] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 176 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 42] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 176 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 42] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 176 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#59693$ifname#172.16.56.2$;rc=0; [proxy:42:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.42 pmirank=0 threaded=FALSE [proxy:42:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:42:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:42:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;rc=0; [proxy:42:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:42:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;rc=0; [proxy:42:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#59693$ifname#172.16.56.2$ [proxy:42:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:42:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:42:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:42:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:42:0@n002.cluster.pssclabs.com] we don't understand the response 
kvs-fence-response; forwarding downstream [proxy:42:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:42:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:42:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:42:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:42:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_42_1155189409_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 8.94421: spawned new worker 18 of 22 sub_id = 17 SPAWNED NEW WORKER 36266 PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=18;ÿ [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=18 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 
cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=18;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:33561 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:33561 --debug --rmk user --launcher slurm --demux poll --pgid 43 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_43_239470941_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 'LANG=en_US.UTF-8' 'SLURM_SUBMIT_DIR=/home/kmccall/Needles2' 'HISTCONTROL=ignoredups' 
[... remaining inherited environment variables omitted; the 117-entry list repeats the Global environment shown earlier ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 18
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:33561 --debug --rmk user --launcher slurm --demux poll --pgid 43 --retries 10 --usize -2 --pmi-port 0
--gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;nerrs=0;
PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#59693$ifname#172.16.56.2$;rc=0;
ARGS passed to NeedlesMpiWorker (pid = 340586) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 17
HERE worker pid =340586 manager_pid = 36266
OPENED SHARED MEMORY 340586
[proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream
PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;nerrs=0;
WORKER 340586 n002.cluster.pssclabs.com 0 t = 8.94592: suicide due to failed manager: pid = 36266
[proxy:42:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream
[proxy:43:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0
[proxy:43:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0
PMI sending: 20 cmd=job-getid;
PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;rc=0;
PMI sending: 20 cmd=job-getid;
PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;rc=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 43] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#42873$ifname#172.16.56.1$;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 180 pid 4: cmd=kvs-put-response;rc=0;
PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#42873$ifname#172.16.56.1$;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 43] got PMI command: 20 cmd=kvs-fence;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 180 pid 4: cmd=kvs-fence-response;rc=0;
PMI received (cmdlen 26): cmd=kvs-put-response;rc=0;
PMI sending: 20 cmd=kvs-fence;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 43] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 180 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0;
PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0;
PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME;
PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 43] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 180 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#42873$ifname#172.16.56.1$;rc=0;
PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard;
PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#42873$ifname#172.16.56.1$;rc=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream ARGS passed to NeedlesMpiWorker (pid = 36803) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 9.10162: spawned new worker 19 of 22 sub_id = 18 SPAWNED NEW WORKER 36266 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 18 HERE worker pid =36803 manager_pid = 36266 [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=19;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:39713 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:39713 --debug --rmk user --launcher slurm --demux poll --pgid 44 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_44_1538980619_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 
[... remaining inherited environment variables omitted; the 117-entry list repeats the Global environment shown earlier ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 19
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:39713 --debug --rmk user --launcher slurm --demux poll --pgid 44 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;nerrs=0;
[proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=19 ÿ
[proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream
PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=19;ÿ
OPENED SHARED MEMORY 36803
[proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream
PMI received (cmdlen 88): cmd=spawn-response;rc=0;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;nerrs=0;
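The spawn commands the proxies forward upstream here (subcmd=NeedlesMpiWork, maxprocs=1, the -manager_rank/-worker_rank argument pairs, and the PARENT_ROOT_PORT_NAME pre-put value) are what the Hydra PMI layer emits when the manager spawns one worker at a time. A minimal sketch of such a call is below, assuming standard MPI_Comm_spawn semantics; the executable name and argument values are copied from the logged spawn command, while everything else (the communicator choice, error-code handling) is illustrative and not taken from the actual NeedlesMpiMM source.

// Minimal sketch (not the actual NeedlesMpiMM code) of a single-worker spawn
// that would produce a PMI "cmd=spawn;...;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;..."
// exchange like the ones logged above.
#include <mpi.h>
#include <vector>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    // argv0..argv26 as they appear in the logged spawn command; the logged spawn
    // carries the executable name again as argv0, which is why the worker's
    // "ARGS passed to NeedlesMpiWorker" line shows "NeedlesMpiWork" twice.
    std::vector<char*> worker_args = {
        const_cast<char*>("NeedlesMpiWork"),
        const_cast<char*>("1643395222"),       const_cast<char*>("36266"),
        const_cast<char*>("-job_id"),          const_cast<char*>("065"),
        const_cast<char*>("-n_managers"),      const_cast<char*>("2"),
        const_cast<char*>("-works_per_man"),   const_cast<char*>("22"),
        const_cast<char*>("-n_vars_pm"),       const_cast<char*>("61"),
        const_cast<char*>("-n_seeds3"),        const_cast<char*>("2003"),
        const_cast<char*>("-n_ndxs"),          const_cast<char*>("1"),
        const_cast<char*>("-dispersions_ndx"), const_cast<char*>("0"),
        const_cast<char*>("-features_ndx"),    const_cast<char*>("4"),
        const_cast<char*>("-metrics_ndx"),     const_cast<char*>("54"),
        const_cast<char*>("-metric"),          const_cast<char*>("59"),
        const_cast<char*>("-manager_rank"),    const_cast<char*>("0"),
        const_cast<char*>("-worker_rank"),     const_cast<char*>("19"),
        nullptr                                // argv must be NULL-terminated
    };

    MPI_Comm intercomm;
    int spawn_err = 0;
    // One worker per call, matching maxprocs=1 in the logged PMI spawn commands.
    MPI_Comm_spawn("NeedlesMpiWork", worker_args.data(), 1, MPI_INFO_NULL,
                   0 /* root */, MPI_COMM_SELF, &intercomm, &spawn_err);

    MPI_Finalize();
    return 0;
}

With mpiexec's Hydra process manager, each such call is what triggers the "Got a control port string ...", "Proxy launch args: ...", and srun launch lines that follow each spawn in this log.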
[proxy:43:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.43 pmirank=0 threaded=FALSE [proxy:43:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:43:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:43:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;rc=0; [proxy:43:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:43:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com;rc=0; [proxy:43:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#42873$ifname#172.16.56.1$ [proxy:43:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:43:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:43:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:43:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:43:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:43:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:43:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:43:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:43:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_43_239470941_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:43:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:43:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:44:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:44:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;rc=0; PMI sending: 20 cmd=job-getid; PMI received (cmdlen 84): cmd=job-getid-response;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 44] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#56965$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 184 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#56965$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 44] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 184 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 44] got PMI command: 109 cmd=kvs-get;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 184 pid 4: 
cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 109 cmd=kvs-get;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [proxy:44:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.44 pmirank=0 threaded=FALSE [proxy:44:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:44:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:44:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;rc=0; [proxy:44:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:44:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;rc=0; [proxy:44:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#56965$ifname#172.16.56.2$ [proxy:44:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:44:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:44:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:44:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:44:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:44:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:44:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:44:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:44:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:44:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 44] got PMI command: 103 cmd=kvs-get;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 184 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#56965$ifname#172.16.56.2$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 103 cmd=kvs-get;jobid=kvs_36231_44_1538980619_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this 
command kvs-get; forwarding upstream [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 9.2722: spawned new worker 20 of 22 sub_id = 19 SPAWNED NEW WORKER 36266 [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=20 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command spawn; forwarding upstream PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=20;ÿ PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#56965$ifname#172.16.56.2$;rc=0; ARGS passed to NeedlesMpiWorker (pid = 340596) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 19 HERE worker pid =340596 manager_pid = 36266 OPENED SHARED MEMORY 340596 [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=20;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:35581 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35581 --debug --rmk user --launcher slurm --demux poll --pgid 45 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n001 --global-core-map 0,1,2 --pmi-id-map 0,0 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_45_1174794_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(0,1,1)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 
[... remaining inherited environment variables omitted; the 117-entry list repeats the Global environment shown earlier ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 20
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n001, -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:35581 --debug --rmk user --launcher slurm --demux poll --pgid 45 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;nerrs=0;
WORKER 340596 n002.cluster.pssclabs.com 0 t = 9.27389: suicide due to failed manager: pid = 36266
[proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream
PMI received (cmdlen 85): cmd=spawn-response;rc=0;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;nerrs=0;
[proxy:44:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream
[proxy:45:0@n001.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0
[proxy:45:0@n001.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0
PMI sending: 20 cmd=job-getid;
PMI received (cmdlen 81): cmd=job-getid-response;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;rc=0;
PMI sending: 20 cmd=job-getid;
PMI received (cmdlen 81): cmd=job-getid-response;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;rc=0;
[mpiexec@n001.cluster.pssclabs.com] [pgid: 45] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n001$port#44083$ifname#172.16.56.1$;
[mpiexec@n001.cluster.pssclabs.com] PMI response to fd 188 pid 4: cmd=kvs-put-response;rc=0;
PMI sending: 92
cmd=kvs-put;key=P0-businesscard;value=description#n001$port#44083$ifname#172.16.56.1$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 45] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 188 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 45] got PMI command: 106 cmd=kvs-get;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 188 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 106 cmd=kvs-get;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] [pgid: 45] got PMI command: 100 cmd=kvs-get;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 188 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#44083$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 100 cmd=kvs-get;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 9.4574: spawned new worker 21 of 22 sub_id = 20 SPAWNED NEW WORKER 36266 PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#44083$ifname#172.16.56.1$;rc=0; ARGS passed to NeedlesMpiWorker (pid = 36827) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 20 HERE worker pid =36827 manager_pid = 36266 [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): spawn ncmds=1 preputcount=1 ppkey0=PARENT_ROOT_PORT_NAME ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$ subcmd=NeedlesMpiWork maxprocs=1 argc=27 argv0=NeedlesMpiWork argv1=1643395222 argv2=36266 argv3=-job_id argv4=065 argv5=-n_managers argv6=2 argv7=-works_per_man argv8=22 argv9=-n_vars_pm argv10=61 argv11=-n_seeds3 argv12=2003 argv13=-n_ndxs argv14=1 argv15=-dispersions_ndx argv16=0 argv17=-features_ndx argv18=4 argv19=-metrics_ndx argv20=54 argv21=-metric argv22=59 argv23=-manager_rank argv24=0 argv25=-worker_rank argv26=21 ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand 
this command spawn; forwarding upstream [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=21;ÿ [mpiexec@n001.cluster.pssclabs.com] Got a control port string of n001.cluster.pssclabs.com:40503 Proxy launch args: /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:40503 --debug --rmk user --launcher slurm --demux poll --pgid 46 --retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id Arguments being passed to proxy 0: --version 4.0rc3 --iface-ip-env-name MPIR_CVAR_CH3_INTERFACE_HOSTNAME --hostname n002 --global-core-map 1,1,2 --pmi-id-map 0,1 --global-process-count 1 --auto-cleanup 1 --pmi-kvsname kvs_36231_46_204239162_n001.cluster.pssclabs.com --pmi-spawner-kvsname kvs_36231_1_2068064012_n001.cluster.pssclabs.com --pmi-process-mapping (vector,(1,1,2)) --global-inherited-env 117 'SLURM_MPI_TYPE=pmi2' 'LD_LIBRARY_PATH=/opt/mpich/lib:/opt/boost/lib:/opt/mpich/lib:/opt/boost/lib:/usr/local/cuda/lib64:/opt/mpich/lib:/opt/boost/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release:/opt/intel/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib:/usr/local/cuda/lib64' 'LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:' 'ODBG=' 'SLURM_TASK_PID=36228' 'NEEDLES_NUM_MANAGERS=2' 'SSH_CONNECTION=156.68.206.153 63270 128.158.5.241 22' 'SLURM_PRIO_PROCESS=0' 'n_vars=-n_vars 54' 'CDC_PREW2KHOST=rocci' 'all_orders=-all_orders 0' 'MODULES_RUN_QUARANTINE=LD_LIBRARY_PATH LD_PRELOAD' 
[... remaining inherited environment variables omitted; the 117-entry list repeats the Global environment shown earlier ...]
--global-user-env 0 --global-system-env 1 'GFORTRAN_UNBUFFERED_PRECONNECTED=y' --proxy-core-count 1 --exec --exec-appnum 0 --exec-proc-count 1 --exec-local-env 1 'PMI_SPAWNED=1' --exec-wdir /home/kmccall/Needles2 --exec-args 28 NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 21
[mpiexec@n001.cluster.pssclabs.com] Launch arguments: /usr/bin/srun --nodelist n002 -N 1 -n 1 --input none /opt/mpich/bin/hydra_pmi_proxy --control-port n001.cluster.pssclabs.com:40503 --debug --rmk user --launcher slurm --demux poll --pgid 46
--retries 10 --usize -2 --pmi-port 0 --gpus-per-proc -2 --proxy-id -1 [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=spawn-response;rc=0;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;nerrs=0; PMI sending: 557 cmd=spawn;ncmds=1;preputcount=1;ppkey0=PARENT_ROOT_PORT_NAME;ppval0=tag#0$description#n001$port#59293$ifname#172.16.56.1$;subcmd=NeedlesMpiWork;maxprocs=1;argc=27;argv0=NeedlesMpiWork;argv1=1643395222;argv2=36266;argv3=-job_id;argv4=065;argv5=-n_managers;argv6=2;argv7=-works_per_man;argv8=22;argv9=-n_vars_pm;argv10=61;argv11=-n_seeds3;argv12=2003;argv13=-n_ndxs;argv14=1;argv15=-dispersions_ndx;argv16=0;argv17=-features_ndx;argv18=4;argv19=-metrics_ndx;argv20=54;argv21=-metric;argv22=59;argv23=-manager_rank;argv24=0;argv25=-worker_rank;argv26=21;ÿ [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response spawn-response; forwarding downstream OPENED SHARED MEMORY 36827 PMI received (cmdlen 87): cmd=spawn-response;rc=0;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;nerrs=0; [proxy:45:0@n001.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.45 pmirank=0 threaded=FALSE [proxy:45:0@n001.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:45:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:45:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;rc=0; [proxy:45:0@n001.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:45:0@n001.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com;rc=0; [proxy:45:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n001$port#44083$ifname#172.16.56.1$ [proxy:45:0@n001.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:45:0@n001.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:45:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:45:0@n001.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:45:0@n001.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:45:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [proxy:45:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:45:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:45:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_45_1174794_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:45:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:45:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:46:0@n002.cluster.pssclabs.com] got pmi command (from 4): init pmi_version=2 pmi_subversion=0 [proxy:46:0@n002.cluster.pssclabs.com] PMI response: cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0 PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;rc=0; 
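[Note: the following explanatory sketch is an editor's addition, not part of the captured output.] The "cmd=spawn" record traced above (subcmd=NeedlesMpiWork, maxprocs=1, argc=27) is the PMI request Hydra receives when an already-running process calls MPI_Comm_spawn. A minimal sketch of a call with that general shape is shown below; the helper name and the argv contents are placeholders and are not taken from the Needles sources.

    #include <mpi.h>

    // Hypothetical helper, for illustration only.
    static MPI_Comm spawn_one_worker(MPI_Comm manager_comm)
    {
        // argv is a NULL-terminated argument list; per the MPI standard it
        // does not include the executable name itself.  Values are placeholders.
        char arg_epoch[] = "1643395222";
        char arg_flag[]  = "-job_id";
        char arg_value[] = "065";
        char *worker_argv[] = { arg_epoch, arg_flag, arg_value, nullptr };

        MPI_Comm intercomm = MPI_COMM_NULL;
        int errcode = MPI_SUCCESS;
        MPI_Comm_spawn("NeedlesMpiWork", worker_argv, /*maxprocs=*/1,
                       MPI_INFO_NULL, /*root=*/0, manager_comm,
                       &intercomm, &errcode);
        return intercomm;   // inter-communicator to the newly spawned worker
    }

Each spawn of this kind appears in the trace as its own process group (the increasing --pgid values) with its own srun-launched hydra_pmi_proxy, which appears to be why the job accumulates so many separate Slurm job steps.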
PMI sending: 20 cmd=job-getid; PMI received (cmdlen 83): cmd=job-getid-response;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;rc=0; [mpiexec@n001.cluster.pssclabs.com] [pgid: 46] got PMI command: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#50139$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 192 pid 4: cmd=kvs-put-response;rc=0; PMI sending: 92 cmd=kvs-put;key=P0-businesscard;value=description#n002$port#50139$ifname#172.16.56.2$; [mpiexec@n001.cluster.pssclabs.com] [pgid: 46] got PMI command: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 192 pid 4: cmd=kvs-fence-response;rc=0; PMI received (cmdlen 26): cmd=kvs-put-response;rc=0; PMI sending: 20 cmd=kvs-fence; [mpiexec@n001.cluster.pssclabs.com] [pgid: 46] got PMI command: 108 cmd=kvs-get;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 192 pid 4: cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI received (cmdlen 28): cmd=kvs-fence-response;rc=0; PMI sending: 108 cmd=kvs-get;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;srcid=-1;key=PARENT_ROOT_PORT_NAME; [proxy:46:0@n002.cluster.pssclabs.com] got pmi command (from 4): fullinit pmijobid=37065.46 pmirank=0 threaded=FALSE [proxy:46:0@n002.cluster.pssclabs.com] PMI response: cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank=0;size=1;appnum=0;spawner-jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;debugged=TRUE;pmiverbose=TRUE;rc=0; [proxy:46:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:46:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;rc=0; [proxy:46:0@n002.cluster.pssclabs.com] got pmi command (from 4): job-getid [proxy:46:0@n002.cluster.pssclabs.com] PMI response: cmd=job-getid-response;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;rc=0; [proxy:46:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-put key=P0-businesscard value=description#n002$port#50139$ifname#172.16.56.2$ [proxy:46:0@n002.cluster.pssclabs.com] we don't understand this command kvs-put; forwarding upstream [proxy:46:0@n002.cluster.pssclabs.com] we don't understand the response kvs-put-response; forwarding downstream [proxy:46:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-fence [proxy:46:0@n002.cluster.pssclabs.com] we don't understand this command kvs-fence; forwarding upstream [proxy:46:0@n002.cluster.pssclabs.com] we don't understand the response kvs-fence-response; forwarding downstream [proxy:46:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com srcid=-1 key=PARENT_ROOT_PORT_NAME [mpiexec@n001.cluster.pssclabs.com] [pgid: 46] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 192 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n002$port#50139$ifname#172.16.56.2$;rc=0; [proxy:46:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream [proxy:46:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream [proxy:46:0@n002.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard 
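[Note: editor's addition, not part of the captured output.] The fullinit / kvs-get exchange above is the spawned side of the same handshake: the PARENT_ROOT_PORT_NAME key preput by the spawn command carries the port that lets the child's root connect back to the spawning manager, and MPICH consumes it during MPI_Init / MPI_Comm_get_parent. A minimal sketch of the worker-side pattern, assuming nothing beyond standard MPI (the real NeedlesMpiWork code is not shown in this log):

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char **argv)
    {
        MPI_Init(&argc, &argv);

        MPI_Comm parent = MPI_COMM_NULL;
        MPI_Comm_get_parent(&parent);   // parent intercomm built from the
                                        // preput PARENT_ROOT_PORT_NAME value
        if (parent == MPI_COMM_NULL)
            std::fprintf(stderr, "not started via MPI_Comm_spawn\n");

        /* ... worker loop would go here ... */

        if (parent != MPI_COMM_NULL)
            MPI_Comm_disconnect(&parent);
        MPI_Finalize();
        return 0;
    }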
[proxy:46:0@n002.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI received (cmdlen 97): cmd=kvs-get-response;found=TRUE;value=tag#0$description#n001$port#59293$ifname#172.16.56.1$;rc=0; PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_46_204239162_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] [pgid: 1] got PMI command: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [mpiexec@n001.cluster.pssclabs.com] PMI response to fd 12 pid 4: cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; [proxy:1:0@n001.cluster.pssclabs.com] got pmi command (from 4): kvs-get jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com srcid=-1 key=P0-businesscard [proxy:1:0@n001.cluster.pssclabs.com] we don't understand this command kvs-get; forwarding upstream PMI sending: 102 cmd=kvs-get;jobid=kvs_36231_1_2068064012_n001.cluster.pssclabs.com;srcid=-1;key=P0-businesscard; [proxy:1:0@n001.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n001$port#59293$ifname#172.16.56.1$;rc=0; MANAGER 36266 n001.cluster.pssclabs.com 0 t = 9.66216: spawned new worker 22 of 22 sub_id = 21 PMI received (cmdlen 91): cmd=kvs-get-response;found=TRUE;value=description#n002$port#50139$ifname#172.16.56.2$;rc=0; ARGS passed to NeedlesMpiWorker (pid = 340606) NeedlesMpiWork NeedlesMpiWork 1643395222 36266 -job_id 065 -n_managers 2 -works_per_man 22 -n_vars_pm 61 -n_seeds3 2003 -n_ndxs 1 -dispersions_ndx 0 -features_ndx 4 -metrics_ndx 54 -metric 59 -manager_rank 0 -worker_rank 21 HERE worker pid =340606 manager_pid = 36266 OPENED SHARED MEMORY 340606 WORKER 340606 n002.cluster.pssclabs.com 0 t = 9.66385: suicide due to failed manager: pid = 36266 [proxy:46:0@n002.cluster.pssclabs.com] we don't understand the response kvs-get-response; forwarding downstream WORKER 36347 n001.cluster.pssclabs.com 0 t = 10.0008: suicide due to failed manager: pid = 340368 WORKER 36323 n001.cluster.pssclabs.com 0 t = 10.0009: suicide due to failed manager: pid = 340368 WORKER 36395 n001.cluster.pssclabs.com 0 t = 11.0008: suicide due to failed manager: pid = 340368 WORKER 36371 n001.cluster.pssclabs.com 0 t = 11.0009: suicide due to failed manager: pid = 340368 WORKER 340586 n002.cluster.pssclabs.com 0 t = 13.0015: suicide due to failed manager: pid = 36266 WORKER 340576 n002.cluster.pssclabs.com 0 t = 13.0015: suicide due to failed manager: pid = 36266 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5034: received worker PID = 340386 sub_id = 0 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5035: received worker PID = 340396 sub_id = 2 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5035: received worker PID = 36347 sub_id = 3 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5035: received worker PID = 340406 sub_id = 4 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5036: received worker PID = 340416 sub_id = 6 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5036: received worker PID = 36395 sub_id = 7 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5036: received worker PID = 340426 sub_id = 8 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5036: received worker PID = 340436 sub_id = 9 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5037: received worker PID = 340446 sub_id = 10 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5037: received worker PID = 
340456 sub_id = 11 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5037: received worker PID = 340466 sub_id = 12 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5037: received worker PID = 340476 sub_id = 13 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5037: received worker PID = 340486 sub_id = 14 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5038: received worker PID = 340496 sub_id = 15 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5038: received worker PID = 340506 sub_id = 16 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5038: received worker PID = 340516 sub_id = 17 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5038: received worker PID = 340526 sub_id = 18 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5039: received worker PID = 340536 sub_id = 19 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5039: received worker PID = 340546 sub_id = 20 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5039: received worker PID = 340556 sub_id = 21 MANAGER 340368 n002.cluster.pssclabs.com 1 t = 13.5039: received worker PID = 340566 sub_id = 22 WORKER 340596 n002.cluster.pssclabs.com 0 t = 14.0015: suicide due to failed manager: pid = 36266 WORKER 340606 n002.cluster.pssclabs.com 0 t = 14.0014: suicide due to failed manager: pid = 36266 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6757: received worker PID = 36419 sub_id = 0 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6758: received worker PID = 36443 sub_id = 1 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6758: received worker PID = 36467 sub_id = 2 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6758: received worker PID = 36491 sub_id = 3 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6758: received worker PID = 36515 sub_id = 4 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6759: received worker PID = 36539 sub_id = 5 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6759: received worker PID = 36563 sub_id = 6 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6759: received worker PID = 36587 sub_id = 7 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6759: received worker PID = 36611 sub_id = 8 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6759: received worker PID = 36635 sub_id = 9 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.676: received worker PID = 36659 sub_id = 10 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.676: received worker PID = 36683 sub_id = 11 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.676: received worker PID = 36707 sub_id = 12 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.676: received worker PID = 36731 sub_id = 13 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6761: received worker PID = 36755 sub_id = 14 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6761: received worker PID = 340576 sub_id = 15 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6761: received worker PID = 36779 sub_id = 16 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6761: received worker PID = 340586 sub_id = 17 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6762: received worker PID = 36803 sub_id = 18 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6762: received worker PID = 340596 sub_id = 19 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6762: received worker PID = 36827 sub_id = 20 MANAGER 36266 n001.cluster.pssclabs.com 0 t = 14.6762: received worker PID = 340606 sub_id = 21 WORKER 36347 n001.cluster.pssclabs.com 0 t = 15.0007: suicide due to failed manager: pid = 340368 worker threw MpiRuntimeError: runtime error: Request pending due to failure, error stack: 
internal_Cancel(80): MPI_Cancel(request=0x11d67ec) failed internal_Cancel(44): Null MPI_Request
WORKER 36347 calling MPI_Finalize
WORKER 340386 n002.cluster.pssclabs.com 0 t = WORKER 340396 n002.cluster.pssclabs.com 0 t = 15.0013: BUILD START (5) 15.0013: BUILD START (3)
WORKER 36323 n001.cluster.pssclabs.com 0 t = 15.0007: suicide due to failed manager: pid = 340368
worker threw MpiRuntimeError: runtime error: Request pending due to failure, error stack: internal_Cancel(80): MPI_Cancel(request=0xaf37ec) failed internal_Cancel(44): Null MPI_Request
WORKER 36323 calling MPI_Finalize
WORKER 340386 n002.cluster.pssclabs.com 0 t = 15.0018: BUILD STOP (3)
WORKER 340396 n002.cluster.pssclabs.com 0 t = 15.0018: BUILD STOP (5)
WORKER 36371 n001.cluster.pssclabs.com 0 t = 16.0007: suicide due to failed manager: pid = 340368
WORKER 36395 n001.cluster.pssclabs.com 0 t = 16.0007: suicide due to failed manager: pid = 340368
WORKER 36467 n001.cluster.pssclabs.comWORKER 36419 n001.cluster.pssclabs.com 0 t = 16.0007: BUILD START (0) WORKER 36443 n001.cluster.pssclabs.com 0 t = 16.0007: BUILD START (1) 0 t = 16.0007: BUILD START (2)
worker threw MpiRuntimeError: runtime error: Request pending due to failure, error stack: internal_Cancel(80): MPI_Cancel(request=0x14697ec) failed internal_Cancel(44): Null MPI_Request
worker threw MpiRuntimeError: runtime error: Request pending due to failure, error stack: internal_Cancel(80): MPI_Cancel(request=0x24467ec) failed internal_Cancel(44): Null MPI_Request
WORKER 36395 calling MPI_Finalize
WORKER 36371 calling MPI_Finalize
WORKER 36443 n001.cluster.pssclabs.com 0 t = 16.0014: BUILD STOP (1)
WORKER 36419 n001.cluster.pssclabs.com 0 t = 16.0015: BUILD STOP (0)
WORKER 36467 n001.cluster.pssclabs.com 0 t = 16.0015: BUILD STOP (2)
WORKER 340576 n002.cluster.pssclabs.com 0 t = 18.0013: suicide due to failed manager: pid = 36266
worker threw MpiRuntimeError: runtime error: Request pending due to failure, error stack: internal_Cancel(80): MPI_Cancel(request=0x1d917ec) failed internal_Cancel(44): Null MPI_Request
WORKER 340576 calling MPI_Finalize
WORKER 340586 n002.cluster.pssclabs.com 0 t = 18.0013: suicide due to failed manager: pid = 36266
worker threw MpiRuntimeError: runtime error: Request pending due to failure, error stack: internal_Cancel(80): MPI_Cancel(request=0x1c1b7ec) failed internal_Cancel(44): Null MPI_Request
WORKER 340586 calling MPI_Finalize
MANAGER 340368 n002.cluster.pssclabs.com 1 t = 18.5043: received worker PID = 36323 sub_id = 1
MANAGER 340368 n002.cluster.pssclabs.com 1 t = 18.5044: received worker PID = 36371 sub_id = 5
WORKER 340596 n002.cluster.pssclabs.com 0 t = 19.0013: suicide due to failed manager: pid = 36266
worker threw MpiRuntimeError: runtime error: Request pending due to failure, error stack: internal_Cancel(80): MPI_Cancel(request=0x14dd7ec) failed internal_Cancel(44): Null MPI_Request
WORKER 340596 calling MPI_Finalize
WORKER 340606 n002.cluster.pssclabs.com 0 t = 19.0013: suicide due to failed manager: pid = 36266
worker threw MpiRuntimeError: runtime error: Request pending due to failure, error stack: internal_Cancel(80): MPI_Cancel(request=0x8b77ec) failed internal_Cancel(44): Null MPI_Request
WORKER 340606 calling MPI_Finalize
MANAGER 340368 n002.cluster.pssclabs.com 1 t = 28.5067: IntercomList::handleFailedIntercom: sub_id = 1 disconnecting failed intercom
MANAGER 340368 n002.cluster.pssclabs.com 1 t = 28.5069: manager failed message MGR_EXCEPTION_ sent to master: manager exited with the following error: runtime error: Invalid communicator, error stack: internal_Comm_free(87): MPI_Comm_free(comm=0x139c7d8) failed internal_Comm_free(44): Null communicator
MANAGER 36266 n001.cluster.pssclabs.com 0 t = 29.6792: IntercomList::handleFailedIntercom: sub_id = 15 disconnecting failed intercom
MANAGER 36266 n001.cluster.pssclabs.com 0 t = 29.6793: manager failed message MGR_EXCEPTION_ sent to master: manager exited with the following error: runtime error: Invalid communicator, error stack: internal_Comm_free(87): MPI_Comm_free(comm=0xd7a4a8) failed internal_Comm_free(44): Null communicator
srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
[the srun line above was repeated identically many times as the remaining job steps were torn down; duplicates omitted]
slurmstepd: error: *** STEP 37065.3 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.0 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.27 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.41 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.39 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.17 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.25 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.1 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.9 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.11 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.31 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.43 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.45 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** JOB 37065 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.13 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.37 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.19 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.21 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.35 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.7 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.15 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.23 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.29 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.33 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
slurmstepd: error: *** STEP 37065.5 ON n001 CANCELLED AT 2022-01-28T12:40:53 ***
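[Note: editor's addition, not part of the captured output.] Two application-level error patterns are visible in the failure cascade above: "internal_Cancel ... Null MPI_Request" is MPICH rejecting MPI_Cancel on a handle that is already MPI_REQUEST_NULL, and "internal_Comm_free ... Null communicator" is MPI_Comm_free being handed MPI_COMM_NULL. Below is a small defensive sketch of the guards that avoid turning the shutdown path into a secondary MPI error; the helper names are hypothetical and do not come from the Needles sources.

    #include <mpi.h>

    // Cancel an outstanding operation only if the handle is still active.
    // MPI_Wait/MPI_Test set a completed request back to MPI_REQUEST_NULL,
    // so a second cleanup pass can otherwise hit the "Null MPI_Request" error.
    static void cancel_if_pending(MPI_Request &req)
    {
        if (req != MPI_REQUEST_NULL) {
            MPI_Cancel(&req);
            MPI_Wait(&req, MPI_STATUS_IGNORE);   // completes the cancelled request
        }
    }

    // Free an inter-communicator only if it has not already been freed or
    // disconnected; MPI_Comm_free on MPI_COMM_NULL is the "Null communicator"
    // failure reported by the managers above.
    static void free_if_valid(MPI_Comm &comm)
    {
        if (comm != MPI_COMM_NULL)
            MPI_Comm_free(&comm);
    }

Whether such guards are the right fix depends on why the handles are already null at that point, but they at least keep a worker or manager that is tearing down after a peer failure from raising the secondary "Request pending due to failure" / "Invalid communicator" exceptions seen here.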