<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=Windows-1252">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0in;
        margin-right:0in;
        margin-bottom:0in;
        margin-left:.5in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
span.EmailStyle19
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:1277103497;
        mso-list-type:hybrid;
        mso-list-template-ids:-1453934610 253940242 67698691 67698693 67698689 67698691 67698693 67698689 67698691 67698693;}
@list l0:level1
        {mso-level-start-at:0;
        mso-level-number-format:bullet;
        mso-level-text:\F0D8 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;
        mso-fareast-font-family:Calibri;
        mso-bidi-font-family:"Times New Roman";}
@list l0:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l0:level3
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l0:level4
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l0:level5
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l0:level6
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l0:level7
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l0:level8
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l0:level9
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l1
        {mso-list-id:1511137873;
        mso-list-type:hybrid;
        mso-list-template-ids:-1715033584 1632531590 67698691 67698693 67698689 67698691 67698693 67698689 67698691 67698693;}
@list l1:level1
        {mso-level-start-at:0;
        mso-level-number-format:bullet;
        mso-level-text:\F0D8 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;
        mso-fareast-font-family:Calibri;
        mso-bidi-font-family:"Times New Roman";}
@list l1:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l1:level3
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l1:level4
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l1:level5
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l1:level6
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l1:level7
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l1:level8
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l1:level9
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7 ;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
ol
        {margin-bottom:0in;}
ul
        {margin-bottom:0in;}
--></style>
</head>
<body lang="EN-US" link="#0563C1" vlink="#954F72" style="word-wrap:break-word">
<div class="WordSection1">
<p class="MsoNormal">Hi Bruce,<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<ul style="margin-top:0in" type="disc">
<li class="MsoListParagraph" style="margin-left:0in;mso-list:l1 level1 lfo2">srun: error: node003: task 1: Exited with exit code 7<o:p></o:p></li></ul>
<p class="MsoListParagraph"><o:p> </o:p></p>
<p class="MsoNormal">Looks like one of the process crashed unexpectedly.<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<div>
<div>
<p class="MsoNormal">-- <br>
Hui Zhou<o:p></o:p></p>
</div>
</div>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in">
<p class="MsoNormal" style="mso-margin-top-alt:0in;margin-right:0in;margin-bottom:12.0pt;margin-left:.5in">
<b><span style="font-size:12.0pt;color:black">From: </span></b><span style="font-size:12.0pt;color:black">Palmer, Bruce J via discuss <discuss@mpich.org><br>
<b>Date: </b>Tuesday, September 27, 2022 at 3:32 PM<br>
<b>To: </b>discuss@mpich.org <discuss@mpich.org><br>
<b>Cc: </b>Palmer, Bruce J <Bruce.Palmer@pnnl.gov><br>
<b>Subject: </b>[mpich-discuss] Crash on MPI_Rput<o:p></o:p></span></p>
</div>
<p class="MsoNormal" style="margin-left:.5in">Hi,<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">I知 testing the MPI-RMA runtime in Global Arrays and I知 getting a lot more crashes than I致e seen in the past. The MPI-RMA runtime code is fairly stable and hasn稚 been modified much recently and all the tests
 used to pass using one of the more recent MPICH releases. However, I知 getting significant crashes at this point. One of them occurs in a program designed to test non-blocking communication. It creates an MPI window, using MPI_Alloc_mem followed by MPI_Win_create
 and then calls MPI_Win_lock_all on the window. The code currently crashes when it gets to an MPI_Rput call. I知 trying to see if there is something different in the environment that might be causing this.<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">I知 currently up to MPICH-4.0b1 configured with<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">./configure --prefix=/people/d3g293/mpich/mpich-4.0b1/install --with-device=ch4:ofi:sockets --with-libfabric=embedded --enable-threads=multiple --with-slurm CC=gcc CXX=g++<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">#./configure --prefix=/people/d3g293/mpich/mpich-3.4.1/install-newell-nocuda --with-device=ch4:ofi:sockets --with-libfabric=embedded --enable-threads=multiple --with-slurm CC=gcc CXX=g++<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">I致e tried other recent vintages of MPICH, but I get similar results. The error I知 seeing when the program crashes is<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">[proxy:0:1@node003.local] HYD_pmcd_pmip_control_cmd_cb (pm/pmiserv/pmip_cb.c:899): assert (!closed) failed<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">[proxy:0:1@node003.local] HYDT_dmxu_poll_wait_for_event (tools/demux/demux_poll.c:76): callback returned error status<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">srun: error: node003: task 1: Exited with exit code 7<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">[proxy:0:1@node003.local] main (pm/pmiserv/pmip.c:169): demux engine error waiting for event<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">[mpiexec@node002.local] HYDT_bscu_wait_for_completion (tools/bootstrap/utils/bscu_wait.c:74): one of the processes terminated badly; aborting<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">[mpiexec@node002.local] HYDT_bsci_wait_for_completion (tools/bootstrap/src/bsci_wait.c:21): launcher returned error waiting for completion<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">[mpiexec@node002.local] HYD_pmci_wait_for_completion (pm/pmiserv/pmiserv_pmci.c:179): launcher returned error waiting for completion<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">[mpiexec@node002.local] main (ui/mpich/mpiexec.c:325): process manager error waiting for completion<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">Any suggestions about what might be going wrong here? It could be a problem with the machine configuration, since this code seemed to be running fine a while ago and has not been modified since then. I値l try building
 the latest stable release and see if that fixes anything, but as I mentioned none of the recent releases seems to work.<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">Bruce Palmer<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">Computer Scientist<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">Pacific Northwest National Laboratory<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">(509) 375-3899<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
</div>
</body>
</html>