<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Aptos;
        panose-1:2 11 0 4 2 2 2 2 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        font-size:11.0pt;
        font-family:"Aptos",sans-serif;}
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0in;
        margin-right:0in;
        margin-bottom:0in;
        margin-left:.5in;
        font-size:11.0pt;
        font-family:"Aptos",sans-serif;}
span.EmailStyle21
        {mso-style-type:personal-reply;
        font-family:"Aptos",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;
        mso-ligatures:none;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:696394268;
        mso-list-template-ids:1860317596;}
@list l0:level1
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level2
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:1.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:1.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:2.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level5
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:2.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:3.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:3.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level8
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:4.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:4.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1
        {mso-list-id:894900904;
        mso-list-template-ids:-1684113116;}
@list l1:level1
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level2
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:1.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:1.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:2.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level5
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:2.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:3.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:3.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level8
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:4.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:4.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2
        {mso-list-id:1492021504;
        mso-list-template-ids:-1444359598;}
@list l2:level1
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2:level2
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:1.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:1.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:2.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2:level5
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:2.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:3.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:3.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2:level8
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:4.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l2:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:4.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
ol
        {margin-bottom:0in;}
ul
        {margin-bottom:0in;}
--></style>
</head>
<body lang="EN-US" link="#467886" vlink="#96607D" style="word-wrap:break-word">
<div class="WordSection1">
<p class="MsoNormal">Can you add -v to your mpiexec command? It will enable verbose output and hopefully give some clues to where things are getting stuck.<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal">Ken<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div id="mail-editor-reference-message-container">
<div>
<div>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in">
<p class="MsoNormal" style="mso-margin-top-alt:0in;margin-right:0in;margin-bottom:12.0pt;margin-left:.5in">
<b><span style="font-size:12.0pt;color:black">From: </span></b><span style="font-size:12.0pt;color:black">Tanner L. Nelson via discuss <discuss@mpich.org><br>
<b>Date: </b>Monday, February 17, 2025 at 11:43</span><span style="font-size:12.0pt;font-family:"Arial",sans-serif;color:black"> </span><span style="font-size:12.0pt;color:black">AM<br>
<b>To: </b>discuss@mpich.org <discuss@mpich.org><br>
<b>Cc: </b>Tanner L. Nelson <Tanner.Nelson@inl.gov><br>
<b>Subject: </b>Re: [mpich-discuss] Only Run as root<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="margin-left:.5in;mso-line-height-alt:.75pt"><span style="font-size:1.0pt;color:white">Good morning, We have a small cluster in an isolated environment. Our lead has spearheaded a reinstall to get away from Bright Cluster Manager
 and CentOS. This is the first time our project has done anything like this, and we have hit a snag<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="margin-left:.5in;mso-line-height-alt:.75pt"><span style="font-size:1.0pt;color:white">ZjQcmQRYFpfptBannerStart<o:p></o:p></span></p>
</div>
<div style="border:none;border-top:solid #90A4AE 3.0pt;padding:0in 0in 0in 0in;display:block!important;text-align:left!important;margin:0px!important;padding:16px!important;border-radius:4px!important;min-width:200px!important;background-color:#D0D8DC!important;border-top:#90a4ae!important" id="pfptBannerwnlsc2e">
<div id="pfptBannerwnlsc2e">
<div id="pfptBannerwnlsc2e">
<p class="MsoNormal" style="margin-left:.5in;line-height:13.5pt;background:#D0D8DC">
<b><span style="font-size:12.0pt;font-family:"Arial",sans-serif;color:black">This Message Is From an External Sender
<o:p></o:p></span></b></p>
</div>
<div id="pfptBannerwnlsc2e">
<p class="MsoNormal" style="margin-left:.5in;line-height:13.5pt;background:#D0D8DC">
<span style="font-size:12.0pt;font-family:"Arial",sans-serif;color:black">This message came from outside your organization.
<o:p></o:p></span></p>
</div>
</div>
<div>
<p class="MsoNormal" style="margin-left:.5in;background:#D0D8DC"><span style="font-size:12.0pt;color:black"> </span><span style="font-size:12.0pt"><o:p></o:p></span></p>
</div>
</div>
<div>
<p class="MsoNormal" style="margin-left:.5in;mso-line-height-alt:.75pt"><span style="font-size:1.0pt;color:white">ZjQcmQRYFpfptBannerEnd<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="margin-left:.5in">Good morning, <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">We have a small cluster in an isolated environment. Our lead has spearheaded a reinstall to get away from Bright Cluster Manager and CentOS. This is the first time our project has done anything like this, and we
 have hit a snag that we believe is MPI-related. <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">As root:<o:p></o:p></p>
<p class="MsoListParagraph" style="margin-left:1.0in;text-indent:-.25in;mso-list:l2 level1 lfo1">
<![if !supportLists]><span style="font-size:10.0pt;font-family:Symbol"><span style="mso-list:Ignore">·<span style="font:7.0pt "Times New Roman"">        
</span></span></span><![endif]>Job runs as expected (verified with 2 nodes)<o:p></o:p></p>
<p class="MsoListParagraph" style="margin-left:1.0in;text-indent:-.25in;mso-list:l2 level1 lfo1">
<![if !supportLists]><span style="font-size:10.0pt;font-family:Symbol"><span style="mso-list:Ignore">·<span style="font:7.0pt "Times New Roman"">        
</span></span></span><![endif]>mpirun with host file, same result<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">As user account:<o:p></o:p></p>
<p class="MsoListParagraph" style="margin-left:1.0in;text-indent:-.25in;mso-list:l1 level1 lfo2">
<![if !supportLists]><span style="font-size:10.0pt;font-family:Symbol"><span style="mso-list:Ignore">·<span style="font:7.0pt "Times New Roman"">        
</span></span></span><![endif]>Single node works<o:p></o:p></p>
<p class="MsoListParagraph" style="margin-left:1.0in;text-indent:-.25in;mso-list:l1 level1 lfo2">
<![if !supportLists]><span style="font-size:10.0pt;font-family:Symbol"><span style="mso-list:Ignore">·<span style="font:7.0pt "Times New Roman"">        
</span></span></span><![endif]>2 nodes, job hangs with no error output that we can find<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">To us, and our colleagues at our main Computing Center, it seems related to permissions/authentication. But everything we know to check has been checked several times (i.e. user can ssh to nodes 001-002 where root
 was successful, user was not; permissions on mpi directory 755, even tried 777 just to rule it out).
<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">Details I believe may be asked: <o:p>
</o:p></p>
<p class="MsoListParagraph" style="margin-left:1.0in;text-indent:-.25in;mso-list:l0 level1 lfo3">
<![if !supportLists]><span style="font-size:10.0pt;font-family:Symbol"><span style="mso-list:Ignore">·<span style="font:7.0pt "Times New Roman"">        
</span></span></span><![endif]>RHEL9<o:p></o:p></p>
<p class="MsoListParagraph" style="margin-left:1.0in;text-indent:-.25in;mso-list:l0 level1 lfo3">
<![if !supportLists]><span style="font-size:10.0pt;font-family:Symbol"><span style="mso-list:Ignore">·<span style="font:7.0pt "Times New Roman"">        
</span></span></span><![endif]>Mpich4.2.3 was compiled with gcc11.5.0<o:p></o:p></p>
<p class="MsoListParagraph" style="margin-left:1.0in;text-indent:-.25in;mso-list:l0 level1 lfo3">
<![if !supportLists]><span style="font-size:10.0pt;font-family:Symbol"><span style="mso-list:Ignore">·<span style="font:7.0pt "Times New Roman"">        
</span></span></span><![endif]>Consistent results with scheduler and mpirun<o:p></o:p></p>
<p class="MsoListParagraph" style="margin-left:1.0in;text-indent:-.25in;mso-list:l0 level1 lfo3">
<![if !supportLists]><span style="font-size:10.0pt;font-family:Symbol"><span style="mso-list:Ignore">·<span style="font:7.0pt "Times New Roman"">        
</span></span></span><![endif]>Tested HPL and our primary application, both built against the same mpich version<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in">Hoping we can get some suggestions. Thanks in advance!<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in;line-height:15.0pt">Respectfully,<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in;line-height:15.0pt">Tanner Nelson<o:p></o:p></p>
<p class="MsoNormal" style="margin-left:.5in"> <o:p></o:p></p>
</div>
</div>
</div>
</div>
</div>
</body>
</html>