[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2-174-g410add1

Service Account noreply at mpich.org
Fri Feb 5 16:24:12 CST 2016


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  410add1042cbd467288c83ffec437c549c93cea3 (commit)
       via  aec9468d5d52d8e780522ce78b4692104c4d8e60 (commit)
       via  7f4206c121b5bb251de90da07161fa414046ffc0 (commit)
      from  14c326b77674d7215fc32d809a21048ebfcded6f (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/410add1042cbd467288c83ffec437c549c93cea3

commit 410add1042cbd467288c83ffec437c549c93cea3
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Thu Feb 4 09:51:18 2016 -0600

    test/mpi/spawn: remove xfail for spawn-rootargs
    
    Closes #2282
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/test/mpi/spawn/testlist.in b/test/mpi/spawn/testlist.in
index e497105..586322d 100644
--- a/test/mpi/spawn/testlist.in
+++ b/test/mpi/spawn/testlist.in
@@ -26,4 +26,4 @@ disconnect3 3
 concurrent_spawns 1
 pgroup_connect_test 4
 pgroup_intercomm_test 4
-spawn-rootargs 10 xfail=ticket2282
+spawn-rootargs 10

http://git.mpich.org/mpich.git/commitdiff/aec9468d5d52d8e780522ce78b4692104c4d8e60

commit aec9468d5d52d8e780522ce78b4692104c4d8e60
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Tue Feb 2 18:14:52 2016 -0600

    test/mpi/spawn: fixup spawn-rootargs test
    
    MPI_Comm_spawn should launch new instances of "spawn-rootargs" for this
    test to succeed. There is no "spawn-nullargs" binary.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/test/mpi/spawn/spawn-rootargs.c b/test/mpi/spawn/spawn-rootargs.c
index a6a135a..1657d3e 100644
--- a/test/mpi/spawn/spawn-rootargs.c
+++ b/test/mpi/spawn/spawn-rootargs.c
@@ -26,7 +26,7 @@ int main(int argc, char *argv[])
 
     if (master == MPI_COMM_NULL) {
         MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-        MPI_Comm_spawn("./spawn-nullargs", args,        /*MPI_ARGV_NULL, */
+        MPI_Comm_spawn("./spawn-rootargs", args,        /*MPI_ARGV_NULL, */
                        5, MPI_INFO_NULL, 0, MPI_COMM_SELF, &worker, MPI_ERRCODES_IGNORE);
         MPI_Barrier(worker);
         MPI_Comm_disconnect(&worker);

http://git.mpich.org/mpich.git/commitdiff/7f4206c121b5bb251de90da07161fa414046ffc0

commit 7f4206c121b5bb251de90da07161fa414046ffc0
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Tue Feb 2 18:08:58 2016 -0600

    pm/hydra: fix pmi mcmd interleaving problem
    
    Incomplete PMI-1 mcmds suffer from an interleaving problem when there
    are multiple commands written to the PMI proxy server from different procs
    at the same time. This commit forces the server to block until the full mcmd
    is read and processed. Refs #2282
    
    Algo:
      - If we do not have a full command, read more from the fd.
      - If pmi_storage has multiple commands, process all of them, even if
        that means reading more from the fd.
      - Otherwise, stop processing.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/pm/hydra/pm/pmiserv/pmip_cb.c b/src/pm/hydra/pm/pmiserv/pmip_cb.c
index 1866f29..316c01c 100644
--- a/src/pm/hydra/pm/pmiserv/pmip_cb.c
+++ b/src/pm/hydra/pm/pmiserv/pmip_cb.c
@@ -214,15 +214,6 @@ static HYD_status pmi_cb(int fd, HYD_event_t events, void *userp)
 
     HYD_pmcd_init_header(&hdr);
 
-    /* PMI-1 does not tell us how much to read. We read how much ever
-     * we can, parse out full PMI commands from it, and process
-     * them. When we don't have a full PMI command, we store the
-     * rest. */
-    status =
-        HYDU_sock_read(fd, pmi_storage + pmi_storage_len, HYD_TMPBUF_SIZE - pmi_storage_len,
-                       &linelen, &closed, HYDU_SOCK_COMM_NONE);
-    HYDU_ERR_POP(status, "unable to read PMI command\n");
-
     /* Try to find the PMI FD */
     for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) {
         if (HYD_pmcd_pmip.downstream.pmi_fd[i] == fd) {
@@ -231,6 +222,17 @@ static HYD_status pmi_cb(int fd, HYD_event_t events, void *userp)
         }
     }
 
+ read_cmd:
+    /* PMI-1 does not tell us how much to read. We read how much ever
+     * we can, parse out full PMI commands from it, and process
+     * them. When we don't have a full PMI command, we go back and
+     * read from the same FD until we do. PMI clients (1 and 2) always
+     * send full commands, then wait for response. */
+    status =
+        HYDU_sock_read(fd, pmi_storage + pmi_storage_len, HYD_TMPBUF_SIZE - pmi_storage_len,
+                       &linelen, &closed, HYDU_SOCK_COMM_NONE);
+    HYDU_ERR_POP(status, "unable to read PMI command\n");
+
     if (closed) {
         /* If a PMI application terminates, we clean up the remaining
          * processes. For a correct PMI application, we should never
@@ -282,6 +284,14 @@ static HYD_status pmi_cb(int fd, HYD_event_t events, void *userp)
         pmi_storage[pmi_storage_len] = 0;
     }
 
+ check_cmd:
+    status = check_pmi_cmd(&buf, &hdr.pmi_version, &repeat);
+    HYDU_ERR_POP(status, "error checking the PMI command\n");
+
+    if (buf == NULL)
+        /* read more to get a full command. */
+        goto read_cmd;
+
     /* We were able to read the PMI command correctly. If we were able
      * to identify what PMI FD this is, activate it. If we were not
      * able to identify the PMI FD, we will activate it when we get
@@ -289,62 +299,57 @@ static HYD_status pmi_cb(int fd, HYD_event_t events, void *userp)
     if (pid != -1 && !HYD_pmcd_pmip.downstream.pmi_fd_active[pid])
         HYD_pmcd_pmip.downstream.pmi_fd_active[pid] = 1;
 
-    do {
-        status = check_pmi_cmd(&buf, &hdr.pmi_version, &repeat);
-        HYDU_ERR_POP(status, "error checking the PMI command\n");
-
-        if (buf == NULL)
-            break;
-
-        if (hdr.pmi_version == 1)
-            HYD_pmcd_pmip_pmi_handle = HYD_pmcd_pmip_pmi_v1;
-        else
-            HYD_pmcd_pmip_pmi_handle = HYD_pmcd_pmip_pmi_v2;
+    if (hdr.pmi_version == 1)
+        HYD_pmcd_pmip_pmi_handle = HYD_pmcd_pmip_pmi_v1;
+    else
+        HYD_pmcd_pmip_pmi_handle = HYD_pmcd_pmip_pmi_v2;
 
-        HYDU_MALLOC(args, char **, MAX_PMI_ARGS * sizeof(char *), status);
-        for(i = 0;i < MAX_PMI_ARGS; i++)
-            args[i]= NULL;
+    HYDU_MALLOC(args, char **, MAX_PMI_ARGS * sizeof(char *), status);
+    for(i = 0;i < MAX_PMI_ARGS; i++)
+        args[i]= NULL;
 
-        status = HYD_pmcd_pmi_parse_pmi_cmd(buf, hdr.pmi_version, &pmi_cmd, args);
-        HYDU_ERR_POP(status, "unable to parse PMI command\n");
+    status = HYD_pmcd_pmi_parse_pmi_cmd(buf, hdr.pmi_version, &pmi_cmd, args);
+    HYDU_ERR_POP(status, "unable to parse PMI command\n");
 
-        if (HYD_pmcd_pmip.user_global.debug) {
-            HYDU_dump(stdout, "got pmi command (from %d): %s\n", fd, pmi_cmd);
-            HYDU_print_strlist(args);
-        }
+    if (HYD_pmcd_pmip.user_global.debug) {
+        HYDU_dump(stdout, "got pmi command (from %d): %s\n", fd, pmi_cmd);
+        HYDU_print_strlist(args);
+    }
 
-        h = HYD_pmcd_pmip_pmi_handle;
-        while (h->handler) {
-            if (!strcmp(pmi_cmd, h->cmd)) {
-                status = h->handler(fd, args);
-                HYDU_ERR_POP(status, "PMI handler returned error\n");
-                goto fn_exit;
-            }
-            h++;
+    h = HYD_pmcd_pmip_pmi_handle;
+    while (h->handler) {
+        if (!strcmp(pmi_cmd, h->cmd)) {
+            status = h->handler(fd, args);
+            HYDU_ERR_POP(status, "PMI handler returned error\n");
+            goto fn_exit;
         }
+        h++;
+    }
 
-        if (HYD_pmcd_pmip.user_global.debug) {
-            HYDU_dump(stdout, "we don't understand this command %s; forwarding upstream\n",
-                      pmi_cmd);
-        }
+    if (HYD_pmcd_pmip.user_global.debug) {
+        HYDU_dump(stdout, "we don't understand this command %s; forwarding upstream\n",
+                  pmi_cmd);
+    }
 
-        /* We don't understand the command; forward it upstream */
-        hdr.cmd = PMI_CMD;
-        hdr.pid = fd;
-        hdr.buflen = strlen(buf);
-        status =
-            HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed,
-                            HYDU_SOCK_COMM_MSGWAIT);
-        HYDU_ERR_POP(status, "unable to send PMI header upstream\n");
-        HYDU_ASSERT(!closed, status);
+    /* We don't understand the command; forward it upstream */
+    hdr.cmd = PMI_CMD;
+    hdr.pid = fd;
+    hdr.buflen = strlen(buf);
+    status =
+        HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed,
+                        HYDU_SOCK_COMM_MSGWAIT);
+    HYDU_ERR_POP(status, "unable to send PMI header upstream\n");
+    HYDU_ASSERT(!closed, status);
 
-        status =
-            HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, hdr.buflen, &sent, &closed,
-                            HYDU_SOCK_COMM_MSGWAIT);
-        HYDU_ERR_POP(status, "unable to send PMI command upstream\n");
-        HYDU_ASSERT(!closed, status);
+    status =
+        HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, hdr.buflen, &sent, &closed,
+                        HYDU_SOCK_COMM_MSGWAIT);
+    HYDU_ERR_POP(status, "unable to send PMI command upstream\n");
+    HYDU_ASSERT(!closed, status);
 
-    } while (repeat);
+    if (repeat)
+        /* there are more commands to process. */
+        goto check_cmd;
 
   fn_exit:
     if (pmi_cmd)

-----------------------------------------------------------------------

Summary of changes:
 src/pm/hydra/pm/pmiserv/pmip_cb.c |  119 +++++++++++++++++++------------------
 test/mpi/spawn/spawn-rootargs.c   |    2 +-
 test/mpi/spawn/testlist.in        |    2 +-
 3 files changed, 64 insertions(+), 59 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list