[mpich-commits] r10698 - in mpich2/trunk/src/pm/hydra: include pm/pmiserv utils/sock

balaji at mcs.anl.gov balaji at mcs.anl.gov
Fri Nov 30 01:48:57 CST 2012


Author: balaji
Date: 2012-11-30 01:48:57 -0600 (Fri, 30 Nov 2012)
New Revision: 10698

Modified:
   mpich2/trunk/src/pm/hydra/include/hydra.h
   mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c
   mpich2/trunk/src/pm/hydra/utils/sock/sock.c
Log:
Our stdin handling logic is not correct.  I don't see a good way to
fix this, unless we add some form of flow-control logic between the
proxy and mpiexec.  For the time being, set the downstream stdin
socket to non-blocking and at least throw an error when the process
reads its stdin too slowly for us to keep up and we are giving up,
instead of hanging.

No reviewer.

Modified: mpich2/trunk/src/pm/hydra/include/hydra.h
===================================================================
--- mpich2/trunk/src/pm/hydra/include/hydra.h	2012-11-30 07:48:54 UTC (rev 10697)
+++ mpich2/trunk/src/pm/hydra/include/hydra.h	2012-11-30 07:48:57 UTC (rev 10698)
@@ -535,6 +535,7 @@
                           enum HYDU_sock_comm_flag flag);
 HYD_status HYDU_sock_write(int fd, const void *buf, int maxlen, int *sent, int *closed,
                            enum HYDU_sock_comm_flag flag);
+HYD_status HYDU_sock_set_nonblock(int fd);
 HYD_status HYDU_sock_forward_stdio(int in, int out, int *closed);
 HYD_status HYDU_sock_get_iface_ip(char *iface, char **ip);
 HYD_status HYDU_sock_is_local(char *host, int *is_local);

Modified: mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c	2012-11-30 07:48:54 UTC (rev 10697)
+++ mpich2/trunk/src/pm/hydra/pm/pmiserv/pmip_cb.c	2012-11-30 07:48:57 UTC (rev 10698)
@@ -711,6 +711,9 @@
                                          process_id);
             HYDU_ERR_POP(status, "create process returned error\n");
 
+            status = HYDU_sock_set_nonblock(HYD_pmcd_pmip.downstream.in);
+            HYDU_ERR_POP(status, "unable to set stdin socket to non-blocking\n");
+
             HYDU_free_strlist(client_args);
 
             if (pmi_fds[1] != HYD_FD_UNSET) {
@@ -920,9 +923,6 @@
         int count;
 
         if (hdr.buflen) {
-            if (HYD_pmcd_pmip.downstream.in == HYD_FD_CLOSED)
-                goto fn_exit;
-
             HYDU_MALLOC(buf, char *, hdr.buflen, status);
             HYDU_ERR_POP(status, "unable to allocate memory\n");
 
@@ -931,10 +931,20 @@
             HYDU_ERR_POP(status, "unable to read from control socket\n");
             HYDU_ASSERT(!closed, status);
 
+            if (HYD_pmcd_pmip.downstream.in == HYD_FD_CLOSED) {
+                HYDU_FREE(buf);
+                goto fn_exit;
+            }
+
             status = HYDU_sock_write(HYD_pmcd_pmip.downstream.in, buf, hdr.buflen, &count,
-                                     &closed, HYDU_SOCK_COMM_MSGWAIT);
+                                     &closed, HYDU_SOCK_COMM_NONE);
             HYDU_ERR_POP(status, "unable to write to downstream stdin\n");
 
+            HYDU_ERR_CHKANDJUMP(status, count != hdr.buflen, HYD_INTERNAL_ERROR,
+                                "process reading stdin too slowly; can't keep up\n");
+
+            HYDU_ASSERT(count == hdr.buflen, status);
+
             if (HYD_pmcd_pmip.user_global.auto_cleanup) {
                 HYDU_ASSERT(!closed, status);
             }

Modified: mpich2/trunk/src/pm/hydra/utils/sock/sock.c
===================================================================
--- mpich2/trunk/src/pm/hydra/utils/sock/sock.c	2012-11-30 07:48:54 UTC (rev 10697)
+++ mpich2/trunk/src/pm/hydra/utils/sock/sock.c	2012-11-30 07:48:57 UTC (rev 10698)
@@ -306,7 +306,7 @@
     goto fn_exit;
 }
 
-static HYD_status set_nonblock(int fd)
+HYD_status HYDU_sock_set_nonblock(int fd)
 {
     int flags;
     HYD_status status = HYD_SUCCESS;
@@ -343,10 +343,10 @@
 
     (*fwd_hash)->next = NULL;
 
-    status = set_nonblock(in);
+    status = HYDU_sock_set_nonblock(in);
     HYDU_ERR_POP(status, "unable to set out-socket to non-blocking\n");
 
-    status = set_nonblock(out);
+    status = HYDU_sock_set_nonblock(out);
     HYDU_ERR_POP(status, "unable to set out-socket to non-blocking\n");
 
   fn_exit:



More information about the commits mailing list