[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.0.3-40-gfd583f4

mysql vizuser noreply at mpich.org
Mon Apr 22 13:28:16 CDT 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  fd583f48d7e8b3009e359c35786a8fc0f9cc398c (commit)
      from  1c156acf01ea5ed49252c10d2995d48f3f34c09f (commit)

The revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/fd583f48d7e8b3009e359c35786a8fc0f9cc398c

commit fd583f48d7e8b3009e359c35786a8fc0f9cc398c
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Mon Apr 22 13:21:52 2013 -0500

    Bug fixes in the PMI keyval allgather code from [3ba0a7f].
    
    1. We had forgotten to distribute the pre-initialized keyvals in the
    spawn code to the new proxies.
    
    2. We used a global static for keeping track of the previously
    distributed keyvals, instead of a per-KVS-space static.
    
    Fixes tt#1814.
    
    No reviewer.

diff --git a/src/pm/hydra/pm/pmiserv/pmiserv_pmi.h b/src/pm/hydra/pm/pmiserv/pmiserv_pmi.h
index f939033..3f91267 100644
--- a/src/pm/hydra/pm/pmiserv/pmiserv_pmi.h
+++ b/src/pm/hydra/pm/pmiserv/pmiserv_pmi.h
@@ -37,6 +37,7 @@ struct HYD_pmcd_pmi_pg_scratch {
     int dead_process_count;
 
     struct HYD_pmcd_pmi_kvs *kvs;
+    int keyval_dist_count;  /* Number of keyvals distributed */
 };
 
 struct HYD_pmcd_pmi_publish {
diff --git a/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c b/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
index dfbed96..cbc4c08 100644
--- a/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
+++ b/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
@@ -44,14 +44,13 @@ static HYD_status cmd_response(int fd, int pid, const char *cmd)
     goto fn_exit;
 }
 
-static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[])
+static HYD_status bcast_keyvals(int fd, int pid)
 {
+    int keyval_count, arg_count, i, j;
+    char **tmp = NULL, *cmd;
+    struct HYD_pmcd_pmi_kvs_pair *run;
     struct HYD_proxy *proxy, *tproxy;
     struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
-    int proxy_count, keyval_count, i, j, arg_count;
-    struct HYD_pmcd_pmi_kvs_pair *run;
-    char **tmp = NULL, *cmd;
-    static int keyval_count_distributed = 0;
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -60,88 +59,108 @@ static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[])
     HYDU_ASSERT(proxy, status);
     pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;
 
-    proxy_count = 0;
-    for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next)
-        proxy_count++;
+    /* find the number of keyvals */
+    keyval_count = 0;
+    for (run = pg_scratch->kvs->key_pair; run; run = run->next)
+        keyval_count++;
 
-    proxy->pg->barrier_count++;
-    if (proxy->pg->barrier_count == proxy_count) {
-        proxy->pg->barrier_count = 0;
+    keyval_count -= pg_scratch->keyval_dist_count;
 
-        /* find the number of keyvals */
-        keyval_count = 0;
-        for (run = pg_scratch->kvs->key_pair; run; run = run->next)
-            keyval_count++;
+    /* Each keyval has the following four items: 'key' '=' 'val'
+     * '<space>'.  Two additional items for the command at the start
+     * and the NULL at the end. */
+    HYDU_MALLOC(tmp, char **, (4 * keyval_count + 3) * sizeof(char *), status);
 
-        keyval_count -= keyval_count_distributed;
+    /* send all available keyvals downstream */
+    if (keyval_count) {
+        arg_count = 1;
+        i = 0;
+        tmp[i++] = HYDU_strdup("cmd=keyval_cache ");
+        for (run = pg_scratch->kvs->key_pair, j = 0; run; run = run->next, j++) {
+            if (j < pg_scratch->keyval_dist_count)
+                continue;
 
-        /* Each keyval has the following four items: 'key' '=' 'val'
-         * '<space>'.  Two additional items for the command at the
-         * start and the NULL at the end. */
-        HYDU_MALLOC(tmp, char **, (4 * keyval_count + 3) * sizeof(char *), status);
+            tmp[i++] = HYDU_strdup(run->key);
+            tmp[i++] = HYDU_strdup("=");
+            tmp[i++] = HYDU_strdup(run->val);
+            tmp[i++] = HYDU_strdup(" ");
 
-        /* send all available keyvals downstream */
-        if (keyval_count) {
-            arg_count = 1;
-            i = 0;
-            tmp[i++] = HYDU_strdup("cmd=keyval_cache ");
-            for (run = pg_scratch->kvs->key_pair, j = 0; run; run = run->next, j++) {
-                if (j < keyval_count_distributed)
-                    continue;
-
-                tmp[i++] = HYDU_strdup(run->key);
-                tmp[i++] = HYDU_strdup("=");
-                tmp[i++] = HYDU_strdup(run->val);
-                tmp[i++] = HYDU_strdup(" ");
-
-                arg_count++;
-                if (arg_count >= MAX_PMI_INTERNAL_ARGS) {
-                    tmp[i++] = HYDU_strdup("\n");
-                    tmp[i++] = NULL;
-
-                    status = HYDU_str_alloc_and_join(tmp, &cmd);
-                    HYDU_ERR_POP(status, "unable to join strings\n");
-                    HYDU_free_strlist(tmp);
-
-                    keyval_count_distributed += (arg_count - 1);
-                    for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {
-                        status = cmd_response(tproxy->control_fd, pid, cmd);
-                        HYDU_ERR_POP(status, "error writing PMI line\n");
-                    }
-                    HYDU_FREE(cmd);
-
-                    i = 0;
-                    tmp[i++] = HYDU_strdup("cmd=keyval_cache ");
-                    arg_count = 1;
-                }
-            }
-            tmp[i++] = HYDU_strdup("\n");
-            tmp[i++] = NULL;
+            arg_count++;
+            if (arg_count >= MAX_PMI_INTERNAL_ARGS) {
+                tmp[i++] = HYDU_strdup("\n");
+                tmp[i++] = NULL;
 
-            if (arg_count > 1) {
                 status = HYDU_str_alloc_and_join(tmp, &cmd);
                 HYDU_ERR_POP(status, "unable to join strings\n");
+                HYDU_free_strlist(tmp);
 
-                keyval_count_distributed += (arg_count - 1);
+                pg_scratch->keyval_dist_count += (arg_count - 1);
                 for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {
                     status = cmd_response(tproxy->control_fd, pid, cmd);
                     HYDU_ERR_POP(status, "error writing PMI line\n");
                 }
                 HYDU_FREE(cmd);
+
+                i = 0;
+                tmp[i++] = HYDU_strdup("cmd=keyval_cache ");
+                arg_count = 1;
             }
-            HYDU_free_strlist(tmp);
         }
+        tmp[i++] = HYDU_strdup("\n");
+        tmp[i++] = NULL;
+
+        if (arg_count > 1) {
+            status = HYDU_str_alloc_and_join(tmp, &cmd);
+            HYDU_ERR_POP(status, "unable to join strings\n");
+
+            pg_scratch->keyval_dist_count += (arg_count - 1);
+            for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {
+                status = cmd_response(tproxy->control_fd, pid, cmd);
+                HYDU_ERR_POP(status, "error writing PMI line\n");
+            }
+            HYDU_FREE(cmd);
+        }
+        HYDU_free_strlist(tmp);
+    }
+
+  fn_exit:
+    if (tmp)
+        HYDU_FREE(tmp);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[])
+{
+    struct HYD_proxy *proxy, *tproxy;
+    int proxy_count;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    proxy = HYD_pmcd_pmi_find_proxy(fd);
+    HYDU_ASSERT(proxy, status);
+
+    proxy_count = 0;
+    for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next)
+        proxy_count++;
+
+    proxy->pg->barrier_count++;
+    if (proxy->pg->barrier_count == proxy_count) {
+        proxy->pg->barrier_count = 0;
+
+        bcast_keyvals(fd, pid);
 
         for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {
-            /* complete barrier */
             status = cmd_response(tproxy->control_fd, pid, "cmd=barrier_out\n");
             HYDU_ERR_POP(status, "error writing PMI line\n");
         }
     }
 
   fn_exit:
-    if (tmp)
-        HYDU_FREE(tmp);
     HYDU_FUNC_EXIT();
     return status;
 
@@ -510,6 +529,10 @@ static HYD_status fn_spawn(int fd, int pid, int pgid, char *args[])
         HYDU_FREE(cmd);
     }
 
+    /* Cache the pre-initialized keyvals on the new proxies */
+    if (preput_num)
+        bcast_keyvals(fd, pid);
+
   fn_exit:
     HYD_pmcd_pmi_free_tokens(tokens, token_count);
     HYDU_free_strlist(proxy_args);
diff --git a/src/pm/hydra/pm/pmiserv/pmiserv_utils.c b/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
index 46539da..45222aa 100644
--- a/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
+++ b/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
@@ -555,6 +555,8 @@ HYD_status HYD_pmcd_pmi_alloc_pg_scratch(struct HYD_pg *pg)
     status = HYD_pmcd_pmi_allocate_kvs(&pg_scratch->kvs, pg->pgid);
     HYDU_ERR_POP(status, "unable to allocate kvs space\n");
 
+    pg_scratch->keyval_dist_count = 0;
+
   fn_exit:
     HYDU_FUNC_EXIT();
     return status;

-----------------------------------------------------------------------

Summary of changes:
 src/pm/hydra/pm/pmiserv/pmiserv_pmi.h    |    1 +
 src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c |  149 +++++++++++++++++-------------
 src/pm/hydra/pm/pmiserv/pmiserv_utils.c  |    2 +
 3 files changed, 89 insertions(+), 63 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list