[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.0.3-28-g00f9be7

mysql vizuser noreply at mpich.org
Sun Apr 21 00:22:53 CDT 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  00f9be7a088eb90814db2069f5f7a27de496fb96 (commit)
       via  7a57e3f120a261de20c69cfc1648fba2674079c2 (commit)
      from  b51e90e00e6c193c378bb1bcf909a030dc3a3920 (commit)

The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/00f9be7a088eb90814db2069f5f7a27de496fb96

commit 00f9be7a088eb90814db2069f5f7a27de496fb96
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Sat Apr 20 18:49:52 2013 -0500

    Bug-fix: PMI keyval allgather code.
    
    When we overflow our flow-control packet size and send multiple
    packets of keyval pairs, we were overwriting the older information.
    Discovered by artificially making the flow-control size very small.
    
    No reviewer.

diff --git a/src/pm/hydra/include/hydra.h b/src/pm/hydra/include/hydra.h
index 3689417..6cdbfa9 100644
--- a/src/pm/hydra/include/hydra.h
+++ b/src/pm/hydra/include/hydra.h
@@ -564,6 +564,9 @@ HYD_status HYDU_sock_cloexec(int fd);
 #define HYDU_malloc(a) MPL_trmalloc((unsigned)(a),__LINE__,__FILE__)
 #define malloc(a)      'Error use HYDU_malloc' :::
 
+#define HYDU_realloc(a,b) MPL_trrealloc((void *)(a),(unsigned)(b),__LINE__,__FILE__)
+#define realloc(a)      'Error use HYDU_realloc' :::
+
 #define HYDU_free(a) MPL_trfree(a,__LINE__,__FILE__)
 #define free(a)      'Error use HYDU_free' :::
 
@@ -572,6 +575,7 @@ HYD_status HYDU_sock_cloexec(int fd);
 #define HYDU_mem_init()
 #define HYDU_strdup MPL_strdup
 #define HYDU_malloc malloc
+#define HYDU_realloc realloc
 #define HYDU_free free
 
 #endif /* USE_MEMORY_TRACING */
@@ -589,6 +593,16 @@ HYD_status HYDU_sock_cloexec(int fd);
                                 (int) (size));                          \
     }
 
+#define HYDU_REALLOC(p, type, size, status)                             \
+    {                                                                   \
+        HYDU_ASSERT(size, status);                                      \
+        (p) = (type) HYDU_realloc((p),(size));                          \
+        if ((p) == NULL)                                                \
+            HYDU_ERR_SETANDJUMP((status), HYD_NO_MEM,                   \
+                                "failed to allocate %d bytes\n",        \
+                                (int) (size));                          \
+    }
+
 #define HYDU_FREE(p)                            \
     {                                           \
         HYDU_free((void *) p);                  \
diff --git a/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c b/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c
index bfd79a1..5d3693c 100644
--- a/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c
+++ b/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c
@@ -513,14 +513,16 @@ static HYD_status fn_keyval_cache(int fd, char *args[])
     status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
     HYDU_ERR_POP(status, "unable to convert args to tokens\n");
 
-    cache_get.keyval_len = token_count;
-    HYDU_MALLOC(cache_get.key, char **, cache_get.keyval_len * sizeof(char *), status);
-    HYDU_MALLOC(cache_get.val, char **, cache_get.keyval_len * sizeof(char *), status);
+    /* allocate a larger space for the cached keyvals, copy over the
+     * older keyvals and add the new ones in */
+    HYDU_REALLOC(cache_get.key, char **, (cache_get.keyval_len + token_count) * sizeof(char *), status);
+    HYDU_REALLOC(cache_get.val, char **, (cache_get.keyval_len + token_count) * sizeof(char *), status);
 
     for (i = 0; i < token_count; i++) {
-        cache_get.key[i] = HYDU_strdup(tokens[i].key);
-        cache_get.val[i] = HYDU_strdup(tokens[i].val);
+        cache_get.key[cache_get.keyval_len + i] = HYDU_strdup(tokens[i].key);
+        cache_get.val[cache_get.keyval_len + i] = HYDU_strdup(tokens[i].val);
     }
+    cache_get.keyval_len += token_count;
 
   fn_exit:
     HYD_pmcd_pmi_free_tokens(tokens, token_count);
diff --git a/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c b/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
index e08d425..dfbed96 100644
--- a/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
+++ b/src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c
@@ -48,9 +48,10 @@ static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[])
 {
     struct HYD_proxy *proxy, *tproxy;
     struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
-    int proxy_count, keyval_count, i, arg_count;
+    int proxy_count, keyval_count, i, j, arg_count;
     struct HYD_pmcd_pmi_kvs_pair *run;
     char **tmp = NULL, *cmd;
+    static int keyval_count_distributed = 0;
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -72,17 +73,22 @@ static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[])
         for (run = pg_scratch->kvs->key_pair; run; run = run->next)
             keyval_count++;
 
+        keyval_count -= keyval_count_distributed;
+
         /* Each keyval has the following four items: 'key' '=' 'val'
          * '<space>'.  Two additional items for the command at the
          * start and the NULL at the end. */
-        HYDU_MALLOC(tmp, char **, (4 * keyval_count + 2) * sizeof(char *), status);
+        HYDU_MALLOC(tmp, char **, (4 * keyval_count + 3) * sizeof(char *), status);
 
         /* send all available keyvals downstream */
         if (keyval_count) {
             arg_count = 1;
             i = 0;
             tmp[i++] = HYDU_strdup("cmd=keyval_cache ");
-            for (run = pg_scratch->kvs->key_pair; run; run = run->next) {
+            for (run = pg_scratch->kvs->key_pair, j = 0; run; run = run->next, j++) {
+                if (j < keyval_count_distributed)
+                    continue;
+
                 tmp[i++] = HYDU_strdup(run->key);
                 tmp[i++] = HYDU_strdup("=");
                 tmp[i++] = HYDU_strdup(run->val);
@@ -90,12 +96,14 @@ static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[])
 
                 arg_count++;
                 if (arg_count >= MAX_PMI_INTERNAL_ARGS) {
+                    tmp[i++] = HYDU_strdup("\n");
                     tmp[i++] = NULL;
 
                     status = HYDU_str_alloc_and_join(tmp, &cmd);
                     HYDU_ERR_POP(status, "unable to join strings\n");
                     HYDU_free_strlist(tmp);
 
+                    keyval_count_distributed += (arg_count - 1);
                     for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {
                         status = cmd_response(tproxy->control_fd, pid, cmd);
                         HYDU_ERR_POP(status, "error writing PMI line\n");
@@ -104,21 +112,24 @@ static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[])
 
                     i = 0;
                     tmp[i++] = HYDU_strdup("cmd=keyval_cache ");
+                    arg_count = 1;
                 }
             }
+            tmp[i++] = HYDU_strdup("\n");
             tmp[i++] = NULL;
 
             if (arg_count > 1) {
                 status = HYDU_str_alloc_and_join(tmp, &cmd);
                 HYDU_ERR_POP(status, "unable to join strings\n");
-                HYDU_free_strlist(tmp);
 
+                keyval_count_distributed += (arg_count - 1);
                 for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {
                     status = cmd_response(tproxy->control_fd, pid, cmd);
                     HYDU_ERR_POP(status, "error writing PMI line\n");
                 }
                 HYDU_FREE(cmd);
             }
+            HYDU_free_strlist(tmp);
         }
 
         for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {

http://git.mpich.org/mpich.git/commitdiff/7a57e3f120a261de20c69cfc1648fba2674079c2

commit 7a57e3f120a261de20c69cfc1648fba2674079c2
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Sat Apr 20 18:52:27 2013 -0500

    Warning squash: unused variables.
    
    No reviewer.

diff --git a/src/pm/hydra/tools/bootstrap/external/pbs_launch.c b/src/pm/hydra/tools/bootstrap/external/pbs_launch.c
index 6ef18dc..c97d634 100644
--- a/src/pm/hydra/tools/bootstrap/external/pbs_launch.c
+++ b/src/pm/hydra/tools/bootstrap/external/pbs_launch.c
@@ -38,7 +38,7 @@ static HYD_status find_pbs_node_id(const char *hostname, int *node_id)
 HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
                                       int *control_fd)
 {
-    int proxy_count, i, args_count, events_count, err, idx, hostid;
+    int proxy_count, i, args_count, err, hostid;
     struct HYD_proxy *proxy;
     char *targs[HYD_NUM_TMP_STRINGS];
     HYD_status status = HYD_SUCCESS;

-----------------------------------------------------------------------

Summary of changes:
 src/pm/hydra/include/hydra.h                       |   14 ++++++++++++++
 src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c              |   12 +++++++-----
 src/pm/hydra/pm/pmiserv/pmiserv_pmi_v1.c           |   19 +++++++++++++++----
 src/pm/hydra/tools/bootstrap/external/pbs_launch.c |    2 +-
 4 files changed, 37 insertions(+), 10 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list