[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1rc2-197-gf46354a

mysql vizuser noreply at mpich.org
Wed Jan 29 19:07:17 CST 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  f46354acff66cb0cf5ccc134399d9f5b1bc46b31 (commit)
       via  7a9210fff1f9545215bea4a2a67161e88c8dcab9 (commit)
       via  687dd1dc8d9d64cdda9926d48d75ebfc70b81301 (commit)
       via  0f0f20a601889ae480958bc95112d0eee686e822 (commit)
      from  b270ae2b734be3e34e1906cfa9cb8e56a89d7b53 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/f46354acff66cb0cf5ccc134399d9f5b1bc46b31

commit f46354acff66cb0cf5ccc134399d9f5b1bc46b31
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Wed Jan 29 15:14:44 2014 -0600

    Bug-fix: deal with wrap around PMI process mapping strings
    
    If the PMI process mapping string wraps around to node 0, we were
    creating a bad node list of which processes are local and which are
    not.  This patch provides a hacky fix for this case by only repeating
    the part of the PMI mapping string from the point where it wrapped
    around.
    
    The patch is hacky because it assumes that seeing a start node ID of 0
    means a wrap around.  This is not necessarily true.  A user-defined
    node list can use the node ID 0 without actually creating a wrap
    around.  The reason this patch still works in this case is because
    Hydra creates a new node list starting from node ID 0 for
    user-specified nodes during MPI_Comm_spawn{_multiple}.  If a different
    process manager searches for allocated nodes in the user-specified
    list, this patch will break.
    
    Fixes #2007.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/mpid_vc.c b/src/mpid/ch3/src/mpid_vc.c
index fc2c970..b9289fe 100644
--- a/src/mpid/ch3/src/mpid_vc.c
+++ b/src/mpid/ch3/src/mpid_vc.c
@@ -1085,6 +1085,7 @@ static int populate_ids_from_mapping(char *mapping, int *num_nodes, MPIDI_PG_t *
     int rank;
     int block, block_node, node_proc;
     int *tmp_rank_list, i;
+    int found_wrap;
     MPIU_CHKLMEM_DECL(1);
 
     *did_map = 1; /* reset upon failure */
@@ -1096,8 +1097,25 @@ static int populate_ids_from_mapping(char *mapping, int *num_nodes, MPIDI_PG_t *
     MPIU_ERR_CHKINTERNAL(mt != VECTOR_MAPPING, mpi_errno, "unsupported mapping type");
 
     /* allocate nodes to ranks */
+    found_wrap = 0;
     for (rank = 0;;) {
-        for (block = 0; block < nblocks; block++) {
+        /* FIXME: The patch is hacky because it assumes that seeing a
+         * start node ID of 0 means a wrap around.  This is not
+         * necessarily true.  A user-defined node list can, in theory,
+         * use the node ID 0 without actually creating a wrap around.
+         * The reason this patch still works in this case is because
+         * Hydra creates a new node list starting from node ID 0 for
+         * user-specified nodes during MPI_Comm_spawn{_multiple}.  If
+         * a different process manager searches for allocated nodes in
+         * the user-specified list, this patch will break. */
+
+        /* If we found that the blocks wrap around, repeat loops
+         * should only start at node id 0 */
+        for (block = 0; found_wrap && mb[block].start_id; block++);
+
+        for (; block < nblocks; block++) {
+            if (mb[block].start_id == 0)
+                found_wrap = 1;
             for (block_node = 0; block_node < mb[block].count; block_node++) {
                 for (node_proc = 0; node_proc < mb[block].size; node_proc++) {
                     pg->vct[rank].node_id = mb[block].start_id + block_node;

http://git.mpich.org/mpich.git/commitdiff/7a9210fff1f9545215bea4a2a67161e88c8dcab9

commit 7a9210fff1f9545215bea4a2a67161e88c8dcab9
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Wed Jan 29 17:19:16 2014 -0600

    handle when qsort is not available in vc setup
    
    When qsort is not available, don't define the comparison function and
    fall back to a simple insertion sort implementation. In the future, a
    more general function with fallback should be added in MPL so it
    can be used in other cases like comm_split.
    
    Refs #2007
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/mpid_vc.c b/src/mpid/ch3/src/mpid_vc.c
index acec520..fc2c970 100644
--- a/src/mpid/ch3/src/mpid_vc.c
+++ b/src/mpid/ch3/src/mpid_vc.c
@@ -1056,6 +1056,7 @@ done:
 
 #endif
 
+#if defined HAVE_QSORT
 static int compare_ints(const void *orig_x, const void *orig_y)
 {
     int x = *((int *) orig_x);
@@ -1068,6 +1069,7 @@ static int compare_ints(const void *orig_x, const void *orig_y)
     else
         return 1;
 }
+#endif
 
 #undef FUNCNAME
 #define FUNCNAME populate_ids_from_mapping
@@ -1120,7 +1122,27 @@ break_out:
 #if defined HAVE_QSORT
     qsort(tmp_rank_list, pg->size, sizeof(int), compare_ints);
 #else
-#error "qsort not available"
+    /* fall through to insertion sort if qsort is unavailable/disabled */
+    {
+        int j, tmp;
+
+        for (i = 1; i < pg->size; ++i) {
+            tmp = tmp_rank_list[i];
+            j = i - 1;
+            while (1) {
+                if (tmp_rank_list[j] > tmp) {
+                    tmp_rank_list[j+1] = tmp_rank_list[j];
+                    j = j - 1;
+                    if (j < 0)
+                        break;
+                }
+                else {
+                    break;
+                }
+            }
+            tmp_rank_list[j+1] = tmp;
+        }
+    }
 #endif
 
     *num_nodes = 1;

http://git.mpich.org/mpich.git/commitdiff/687dd1dc8d9d64cdda9926d48d75ebfc70b81301

commit 687dd1dc8d9d64cdda9926d48d75ebfc70b81301
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Wed Jan 29 13:20:41 2014 -0600

    Improve PMI_process_mapping parsing.
    
    The original PMI process mapping parsing code had a number of
    assumptions that would allow it to only work on COMM_WORLD.  This
    patch corrects these to work for dynamic processes as well.
    
    It also fixes the computation of the number of nodes in use so that
    it is correct in the general case.
    
    Refs #2007.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/mpid_vc.c b/src/mpid/ch3/src/mpid_vc.c
index e35ed5d..acec520 100644
--- a/src/mpid/ch3/src/mpid_vc.c
+++ b/src/mpid/ch3/src/mpid_vc.c
@@ -1056,6 +1056,19 @@ done:
 
 #endif
 
+static int compare_ints(const void *orig_x, const void *orig_y)
+{
+    int x = *((int *) orig_x);
+    int y = *((int *) orig_y);
+
+    if (x == y)
+        return 0;
+    else if (x < y)
+        return -1;
+    else
+        return 1;
+}
+
 #undef FUNCNAME
 #define FUNCNAME populate_ids_from_mapping
 #undef FCNAME
@@ -1069,6 +1082,8 @@ static int populate_ids_from_mapping(char *mapping, int *num_nodes, MPIDI_PG_t *
     int nblocks = 0;
     int rank;
     int block, block_node, node_proc;
+    int *tmp_rank_list, i;
+    MPIU_CHKLMEM_DECL(1);
 
     *did_map = 1; /* reset upon failure */
 
@@ -1078,31 +1093,43 @@ static int populate_ids_from_mapping(char *mapping, int *num_nodes, MPIDI_PG_t *
     if (NULL_MAPPING == mt) goto fn_fail;
     MPIU_ERR_CHKINTERNAL(mt != VECTOR_MAPPING, mpi_errno, "unsupported mapping type");
 
-    rank = 0;
-    /* for a representation like (block,N,(1,1)) this while loop causes us to
-     * re-use that sole map block over and over until we have assigned node
-     * ids to every process */
-    while (rank < pg->size) {
-        for (block = 0; block < nblocks; ++block) {
-            int node_id = mb[block].start_id;
-            for (block_node = 0; block_node < mb[block].count; ++block_node) {
-                if (node_id > *num_nodes)
-                    *num_nodes = node_id;
-
-                for (node_proc = 0; node_proc < mb[block].size; ++node_proc) {
-                    pg->vct[rank].node_id = node_id;
-                    ++rank;
-                    if (rank == pg->size)
-                        goto map_done;
+    /* allocate nodes to ranks */
+    for (rank = 0;;) {
+        for (block = 0; block < nblocks; block++) {
+            for (block_node = 0; block_node < mb[block].count; block_node++) {
+                for (node_proc = 0; node_proc < mb[block].size; node_proc++) {
+                    pg->vct[rank].node_id = mb[block].start_id + block_node;
+                    if (++rank == pg->size)
+                        goto break_out;
                 }
-                ++node_id;
             }
         }
     }
 
-map_done:
-    ++(*num_nodes); /* add one to get the num instead of the max */
+break_out:
+    /* Find the number of unique node ids.  This is the classic
+     * element distinctness problem, for which the lower bound time
+     * complexity is O(N log N).  Here we use a simple algorithm to
+     * sort the array and find the number of changes in the array
+     * through a linear search.  There are certainly better algorithms
+     * available, which can be employed. */
+    MPIU_CHKLMEM_MALLOC(tmp_rank_list, int *, pg->size * sizeof(int), mpi_errno, "tmp_rank_list");
+    for (i = 0; i < pg->size; i++)
+        tmp_rank_list[i] = pg->vct[i].node_id;
+
+#if defined HAVE_QSORT
+    qsort(tmp_rank_list, pg->size, sizeof(int), compare_ints);
+#else
+#error "qsort not available"
+#endif
+
+    *num_nodes = 1;
+    for (i = 1; i < pg->size; i++)
+        if (tmp_rank_list[i] != tmp_rank_list[i-1])
+            (*num_nodes)++;
+
 fn_exit:
+    MPIU_CHKLMEM_FREEALL();
     MPIU_Free(mb);
     return mpi_errno;
 fn_fail:
@@ -1226,7 +1253,6 @@ int MPIDI_Populate_vc_node_ids(MPIDI_PG_t *pg, int our_pg_rank)
 
     /* See if process manager supports PMI_process_mapping keyval */
 
-    /* FIXME 'PMI_process_mapping' only applies for the original PG (MPI_COMM_WORLD) */
     if (pmi_version == 1 && pmi_subversion == 1) {
         pmi_errno = PMI_KVS_Get(kvs_name, "PMI_process_mapping", value, val_max_sz);
         if (pmi_errno == 0) {

http://git.mpich.org/mpich.git/commitdiff/0f0f20a601889ae480958bc95112d0eee686e822

commit 0f0f20a601889ae480958bc95112d0eee686e822
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Wed Jan 29 13:20:35 2014 -0600

    Remove unused flag.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/pm/hydra/pm/pmiserv/pmiserv_utils.c b/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
index 954a0fc..95abb81 100644
--- a/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
+++ b/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
@@ -201,7 +201,6 @@ static HYD_status pmi_process_mapping(struct HYD_pg *pg, char **process_mapping_
         blocklist_tail = blocklist_head;
     }
 
-  create_mapping_key:
     /* Create the mapping out of the blocks */
     HYD_STRING_STASH_INIT(stash);
     HYD_STRING_STASH(stash, HYDU_strdup("("), status);

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/src/mpid_vc.c              |  104 +++++++++++++++++++++++++------
 src/pm/hydra/pm/pmiserv/pmiserv_utils.c |    1 -
 2 files changed, 85 insertions(+), 20 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list