[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1rc2-185-gca2d0bc

mysql vizuser noreply at mpich.org
Sun Jan 26 13:37:33 CST 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  ca2d0bca0ac10a7ac411bdd6ad128ad6a2f2bd65 (commit)
       via  45cf8b90b10de7f3b0ef6278e3abc29b41295ef4 (commit)
       via  486fb25941e6a615b60e6d70281cc6a28decd69b (commit)
       via  e35ab750b9cd0b0093e2aaad60cd44410f6911c8 (commit)
       via  15aec608d167138bbe92b977e8b3f00fd342395f (commit)
       via  bec02c2a6a4c050875502a77bd1ed3bd040c286e (commit)
      from  b65699f50df5edc76f65514105e4f3e9df43acd2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/ca2d0bca0ac10a7ac411bdd6ad128ad6a2f2bd65

commit ca2d0bca0ac10a7ac411bdd6ad128ad6a2f2bd65
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Tue Jan 21 14:31:57 2014 -0600

    add debugging for process affinity options
    
    Users can specify the envvar HYDRA_TOPO_DEBUG such that hydra will
    print out the cpu bindings for an MPI job, then run without actually
    binding the processes. This is useful for debugging with hwloc's
    arbitrary topology loading functionality.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c
index 5645fa5..be8978f 100644
--- a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c
+++ b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c
@@ -6,6 +6,7 @@
  */
 
 #include "hydra.h"
+#include "topo.h"
 #include "topo_hwloc.h"
 
 #define MAP_LENGTH      (5)
@@ -165,8 +166,8 @@ static int parse_cache_string(const char *str)
 static HYD_status handle_bitmap_binding(const char *binding, const char *mapping)
 {
     int i, j, k, bind_count, map_count, cache_depth = 0, bind_depth = 0, map_depth = 0;
-    int total_num_pus, total_map_objs, total_bind_objs, num_pus_in_map_domain,
-        num_pus_in_bind_domain, total_map_domains;
+    int total_map_objs, total_bind_objs, num_pus_in_map_domain, num_pus_in_bind_domain,
+        total_map_domains;
     hwloc_obj_t map_obj, bind_obj, *start_pu;
     hwloc_cpuset_t *map_domains;
     char *bind_str, *map_str;
@@ -251,12 +252,11 @@ static HYD_status handle_bitmap_binding(const char *binding, const char *mapping
      */
 
     /* calculate the number of map domains */
-    total_num_pus = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
-
     total_map_objs = hwloc_get_nbobjs_by_depth(topology, map_depth);
-    num_pus_in_map_domain = (total_num_pus / total_map_objs) * map_count;
-    HYDU_ERR_CHKANDJUMP(status, num_pus_in_map_domain > total_num_pus, HYD_INTERNAL_ERROR,
-                        "mapping option \"%s\" larger than total system size\n", mapping);
+    num_pus_in_map_domain = (HYDT_topo_hwloc_info.total_num_pus / total_map_objs) * map_count;
+    HYDU_ERR_CHKANDJUMP(status, num_pus_in_map_domain > HYDT_topo_hwloc_info.total_num_pus,
+                        HYD_INTERNAL_ERROR, "mapping option \"%s\" larger than total system size\n",
+                        mapping);
 
     /* The number of total_map_domains should be large enough to
      * contain all contiguous map object collections of length
@@ -307,11 +307,12 @@ static HYD_status handle_bitmap_binding(const char *binding, const char *mapping
 
     /* calculate the number of possible bindings and allocate bitmaps for them */
     total_bind_objs = hwloc_get_nbobjs_by_depth(topology, bind_depth);
-    num_pus_in_bind_domain = (total_num_pus / total_bind_objs) * bind_count;
+    num_pus_in_bind_domain = (HYDT_topo_hwloc_info.total_num_pus / total_bind_objs) * bind_count;
 
     if (num_pus_in_bind_domain < num_pus_in_map_domain) {
         for (i = 1; (i * num_pus_in_map_domain) % num_pus_in_bind_domain; i++);
-        HYDT_topo_hwloc_info.num_bitmaps = (i * num_pus_in_map_domain * total_map_domains) / num_pus_in_bind_domain;
+        HYDT_topo_hwloc_info.num_bitmaps =
+            (i * num_pus_in_map_domain * total_map_domains) / num_pus_in_bind_domain;
     }
     else {
         HYDT_topo_hwloc_info.num_bitmaps = total_map_domains;
@@ -393,6 +394,8 @@ HYD_status HYDT_topo_hwloc_init(const char *binding, const char *mapping, const
     hwloc_topology_init(&topology);
     hwloc_topology_load(topology);
 
+    HYDT_topo_hwloc_info.total_num_pus = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
+
     hwloc_initialized = 1;
 
     /* bindings that don't require mapping */
@@ -450,9 +453,24 @@ HYD_status HYDT_topo_hwloc_bind(int idx)
     /* For processes where the user did not specify a binding unit, no binding is needed. */
     if (!HYDT_topo_hwloc_info.user_binding || (idx < HYDT_topo_hwloc_info.num_bitmaps)) {
         id = idx % HYDT_topo_hwloc_info.num_bitmaps;
-        hwloc_set_cpubind(topology, HYDT_topo_hwloc_info.bitmap[id], 0);
-        hwloc_set_membind(topology, HYDT_topo_hwloc_info.bitmap[id], HYDT_topo_hwloc_info.membind,
-                          0);
+
+        /* For debugging, print the binding bitmaps but don't actually bind. */
+        if (HYDT_topo_info.debug) {
+            int i;
+
+            HYDU_dump_noprefix(stdout, "process %d binding: ", idx);
+
+            for (i = 0; i < HYDT_topo_hwloc_info.total_num_pus; i++)
+                HYDU_dump_noprefix(stdout, "%d ",
+                                   hwloc_bitmap_isset(HYDT_topo_hwloc_info.bitmap[id], i));
+
+            HYDU_dump_noprefix(stdout, "\n");
+        }
+        else {
+            hwloc_set_cpubind(topology, HYDT_topo_hwloc_info.bitmap[id], 0);
+            hwloc_set_membind(topology, HYDT_topo_hwloc_info.bitmap[id],
+                              HYDT_topo_hwloc_info.membind, 0);
+        }
     }
 
     HYDU_FUNC_EXIT();
diff --git a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.h b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.h
index 5147ad9..ecef20c 100644
--- a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.h
+++ b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.h
@@ -19,6 +19,7 @@ struct HYDT_topo_hwloc_info {
     hwloc_bitmap_t *bitmap;
     hwloc_membind_policy_t membind;
     int user_binding;
+    int total_num_pus;
 };
 extern struct HYDT_topo_hwloc_info HYDT_topo_hwloc_info;
 
diff --git a/src/pm/hydra/tools/topo/topo.c b/src/pm/hydra/tools/topo/topo.c
index 384dadb..72a26de 100644
--- a/src/pm/hydra/tools/topo/topo.c
+++ b/src/pm/hydra/tools/topo/topo.c
@@ -47,6 +47,8 @@ HYD_status HYDT_topo_init(char *user_topolib, char *user_binding, char *user_map
     else if (MPL_env2str("HYDRA_MEMBIND", &membind) == 0)
         membind = NULL;
 
+    if (MPL_env2bool("HYDRA_TOPO_DEBUG", &HYDT_topo_info.debug) == 0)
+        HYDT_topo_info.debug = 0;
 
     if (!binding || !strcmp(binding, "none")) {
         ignore_binding = 1;
diff --git a/src/pm/hydra/tools/topo/topo.h b/src/pm/hydra/tools/topo/topo.h
index 7436ded..deeff79 100644
--- a/src/pm/hydra/tools/topo/topo.h
+++ b/src/pm/hydra/tools/topo/topo.h
@@ -24,6 +24,8 @@
 struct HYDT_topo_info {
     /** \brief Topology library to use */
     char *topolib;
+    /** \brief Enable debugging output */
+    int debug;
 };
 
 /*! \cond */

http://git.mpich.org/mpich.git/commitdiff/45cf8b90b10de7f3b0ef6278e3abc29b41295ef4

commit 45cf8b90b10de7f3b0ef6278e3abc29b41295ef4
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Fri Jan 17 14:00:38 2014 -0600

    remove help text for unsupported mapping option
    
    Hydra does not currently support user-defined process mapping strings.
    Remove the help text for now.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/pm/hydra/ui/mpich/utils.c b/src/pm/hydra/ui/mpich/utils.c
index cf6c797..7941673 100644
--- a/src/pm/hydra/ui/mpich/utils.c
+++ b/src/pm/hydra/ui/mpich/utils.c
@@ -926,8 +926,6 @@ static void bind_to_help_fn(void)
     printf("            l1cache          -- map to L1 cache domain\n");
     printf("            l2cache          -- map to L2 cache domain\n");
     printf("            l3cache          -- map to L3 cache domain\n");
-    printf("            TCSNB            -- map in order of T, C, S, N, B\n");
-    printf("            CTSNB            -- map in order of C, T, S, N, B\n");
 
     printf("\n\n");
 

http://git.mpich.org/mpich.git/commitdiff/486fb25941e6a615b60e6d70281cc6a28decd69b

commit 486fb25941e6a615b60e6d70281cc6a28decd69b
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Thu Jan 23 18:35:36 2014 -0600

    White space cleanup.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/pm/hydra/examples/binding.c b/src/pm/hydra/examples/binding.c
index 85884b3..53880d7 100644
--- a/src/pm/hydra/examples/binding.c
+++ b/src/pm/hydra/examples/binding.c
@@ -27,8 +27,7 @@ int main(int argc, char **argv)
             num_cpus = atoi(argv[1]);
             argv += 2;
         }
-        else if (!strcmp(argv[0], "--help") || !strcmp(argv[0], "-help") ||
-                 !strcmp(argv[0], "-h")) {
+        else if (!strcmp(argv[0], "--help") || !strcmp(argv[0], "-help") || !strcmp(argv[0], "-h")) {
             fprintf(stderr, "Usage: ./binding {--num-cpus [CPUs]}\n");
             MPI_Abort(MPI_COMM_WORLD, -1);
         }
diff --git a/src/pm/hydra/examples/print_cpus_allowed.c b/src/pm/hydra/examples/print_cpus_allowed.c
index 4035b9b..9e5b6f2 100644
--- a/src/pm/hydra/examples/print_cpus_allowed.c
+++ b/src/pm/hydra/examples/print_cpus_allowed.c
@@ -13,12 +13,12 @@ int main(int argc, char **argv)
     MPI_Init(&argc, &argv);
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 
-    MPI_Get_processor_name(processor_name,&namelen);
+    MPI_Get_processor_name(processor_name, &namelen);
 
     fp = popen("grep Cpus_allowed_list /proc/$$/status", "r");
 
     while (fgets(path, PATH_MAX, fp) != NULL) {
-      printf("%s[%d]: %s", processor_name, rank, path);
+        printf("%s[%d]: %s", processor_name, rank, path);
     }
 
     pclose(fp);

http://git.mpich.org/mpich.git/commitdiff/e35ab750b9cd0b0093e2aaad60cd44410f6911c8

commit e35ab750b9cd0b0093e2aaad60cd44410f6911c8
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Thu Jan 16 23:33:12 2014 -0600

    hydra process binding/mapping overhaul
    
    Significant rework of the process binding/mapping features in hydra.
    There were a number of bugs in the existing code. This commit addresses
    them and simplifies the binding/mapping logic.
    
    It also makes binding/mapping options more permissive.  If a user
    specifies a system element that does not exist in the process affinity
    options, use the next largest element in the topology.  This makes
    things safer for systems in which hwloc does not report certain
    elements, e.g., a Haswell-based MBP that shows no sockets, but
    does have a NUMA node.
    
    Other comments:
    
    1. User-defined mapping strings (e.g. TCSNB) are no longer supported.
    Support may be added back at a later time, depending on user feedback.
    
    2. Properly support cache-level binding/mapping. To accommodate all levels
    of processor cache, we define objects by their absolute depth in the
    topology.
    
    3. Allocate the correct number of binding/mapping combinations given
    the user-provided options, and populate them accordingly.
    
    Refs #1858
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c
index 4af224e..5645fa5 100644
--- a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c
+++ b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c
@@ -69,8 +69,7 @@ static HYD_status handle_user_binding(const char *binding)
         hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);
         bindstr = strtok(bind_entries[i], "+");
         while (bindstr) {
-            hwloc_bitmap_set(HYDT_topo_hwloc_info.bitmap[i],
-                             atoi(bindstr));
+            hwloc_bitmap_set(HYDT_topo_hwloc_info.bitmap[i], atoi(bindstr));
             bindstr = strtok(NULL, "+");
         }
     }
@@ -78,6 +77,13 @@ static HYD_status handle_user_binding(const char *binding)
     HYDT_topo_hwloc_info.num_bitmaps = num_bind_entries;
     HYDT_topo_hwloc_info.user_binding = 1;
 
+    /* free temporary memory */
+    for (i = 0; i < num_bind_entries; i++) {
+        HYDU_FREE(bind_entries[i]);
+    }
+    HYDU_FREE(bind_entries);
+    HYDU_FREE(bind_entry_lengths);
+
   fn_exit:
     HYDU_FUNC_EXIT();
     return status;
@@ -114,93 +120,6 @@ static HYD_status handle_rr_binding(void)
     goto fn_exit;
 }
 
-static hwloc_obj_t find_obj_containing_pu(hwloc_obj_type_t type, int idx, int cache_depth)
-{
-    int i;
-    hwloc_obj_t obj;
-
-    obj = hwloc_get_root_obj(topology);
-    if (!obj || !hwloc_bitmap_isset(obj->cpuset, idx))
-        return NULL;
-
-    while (obj) {
-        if (obj->type == type)
-            if (type != HWLOC_OBJ_CACHE || obj->attr->cache.depth == cache_depth)
-                break;
-        for (i = 0; i < obj->arity; i++) {
-            if (hwloc_bitmap_isset(obj->children[i]->cpuset, idx)) {
-                obj = obj->children[i];
-                break;
-            }
-        }
-    }
-
-    return obj;
-}
-
-static HYD_status get_nbobjs_by_type(hwloc_obj_type_t type, int *nbobjs, int *nbobjs_per_parent)
-{
-    int x, nb;
-    hwloc_obj_type_t parent, t;
-    HYD_status status = HYD_SUCCESS;
-
-    HYDU_FUNC_ENTER();
-
-    parent = HWLOC_OBJ_SYSTEM;
-
-    if (type == HWLOC_OBJ_PU)
-        parent = HWLOC_OBJ_CORE;
-    else if (type == HWLOC_OBJ_CORE)
-        parent = HWLOC_OBJ_SOCKET;
-    else if (type == HWLOC_OBJ_SOCKET)
-        parent = HWLOC_OBJ_NODE;
-    else if (type == HWLOC_OBJ_NODE)
-        parent = HWLOC_OBJ_MACHINE;
-    else if (type == HWLOC_OBJ_MACHINE)
-        parent = HWLOC_OBJ_MACHINE;
-
-    HYDU_ASSERT(parent != HWLOC_OBJ_SYSTEM, status);
-
-    nb = 0;
-    t = type;
-    while (1) {
-        nb = hwloc_get_nbobjs_by_type(topology, t);
-        if (nb)
-            break;
-        if (t == HWLOC_OBJ_SYSTEM)
-            break;
-        while (--t == HWLOC_OBJ_CACHE);
-    }
-    HYDU_ASSERT(nb, status);
-    if (nbobjs)
-        *nbobjs = nb;
-
-    if (nbobjs_per_parent == NULL)
-        goto fn_exit;
-
-    x = 0;
-    t = parent;
-    while (1) {
-        x = hwloc_get_nbobjs_by_type(topology, t);
-        if (x)
-            break;
-        while (--t == HWLOC_OBJ_CACHE);
-        if (t == HWLOC_OBJ_SYSTEM)
-            break;
-    }
-    HYDU_ASSERT(x, status);
-    HYDU_ASSERT(nb % x == 0, status);
-
-    *nbobjs_per_parent = (nb / x);
-
-  fn_exit:
-    HYDU_FUNC_EXIT();
-    return status;
-
-  fn_fail:
-    goto fn_exit;
-}
-
 static HYD_status split_count_field(const char *str, char **split_str, int *count)
 {
     char *full_str = HYDU_strdup(str), *count_str;
@@ -243,118 +162,13 @@ static int parse_cache_string(const char *str)
     return atoi(t1);
 }
 
-static HYD_status cache_to_cpu_type(int cache_depth, hwloc_obj_type_t * cpu_type)
-{
-    hwloc_obj_t cache_obj, cpu_obj;
-    HYD_status status = HYD_SUCCESS;
-
-    HYDU_FUNC_ENTER();
-
-    cache_obj = hwloc_get_root_obj(topology);
-    HYDU_ASSERT(cache_obj, status);
-
-    while (cache_obj && cache_obj->type != HWLOC_OBJ_CACHE &&
-           cache_obj->attr->cache.depth != cache_depth)
-        cache_obj = cache_obj->first_child;
-    if (cache_obj == NULL) {
-        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "cache level %d not found\n", cache_depth);
-    }
-
-    cpu_obj = hwloc_get_root_obj(topology);
-    HYDU_ASSERT(cpu_obj, status);
-
-    while (cpu_obj && cpu_obj->type == HWLOC_OBJ_CACHE &&
-           !hwloc_bitmap_isequal(cpu_obj->cpuset, cache_obj->cpuset))
-        cpu_obj = cpu_obj->first_child;
-    if (cpu_obj == NULL) {
-        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
-                            "could not find cpu type that maps cache\n");
-    }
-
-    *cpu_type = cpu_obj->type;
-
-  fn_exit:
-    HYDU_FUNC_EXIT();
-    return status;
-
-  fn_fail:
-    goto fn_exit;
-}
-
-static HYD_status obj_type_to_map_str(hwloc_obj_type_t type, int cache_depth, char **map)
-{
-    hwloc_obj_type_t cpu_type;
-    HYD_status status = HYD_SUCCESS;
-
-    HYDU_FUNC_ENTER();
-
-    if (type == HWLOC_OBJ_MACHINE) {
-        *map = HYDU_strdup("BTCSN");
-        goto fn_exit;
-    }
-    else if (type == HWLOC_OBJ_NODE) {
-        *map = HYDU_strdup("NTCSB");
-        goto fn_exit;
-    }
-    else if (type == HWLOC_OBJ_SOCKET) {
-        *map = HYDU_strdup("STCNB");
-        goto fn_exit;
-    }
-    else if (type == HWLOC_OBJ_CORE) {
-        *map = HYDU_strdup("CTSNB");
-        goto fn_exit;
-    }
-    else if (type == HWLOC_OBJ_PU) {
-        *map = HYDU_strdup("TCSNB");
-        goto fn_exit;
-    }
-
-    HYDU_ASSERT(type == HWLOC_OBJ_CACHE, status);
-
-    status = cache_to_cpu_type(cache_depth, &cpu_type);
-    HYDU_ERR_POP(status, "error while mapping cache to cpu object\n");
-
-    status = obj_type_to_map_str(cpu_type, cache_depth, map);
-    HYDU_ERR_POP(status, "error while mapping object to map string\n");
-
-  fn_exit:
-    HYDU_FUNC_EXIT();
-    return status;
-
-  fn_fail:
-    goto fn_exit;
-}
-
-static int balance_obj_idx(int *obj_idx, int *nbobjs_per_parent)
-{
-    int i, ret;
-
-    ret = 0;
-    for (i = 0; i < MAP_LENGTH - 1; i++) {
-        while (obj_idx[i] >= nbobjs_per_parent[i]) {
-            obj_idx[i] -= nbobjs_per_parent[i];
-            obj_idx[i + 1]++;
-        }
-        while (obj_idx[i] < 0) {
-            obj_idx[i] += nbobjs_per_parent[i];
-            obj_idx[i + 1]--;
-        }
-    }
-    while (obj_idx[MAP_LENGTH - 1] >= nbobjs_per_parent[MAP_LENGTH - 1]) {
-        obj_idx[MAP_LENGTH - 1] -= nbobjs_per_parent[MAP_LENGTH - 1];
-        ret = 1;
-    }
-
-    return ret;
-}
-
 static HYD_status handle_bitmap_binding(const char *binding, const char *mapping)
 {
-    int i, j, k, idx, bind_count, map_count, cache_depth = 0;
-    hwloc_obj_t obj;
-    hwloc_obj_type_t bind_obj_type;
-    int total_nbobjs[MAP_LENGTH], obj_idx[MAP_LENGTH], nbpu_per_obj[MAP_LENGTH];
-    int nbobjs_per_parent[MAP_LENGTH];
+    int i, j, k, bind_count, map_count, cache_depth = 0, bind_depth = 0, map_depth = 0;
+    int total_num_pus, total_map_objs, total_bind_objs, num_pus_in_map_domain,
+        num_pus_in_bind_domain, total_map_domains;
+    hwloc_obj_t map_obj, bind_obj, *start_pu;
+    hwloc_cpuset_t *map_domains;
     char *bind_str, *map_str;
     HYD_status status = HYD_SUCCESS;
 
@@ -370,15 +184,15 @@ static HYD_status handle_bitmap_binding(const char *binding, const char *mapping
 
     /* get the binding object */
     if (!strcmp(bind_str, "board"))
-        bind_obj_type = HWLOC_OBJ_MACHINE;
+        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE);
     else if (!strcmp(bind_str, "numa"))
-        bind_obj_type = HWLOC_OBJ_NODE;
+        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE);
     else if (!strcmp(bind_str, "socket"))
-        bind_obj_type = HWLOC_OBJ_SOCKET;
+        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET);
     else if (!strcmp(bind_str, "core"))
-        bind_obj_type = HWLOC_OBJ_CORE;
+        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE);
     else if (!strcmp(bind_str, "hwthread"))
-        bind_obj_type = HWLOC_OBJ_PU;
+        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU);
     else {
         /* check if it's in the l*cache format */
         cache_depth = parse_cache_string(bind_str);
@@ -386,56 +200,124 @@ static HYD_status handle_bitmap_binding(const char *binding, const char *mapping
             HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                 "unrecognized binding string \"%s\"\n", binding);
         }
-        bind_obj_type = HWLOC_OBJ_CACHE;
+        bind_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1);
+    }
+
+    /* get the mapping */
+    if (!strcmp(map_str, "board"))
+        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE);
+    else if (!strcmp(map_str, "numa"))
+        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE);
+    else if (!strcmp(map_str, "socket"))
+        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET);
+    else if (!strcmp(map_str, "core"))
+        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE);
+    else if (!strcmp(map_str, "hwthread"))
+        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU);
+    else {
+        cache_depth = parse_cache_string(map_str);
+        if (!cache_depth) {
+            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                                "unrecognized mapping string \"%s\"\n", mapping);
+        }
+        map_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1);
+    }
+
+    /*
+     * Process Affinity Algorithm:
+     *
+     * The code below works in 3 stages. The end result is an array of all the possible
+     * binding bitmaps for a system, based on the options specified.
+     *
+     * 1. Define all possible mapping "domains" in a system. A mapping domain is a group
+     *    of hardware elements found by traversing the topology. Each traversal skips the
+     *    number of elements the user specified in the mapping string. The traversal ends
+     *    when the next mapping domain == the first mapping domain. Note that if the
+     *    mapping string defines a domain that is larger than the system size, we exit
+     *    with an error.
+     *
+     * 2. Define the number of possible binding domains within a mapping domain. This
+     *    process is similar to step 1, in that we traverse the mapping domain finding
+     *    all possible bind combinations, stopping when a duplicate of the first binding
+     *    is reached. If a binding is larger (in # of PUs) than the mapping domain,
+     *    the number of possible bindings for that domain is 1. In this stage, we also
+     *    locate the first PU in each mapping domain for use later during binding.
+     *
+     * 3. Create the binding bitmaps. We allocate an array of bitmaps and fill them in
+     *    with all possible bindings. The starting PU in each mapping domain is advanced
+     *    if and when we wrap around to the beginning of the mapping domains. This ensures
+     *    that we do not repeat.
+     *
+     */
+
+    /* calculate the number of map domains */
+    total_num_pus = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
+
+    total_map_objs = hwloc_get_nbobjs_by_depth(topology, map_depth);
+    num_pus_in_map_domain = (total_num_pus / total_map_objs) * map_count;
+    HYDU_ERR_CHKANDJUMP(status, num_pus_in_map_domain > total_num_pus, HYD_INTERNAL_ERROR,
+                        "mapping option \"%s\" larger than total system size\n", mapping);
+
+    /* The number of total_map_domains should be large enough to
+     * contain all contiguous map object collections of length
+     * map_count.  For example, if the map object is "socket" and the
+     * map_count is 3, on a system with 4 sockets, the following map
+     * domains should be included: (0,1,2), (3,0,1), (2,3,0), (1,2,3).
+     * We do this by finding how many times we need to replicate the
+     * list of the map objects so that an integral number of map
+     * domains can map to them.  In the above case, the list of map
+     * objects is replicated 3 times. */
+    for (i = 1; (i * total_map_objs) % map_count; i++);
+    total_map_domains = (i * total_map_objs) / map_count;
+
+    /* initialize the map domains */
+    HYDU_MALLOC(map_domains, hwloc_bitmap_t *, total_map_domains * sizeof(hwloc_bitmap_t), status);
+    HYDU_MALLOC(start_pu, hwloc_obj_t *, total_map_domains * sizeof(hwloc_obj_t), status);
+
+    /* For each map domain, find the next map object (first map object
+     * for the first map domain) and add the following "map_count"
+     * number of contiguous map objects, wrapping to the first one if
+     * needed, to the map domain.  Store the first PU in the first map
+     * object of the map domain as "start_pu".  This is needed later
+     * for the actual binding. */
+    map_obj = NULL;
+    for (i = 0; i < total_map_domains; i++) {
+        map_domains[i] = hwloc_bitmap_alloc();
+        hwloc_bitmap_zero(map_domains[i]);
+
+        for (j = 0; j < map_count; j++) {
+            map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj);
+            /* map_obj will be NULL if it reaches the end. call again to wrap around */
+            if (!map_obj)
+                map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj);
+
+            if (j == 0)
+                start_pu[i] =
+                    hwloc_get_obj_inside_cpuset_by_type(topology, map_obj->cpuset, HWLOC_OBJ_PU, 0);
+
+            hwloc_bitmap_or(map_domains[i], map_domains[i], map_obj->cpuset);
+        }
     }
 
 
-    /* get the mapping string */
-    if (!strcmp(map_str, "board")) {
-        HYDU_FREE(map_str);
-        obj_type_to_map_str(HWLOC_OBJ_MACHINE, 0, &map_str);
-    }
-    else if (!strcmp(map_str, "numa")) {
-        HYDU_FREE(map_str);
-        obj_type_to_map_str(HWLOC_OBJ_NODE, 0, &map_str);
-    }
-    else if (!strcmp(map_str, "socket")) {
-        HYDU_FREE(map_str);
-        obj_type_to_map_str(HWLOC_OBJ_SOCKET, 0, &map_str);
-    }
-    else if (!strcmp(map_str, "core")) {
-        HYDU_FREE(map_str);
-        obj_type_to_map_str(HWLOC_OBJ_CORE, 0, &map_str);
-    }
-    else if (!strcmp(map_str, "hwthread")) {
-        HYDU_FREE(map_str);
-        obj_type_to_map_str(HWLOC_OBJ_PU, 0, &map_str);
+    /* Find the possible binding domains is similar to that of map
+     * domains.  But if a binding domain is larger (in # of PUs) than
+     * the mapping domain, the number of possible bindings for that
+     * domain is 1. */
+
+    /* calculate the number of possible bindings and allocate bitmaps for them */
+    total_bind_objs = hwloc_get_nbobjs_by_depth(topology, bind_depth);
+    num_pus_in_bind_domain = (total_num_pus / total_bind_objs) * bind_count;
+
+    if (num_pus_in_bind_domain < num_pus_in_map_domain) {
+        for (i = 1; (i * num_pus_in_map_domain) % num_pus_in_bind_domain; i++);
+        HYDT_topo_hwloc_info.num_bitmaps = (i * num_pus_in_map_domain * total_map_domains) / num_pus_in_bind_domain;
     }
     else {
-        cache_depth = parse_cache_string(map_str);
-        if (cache_depth) {
-            HYDU_FREE(map_str);
-            obj_type_to_map_str(HWLOC_OBJ_CACHE, cache_depth, &map_str);
-        }
-        else {
-            for (i = 0; i < strlen(map_str); i++) {
-                if (map_str[i] >= 'a' && map_str[i] <= 'z')
-                    map_str[i] += ('A' - 'a');
-
-                /* If any of the characters are not in the form, we
-                 * want, return an error */
-                if (map_str[i] != 'T' && map_str[i] != 'C' && map_str[i] != 'S' &&
-                    map_str[i] != 'N' && map_str[i] != 'B') {
-                    HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
-                                        "unrecognized mapping string \"%s\"\n", mapping);
-                }
-            }
-        }
+        HYDT_topo_hwloc_info.num_bitmaps = total_map_domains;
     }
 
     /* initialize bitmaps */
-    HYDT_topo_hwloc_info.num_bitmaps = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
-
     HYDU_MALLOC(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
                 HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status);
 
@@ -444,44 +326,53 @@ static HYD_status handle_bitmap_binding(const char *binding, const char *mapping
         hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);
     }
 
-    for (i = 0; i < MAP_LENGTH; i++) {
-        if (map_str[i] == 'T')
-            status = get_nbobjs_by_type(HWLOC_OBJ_PU, &total_nbobjs[i], &nbobjs_per_parent[i]);
-        else if (map_str[i] == 'C')
-            status = get_nbobjs_by_type(HWLOC_OBJ_CORE, &total_nbobjs[i], &nbobjs_per_parent[i]);
-        else if (map_str[i] == 'S')
-            status = get_nbobjs_by_type(HWLOC_OBJ_SOCKET, &total_nbobjs[i], &nbobjs_per_parent[i]);
-        else if (map_str[i] == 'N')
-            status = get_nbobjs_by_type(HWLOC_OBJ_NODE, &total_nbobjs[i], &nbobjs_per_parent[i]);
-        else if (map_str[i] == 'B')
-            status = get_nbobjs_by_type(HWLOC_OBJ_MACHINE, &total_nbobjs[i], &nbobjs_per_parent[i]);
-        HYDU_ERR_POP(status, "unable to get number of objects\n");
-
-        nbpu_per_obj[i] = HYDT_topo_hwloc_info.num_bitmaps / total_nbobjs[i];
-        obj_idx[i] = 0;
-    }
-
+    /* do bindings */
     i = 0;
     while (i < HYDT_topo_hwloc_info.num_bitmaps) {
-        for (j = 0; j < bind_count; j++) {
-            for (idx = 0, k = 0; k < MAP_LENGTH; k++)
-                idx += (obj_idx[k] * nbpu_per_obj[k]);
-
-            obj = find_obj_containing_pu(bind_obj_type, idx++, cache_depth);
-            if (obj == NULL)
-                break;
-
-            hwloc_bitmap_or(HYDT_topo_hwloc_info.bitmap[i], HYDT_topo_hwloc_info.bitmap[i],
-                            obj->cpuset);
+        for (j = 0; j < total_map_domains; j++) {
+            bind_obj = hwloc_get_ancestor_obj_by_depth(topology, bind_depth, start_pu[j]);
+
+            for (k = 0; k < bind_count; k++) {
+                hwloc_bitmap_or(HYDT_topo_hwloc_info.bitmap[i], HYDT_topo_hwloc_info.bitmap[i],
+                                bind_obj->cpuset);
+
+                /* if the binding is smaller than the mapping domain, wrap around inside that domain */
+                if (num_pus_in_bind_domain < num_pus_in_map_domain) {
+                    bind_obj =
+                        hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j],
+                                                                  bind_depth, bind_obj);
+                    if (!bind_obj)
+                        bind_obj =
+                            hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j],
+                                                                      bind_depth, bind_obj);
+                }
+                else {
+                    bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj);
+                    if (!bind_obj)
+                        bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj);
+                }
 
-            obj_idx[0] += map_count;
-            balance_obj_idx(obj_idx, nbobjs_per_parent);
+            }
+            i++;
+
+            /* advance the starting position for this map domain, if needed */
+            if (num_pus_in_bind_domain < num_pus_in_map_domain) {
+                for (k = 0; k < num_pus_in_bind_domain; k++) {
+                    start_pu[j] = hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j],
+                                                                           HWLOC_OBJ_PU,
+                                                                           start_pu[j]);
+                    if (!start_pu[j])
+                        start_pu[j] =
+                            hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j],
+                                                                     HWLOC_OBJ_PU, start_pu[j]);
+                }
+            }
         }
-        i++;
     }
 
-    /* reset the number of bitmaps available to what we actually set */
-    HYDT_topo_hwloc_info.num_bitmaps = i;
+    /* free temporary memory */
+    HYDU_FREE(map_domains);
+    HYDU_FREE(start_pu);
 
   fn_exit:
     HYDU_FUNC_EXIT();
@@ -560,7 +451,8 @@ HYD_status HYDT_topo_hwloc_bind(int idx)
     if (!HYDT_topo_hwloc_info.user_binding || (idx < HYDT_topo_hwloc_info.num_bitmaps)) {
         id = idx % HYDT_topo_hwloc_info.num_bitmaps;
         hwloc_set_cpubind(topology, HYDT_topo_hwloc_info.bitmap[id], 0);
-        hwloc_set_membind(topology, HYDT_topo_hwloc_info.bitmap[id], HYDT_topo_hwloc_info.membind, 0);
+        hwloc_set_membind(topology, HYDT_topo_hwloc_info.bitmap[id], HYDT_topo_hwloc_info.membind,
+                          0);
     }
 
     HYDU_FUNC_EXIT();

http://git.mpich.org/mpich.git/commitdiff/15aec608d167138bbe92b977e8b3f00fd342395f

commit 15aec608d167138bbe92b977e8b3f00fd342395f
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Fri Jan 17 10:58:02 2014 -0600

    example program to display processor affinity
    
    Add a program in the hydra examples directory for use with the hydra
    binding/mapping options. This program will print out which CPUs it
    is allowed to run on according to the OS.
    
    Refs #1858
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/pm/hydra/examples/print_cpus_allowed.c b/src/pm/hydra/examples/print_cpus_allowed.c
new file mode 100644
index 0000000..4035b9b
--- /dev/null
+++ b/src/pm/hydra/examples/print_cpus_allowed.c
@@ -0,0 +1,31 @@
+#define PATH_MAX 1000
+
+#include "mpi.h"
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+    int rank, size, namelen;
+    FILE *fp;
+    char path[PATH_MAX];
+    char processor_name[MPI_MAX_PROCESSOR_NAME];
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    MPI_Get_processor_name(processor_name,&namelen);
+
+    fp = popen("grep Cpus_allowed_list /proc/$$/status", "r");
+
+    while (fgets(path, PATH_MAX, fp) != NULL) {
+      printf("%s[%d]: %s", processor_name, rank, path);
+    }
+
+    pclose(fp);
+
+    fflush(stdout);
+
+    MPI_Finalize();
+
+    return 0;
+}

http://git.mpich.org/mpich.git/commitdiff/bec02c2a6a4c050875502a77bd1ed3bd040c286e

commit bec02c2a6a4c050875502a77bd1ed3bd040c286e
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Thu Jan 2 23:36:05 2014 -0600

    simplify bitmap initialization in process binding
    
    Move the bitmap allocation out of init so we can correctly allocate
    what is needed for a user binding. Remove unnecessary duplicate code
    and use simpler hwloc provided functions where possible.
    
    Refs #1858
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c
index 38da519..4af224e 100644
--- a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c
+++ b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.c
@@ -23,15 +23,7 @@ static HYD_status handle_user_binding(const char *binding)
 
     HYDU_FUNC_ENTER();
 
-    HYDT_topo_hwloc_info.num_bitmaps = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
-    HYDU_MALLOC(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
-                HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status);
-
-    /* Initialize all values to map to all CPUs */
-    for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) {
-        HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc();
-        hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);
-    }
+    HYDU_ASSERT(hwloc_initialized, status);
 
     num_bind_entries = 1;
     for (i = 0; binding[i]; i++)
@@ -68,16 +60,24 @@ static HYD_status handle_user_binding(const char *binding)
     }
     bind_entries[j][k++] = 0;
 
+    /* initialize bitmaps */
+    HYDU_MALLOC(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
+                num_bind_entries * sizeof(hwloc_bitmap_t), status);
 
     for (i = 0; i < num_bind_entries; i++) {
+        HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc();
+        hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);
         bindstr = strtok(bind_entries[i], "+");
         while (bindstr) {
             hwloc_bitmap_set(HYDT_topo_hwloc_info.bitmap[i],
-                             atoi(bindstr) % HYDT_topo_hwloc_info.num_bitmaps);
+                             atoi(bindstr));
             bindstr = strtok(NULL, "+");
         }
     }
 
+    HYDT_topo_hwloc_info.num_bitmaps = num_bind_entries;
+    HYDT_topo_hwloc_info.user_binding = 1;
+
   fn_exit:
     HYDU_FUNC_EXIT();
     return status;
@@ -93,7 +93,11 @@ static HYD_status handle_rr_binding(void)
 
     HYDU_FUNC_ENTER();
 
+    HYDU_ASSERT(hwloc_initialized, status);
+
+    /* initialize bitmaps */
     HYDT_topo_hwloc_info.num_bitmaps = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
+
     HYDU_MALLOC(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
                 HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status);
 
@@ -429,6 +433,16 @@ static HYD_status handle_bitmap_binding(const char *binding, const char *mapping
         }
     }
 
+    /* initialize bitmaps */
+    HYDT_topo_hwloc_info.num_bitmaps = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
+
+    HYDU_MALLOC(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
+                HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status);
+
+    for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) {
+        HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc();
+        hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);
+    }
 
     for (i = 0; i < MAP_LENGTH; i++) {
         if (map_str[i] == 'T')
@@ -479,7 +493,6 @@ static HYD_status handle_bitmap_binding(const char *binding, const char *mapping
 
 HYD_status HYDT_topo_hwloc_init(const char *binding, const char *mapping, const char *membind)
 {
-    int i;
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -491,20 +504,6 @@ HYD_status HYDT_topo_hwloc_init(const char *binding, const char *mapping, const
 
     hwloc_initialized = 1;
 
-
-    /* initialize bitmaps */
-    status = get_nbobjs_by_type(HWLOC_OBJ_PU, &HYDT_topo_hwloc_info.num_bitmaps, NULL);
-    HYDU_ERR_POP(status, "unable to get number of PUs\n");
-
-    HYDU_MALLOC(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
-                HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status);
-
-    for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) {
-        HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc();
-        hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);
-    }
-
-
     /* bindings that don't require mapping */
     if (!strncmp(binding, "user:", strlen("user:"))) {
         status = handle_user_binding(binding + strlen("user:"));
@@ -552,13 +551,17 @@ HYD_status HYDT_topo_hwloc_init(const char *binding, const char *mapping, const
 
 HYD_status HYDT_topo_hwloc_bind(int idx)
 {
-    int id = idx % HYDT_topo_hwloc_info.num_bitmaps;
+    int id;
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
 
-    hwloc_set_cpubind(topology, HYDT_topo_hwloc_info.bitmap[id], 0);
-    hwloc_set_membind(topology, HYDT_topo_hwloc_info.bitmap[id], HYDT_topo_hwloc_info.membind, 0);
+    /* For processes where the user did not specify a binding unit, no binding is needed. */
+    if (!HYDT_topo_hwloc_info.user_binding || (idx < HYDT_topo_hwloc_info.num_bitmaps)) {
+        id = idx % HYDT_topo_hwloc_info.num_bitmaps;
+        hwloc_set_cpubind(topology, HYDT_topo_hwloc_info.bitmap[id], 0);
+        hwloc_set_membind(topology, HYDT_topo_hwloc_info.bitmap[id], HYDT_topo_hwloc_info.membind, 0);
+    }
 
     HYDU_FUNC_EXIT();
     return status;
diff --git a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.h b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.h
index 6d3b77f..5147ad9 100644
--- a/src/pm/hydra/tools/topo/hwloc/topo_hwloc.h
+++ b/src/pm/hydra/tools/topo/hwloc/topo_hwloc.h
@@ -18,6 +18,7 @@ struct HYDT_topo_hwloc_info {
     int num_bitmaps;
     hwloc_bitmap_t *bitmap;
     hwloc_membind_policy_t membind;
+    int user_binding;
 };
 extern struct HYDT_topo_hwloc_info HYDT_topo_hwloc_info;
 

-----------------------------------------------------------------------

Summary of changes:
 src/pm/hydra/examples/binding.c            |    3 +-
 src/pm/hydra/examples/print_cpus_allowed.c |   31 ++
 src/pm/hydra/tools/topo/hwloc/topo_hwloc.c |  515 ++++++++++++----------------
 src/pm/hydra/tools/topo/hwloc/topo_hwloc.h |    2 +
 src/pm/hydra/tools/topo/topo.c             |    2 +
 src/pm/hydra/tools/topo/topo.h             |    2 +
 src/pm/hydra/ui/mpich/utils.c              |    2 -
 7 files changed, 252 insertions(+), 305 deletions(-)
 create mode 100644 src/pm/hydra/examples/print_cpus_allowed.c


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list