[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1-282-g33aacdb

Service Account noreply at mpich.org
Tue May 27 15:13:31 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  33aacdb8fe417c3279b3ee5508e5ed93bdd93f22 (commit)
       via  2b401aaf6188830db802bd9432028ca3526a699c (commit)
       via  2ff6d4931312b4e17fe1c813ef6b2450732be88d (commit)
       via  3018d28c8e320e8432ca45b135ffa98ebce35040 (commit)
       via  c892ac58e4ac7645429030d02cff648dcbd7f6d2 (commit)
      from  3d3a4b3a1038f7809ffdba5a7ebe1e02a7325daf (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/33aacdb8fe417c3279b3ee5508e5ed93bdd93f22

commit 33aacdb8fe417c3279b3ee5508e5ed93bdd93f22
Author: Su Huang <suhuang at us.ibm.com>
Date:   Tue May 27 07:58:50 2014 -0400

    pamid: multiple fixes for RMA shared window
    
    The following fixes are applied to PAMID for MPI_Win_allocate_shared:
    1) Include fcntl.h in mpid_win_allocate_shared.c.
    2) For a one-task window, allocate space for mutex_lock and initialize it.
       The address of the allocated memory is stored in win->mpid.shm->base_addr.
    3) In MPID_Fetch_and_op(), the base address for target_rank is changed
       from base = win->mpid.shm->base_addr;
       to   base = win->mpid.info[target_rank].base_addr;
    4) MPIDI_SHM_Win_free(): to free the shared window for a one-task window, change from
       MPIU_Free((*win_ptr)->base);
       to
       MPIU_Free((*win_ptr)->mpid.shm->base_addr);
    5) MPID_Abort() requires arguments, e.g.
       MPID_Abort(NULL, MPI_ERR_RMA_SHARED, -1, "MPI_Win_free error");
    
    Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>

diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index 161d0d8..c9a93fe 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -23,7 +23,7 @@
 #include <sys/shm.h>
 #include <sys/ipc.h>
 #include <sys/stat.h>
-
+#include <fcntl.h>
 #include <sys/mman.h>
 
 #undef FUNCNAME 
@@ -40,7 +40,6 @@ extern int mpidi_dynamic_tasking;
 #define ALIGN_BOUNDARY 128     /* Align data structures to cache line */
 #define PAD_SIZE(s) (ALIGN_BOUNDARY - (sizeof(s) & (ALIGN_BOUNDARY-1)))
 
-
 int CheckRankOnNode(MPID_Comm  * comm_ptr,int *onNode ) {
     int comm_size, i;
     int mpi_errno = PAMI_SUCCESS;
@@ -318,6 +317,7 @@ MPID_getSharedSegment(MPI_Aint     size,
     MPI_Aint pageSize,pageSize2, len,new_size;
     MPID_Win  *win;
     int    padSize;
+    void   *base_pp;
 
     win =  *win_ptr;
     comm_size = win->comm_ptr->local_size;
@@ -332,26 +332,39 @@ MPID_getSharedSegment(MPI_Aint     size,
     if (comm_size == 1) {
         /* Do not use shared memory when there is only one rank on the node */
 
+        /* 'size' must not be < 0 */
+        MPIU_ERR_CHKANDSTMT(size < 0 , mpi_errno, MPI_ERR_SIZE,return mpi_errno, "**rmasize");
+
+        /* The beginning of the heap allocation contains a control block
+         * before the data begins.
+         */
+        new_size = MPIDI_ROUND_UP_PAGESIZE(sizeof(MPIDI_Win_shm_ctrl_t),pageSize);
+
         if (size > 0) {
             if (*noncontig)
-                new_size = MPIDI_ROUND_UP_PAGESIZE(size,pageSize);
+                new_size += MPIDI_ROUND_UP_PAGESIZE(size,pageSize);
             else
-                new_size = size;
-
-            win->base = MPIU_Malloc(new_size);
-            MPIU_ERR_CHKANDJUMP((win->base == NULL), mpi_errno, MPI_ERR_BUFFER, "**bufnull");
-
-        } else if (size == 0) {
-            win->base = NULL;
-
-        } else {
-            /* 'size' must be >= 0 */
-            MPIU_ERR_CHKANDSTMT(size >=0 , mpi_errno, MPI_ERR_SIZE,return mpi_errno, "**rmasize");
+                new_size += size;
         }
 
+        base_pp = MPIU_Malloc(new_size);
+        MPID_assert(base_pp !=NULL);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
         win->mpid.shm->segment_len = new_size;
+        win->mpid.shm->base_addr = base_pp;
+        if (size !=0) {
+            win->mpid.info[rank].base_addr = (void *)((MPI_Aint) base_pp + MPIDI_ROUND_UP_PAGESIZE(sizeof(MPIDI_Win_shm_ctrl_t),pageSize));
+        } else {
+            win->mpid.info[rank].base_addr = NULL;
+        }
+        win->base = win->mpid.info[rank].base_addr;
+
+        /* set mutex_lock address and initialize it   */
+        win->mpid.shm->mutex_lock = (pthread_mutex_t *) win->mpid.shm->base_addr;
+        win->mpid.shm->shm_count = (int *)((MPI_Aint) win->mpid.shm->mutex_lock + (MPI_Aint) sizeof(pthread_mutex_t));
+        MPIDI_SHM_MUTEX_INIT(win);
+        OPA_fetch_and_add_int((OPA_int_t *) win->mpid.shm->shm_count,1);
 
     } else {
         /* allocate a temporary buffer to gather the 'size' of each buffer on
@@ -392,7 +405,7 @@ MPID_getSharedSegment(MPI_Aint     size,
 #elif  USE_MMAP_SHM
         win->mpid.shm->base_addr = MPID_getSharedSegment_mmap(win);
 #else
-        MPID_Abort();
+        MPID_Abort(NULL, MPI_ERR_RMA_SHARED, -1, "RMA shared segment error");
 #endif
 
         /* increment the shared counter */
diff --git a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
index 4a6e831..b1a415c 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
@@ -300,7 +300,7 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
         if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
             MPIDI_SHM_MUTEX_LOCK(win);
             shm_locked = 1;
-            base = win->mpid.shm->base_addr;
+            base = win->mpid.info[target_rank].base_addr;
             disp_unit = win->disp_unit;
 
         }
diff --git a/src/mpid/pamid/src/onesided/mpid_win_free.c b/src/mpid/pamid/src/onesided/mpid_win_free.c
index c31ecf0..e926448 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_free.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_free.c
@@ -49,10 +49,10 @@ int MPIDI_SHM_Win_free(MPID_Win **win_ptr)
     munmap ((*win_ptr)->mpid.shm->base_addr, (*win_ptr)->mpid.shm->segment_len);
     if (0 == (*win_ptr)->comm_ptr->rank) shm_unlink ((*win_ptr)->mpid.shm->shm_key);
 #else
-    MPID_Abort();
+    MPID_Abort(NULL, MPI_ERR_RMA_SHARED, -1, "MPI_Win_free error");
 #endif
   } else {/* one task on a node */
-    MPIU_Free((*win_ptr)->base);
+    MPIU_Free((*win_ptr)->mpid.shm->base_addr);
   }
   MPIU_Free((*win_ptr)->mpid.shm);
   (*win_ptr)->mpid.shm = NULL;
diff --git a/src/mpid/pamid/src/onesided/mpidi_onesided.h b/src/mpid/pamid/src/onesided/mpidi_onesided.h
index 09af8b7..214926a 100644
--- a/src/mpid/pamid/src/onesided/mpidi_onesided.h
+++ b/src/mpid/pamid/src/onesided/mpidi_onesided.h
@@ -37,14 +37,13 @@ pami_rmw_t   zero_rmw_parms;
 #define MPIDI_QUICKSLEEP     usleep(1);
 #define MAX_NUM_CTRLSEND  1024          /* no more than 1024 outstanding control sends */
 
-
 #define MPIDI_SHM_MUTEX_LOCK(win)                                                       \
     do {                                                                                \
         pthread_mutex_t *shm_mutex = win->mpid.shm->mutex_lock;                         \
         int rval = pthread_mutex_lock(shm_mutex);                                       \
         MPIU_ERR_CHKANDJUMP1(rval, mpi_errno, MPI_ERR_OTHER, "**pthread_lock",          \
                              "**pthread_lock %s", strerror(rval));                      \
-    } while (0)
+    } while (0);
 
 #define MPIDI_SHM_MUTEX_UNLOCK(win)                                                     \
     do {                                                                                \
@@ -52,7 +51,7 @@ pami_rmw_t   zero_rmw_parms;
         int rval = pthread_mutex_unlock(shm_mutex);                                     \
         MPIU_ERR_CHKANDJUMP1(rval, mpi_errno, MPI_ERR_OTHER, "**pthread_unlock",        \
                              "**pthread_unlock %s", strerror(rval));                    \
-    } while (0)
+    } while (0);
 
 #define MPIDI_SHM_MUTEX_INIT(win)                                                       \
     do {                                                                                \

http://git.mpich.org/mpich.git/commitdiff/2b401aaf6188830db802bd9432028ca3526a699c

commit 2b401aaf6188830db802bd9432028ca3526a699c
Author: Michael Blocksome <blocksom at us.ibm.com>
Date:   Fri May 16 15:20:37 2014 -0500

    pamid: add a posix (mmap) shared memory impl. for bgq

diff --git a/src/mpid/pamid/include/mpidi_datatypes.h b/src/mpid/pamid/include/mpidi_datatypes.h
index 26da181..73cbb3a 100644
--- a/src/mpid/pamid/include/mpidi_datatypes.h
+++ b/src/mpid/pamid/include/mpidi_datatypes.h
@@ -457,7 +457,11 @@ typedef struct MPIDI_Win_shm_t
                                                  region associated with it */
     void *base_addr;                /* base address of shared memory region */
     MPI_Aint segment_len;           /* size of shared memory region         */
-    uint32_t  shm_id;                /* shared memory id                    */
+    union
+    {
+      uint32_t shm_id;                /* shared memory id - sysv            */
+      char     shm_key[64];         /* shared memory key - posix            */
+    };
     int       *shm_count;
     MPIDI_SHM_MUTEX *mutex_lock;    /* shared memory windows -- lock for    */
                                      /*     accumulate/atomic operations     */
diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index 10fba99..161d0d8 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -24,6 +24,8 @@
 #include <sys/ipc.h>
 #include <sys/stat.h>
 
+#include <sys/mman.h>
+
 #undef FUNCNAME 
 #define FUNCNAME MPID_Win_allocate_shared
 #undef FCNAME
@@ -154,6 +156,57 @@ int GetPageSize(void *addr, ulong *pageSize)
 }
 
 void *
+MPID_getSharedSegment_mmap(MPID_Win * win)
+{
+  void * base_addr;
+  int rank, rc, fd;
+  int mpi_errno = MPI_SUCCESS;
+  int errflag = FALSE;
+  int first = 0;
+
+  snprintf (win->mpid.shm->shm_key, 63, "/mpich/comm-%d/win_shared", win->comm_ptr->context_id);
+  rc = shm_open (win->mpid.shm->shm_key, O_RDWR | O_CREAT | O_EXCL, 0600);
+  if (0 == rc)
+  {
+    first = 1;
+  } else {
+    rc = shm_open (win->mpid.shm->shm_key, O_RDWR, 0);
+    MPIU_ERR_CHKANDJUMP((rc == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
+  }
+
+  fd = rc;
+  rc = ftruncate (fd, win->mpid.shm->segment_len);
+  MPIU_ERR_CHKANDJUMP((rc == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
+
+
+  base_addr = mmap (NULL, win->mpid.shm->segment_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  if (base_addr == NULL || base_addr == MAP_FAILED || base_addr == (void *) -1) { /* error */
+    if (0 == rank) shm_unlink (win->mpid.shm->shm_key);
+    MPIU_ERR_CHKANDJUMP((win->mpid.shm->shm_id == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
+  }
+
+  close (fd); /* no longer needed */
+
+  /* set mutex_lock address and initialize it   */
+  win->mpid.shm->mutex_lock = (MPIDI_SHM_MUTEX *) base_addr;
+  if (1 == first) {
+    MPIDI_SHM_MUTEX_INIT(win);
+  }
+
+  mpi_errno = MPIR_Barrier_impl(win->comm_ptr, &errflag);
+  MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
+  win->mpid.shm->allocated = 1;
+
+fn_exit:
+    return base_addr;
+    /* --BEGIN ERROR HANDLING-- */
+fn_fail:
+    goto fn_exit;
+    /* --END ERROR HANDLING-- */
+}
+
+void *
 MPID_getSharedSegment_sysv(MPID_Win * win)
 {
     int mpi_errno = MPI_SUCCESS;
@@ -335,8 +388,9 @@ MPID_getSharedSegment(MPI_Aint     size,
          * data buffer - possibly padded if non-contiguous.
          */
 #ifdef USE_SYSV_SHM
-        win->mpid.shm->base_addr =
-          MPID_getSharedSegment_sysv(win);
+        win->mpid.shm->base_addr = MPID_getSharedSegment_sysv(win);
+#elif  USE_MMAP_SHM
+        win->mpid.shm->base_addr = MPID_getSharedSegment_mmap(win);
 #else
         MPID_Abort();
 #endif
@@ -435,40 +489,6 @@ MPID_Win_allocate_shared(MPI_Aint     size,
   win->mpid.info[rank].win = win;
   win->mpid.info[rank].disp_unit = disp_unit;
 
-#ifdef __BGQ__
-  /* verify BG_MAPCOMMONHEAP=1 env. variable is set */
-  if (rank == 0) {
-    assert(NULL!=getenv("BG_MAPCOMMONHEAP"));
-    baseP = MPIU_Malloc(size+sizeof(pthread_mutex_t));
-#ifdef MPIDI_NO_ASSERT
-    MPIU_ERR_CHKANDJUMP((baseP == NULL), mpi_errno, MPI_ERR_BUFFER, "**bufnull");
-#else
-    MPID_assert(baseP != NULL);
-#endif
-
-    pthread_mutex_t *mutex = (pthread_mutex_t *)(((uintptr_t) baseP) + size);
-    pthread_mutexattr_t attr;
-    pthread_mutexattr_init(&attr);
-    pthread_mutex_init(mutex, &attr);
-  }
-
-  int errflag = 0;
-  mpi_errno = MPIR_Bcast_impl(&baseP, sizeof(char*), MPI_BYTE, 0,
-                              win->comm_ptr, &errflag);
-
-  win->mpid.shm->mutex_lock = (pthread_mutex_t *)(((uintptr_t) baseP) + size);
-  win->mpid.shm->allocated = 1;
-  win->mpid.shm->base_addr = baseP;
-
-  win->base = baseP;
-  win->mpid.info[rank].base_addr = baseP;
-
-  mpi_errno = MPIDI_Win_allgather(size,win_ptr);
-  if (mpi_errno != MPI_SUCCESS) {
-    MPIU_Free(win->mpid.shm);
-    return mpi_errno;
-  }
-#else
   mpi_errno=CheckSpaceType(win_ptr,info,&noncontig);
   comm_size = (*win_ptr)->comm_ptr->local_size;
   MPID_getSharedSegment(size, disp_unit,comm_ptr, win_ptr, &pageSize, &noncontig);
@@ -476,6 +496,7 @@ MPID_Win_allocate_shared(MPI_Aint     size,
   mpi_errno = MPIDI_Win_allgather(size,win_ptr);
   if (mpi_errno != MPI_SUCCESS)
       return mpi_errno;
+
   win->mpid.info[0].base_addr = win->base;
   if (comm_size > 1) {
      char *cur_base = (*win_ptr)->base;
@@ -492,7 +513,6 @@ MPID_Win_allocate_shared(MPI_Aint     size,
            }
       }
   }
-#endif
 
   *(void**) base_ptr = (void *) win->mpid.info[rank].base_addr;
 
diff --git a/src/mpid/pamid/src/onesided/mpid_win_free.c b/src/mpid/pamid/src/onesided/mpid_win_free.c
index b09fe6c..c31ecf0 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_free.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_free.c
@@ -31,15 +31,6 @@ int MPIDI_SHM_Win_free(MPID_Win **win_ptr)
   int    rc;
   int mpi_errno = MPI_SUCCESS;
 
-#ifdef __BGQ__
-  if ((*win_ptr)->comm_ptr->rank == 0) {
-    MPIDI_SHM_MUTEX_DESTROY(*win_ptr);
-    MPIU_Free ((*win_ptr)->base);
-  }
-  (*win_ptr)->mpid.shm = NULL;
-#endif
-
-#ifdef __PE__
   /* Free shared memory region */
   /* free shm_base_addrs that's only used for shared memory windows */
   if ((*win_ptr)->mpid.shm->allocated) {
@@ -48,17 +39,23 @@ int MPIDI_SHM_Win_free(MPID_Win **win_ptr)
     if ((*win_ptr)->comm_ptr->rank == 0) {
       MPIDI_SHM_MUTEX_DESTROY(*win_ptr);
       }
+#ifdef USE_SYSV_SHM
     mpi_errno = shmdt((*win_ptr)->mpid.shm->base_addr);
     if ((*win_ptr)->comm_ptr->rank == 0) {
 	rc=shmctl((*win_ptr)->mpid.shm->shm_id,IPC_RMID,NULL);
 	MPIU_ERR_CHKANDJUMP((rc == -1), errno,MPI_ERR_RMA_SHARED, "**shmctl");
     }
+#elif USE_MMAP_SHM
+    munmap ((*win_ptr)->mpid.shm->base_addr, (*win_ptr)->mpid.shm->segment_len);
+    if (0 == (*win_ptr)->comm_ptr->rank) shm_unlink ((*win_ptr)->mpid.shm->shm_key);
+#else
+    MPID_Abort();
+#endif
   } else {/* one task on a node */
     MPIU_Free((*win_ptr)->base);
   }
   MPIU_Free((*win_ptr)->mpid.shm);
   (*win_ptr)->mpid.shm = NULL;
-#endif
 
  fn_fail:
   return mpi_errno;
diff --git a/src/mpid/pamid/subconfigure.m4 b/src/mpid/pamid/subconfigure.m4
index 8b196fa..c1992b1 100644
--- a/src/mpid/pamid/subconfigure.m4
+++ b/src/mpid/pamid/subconfigure.m4
@@ -56,6 +56,7 @@ if test "${pamid_platform}" = "PE" ; then
   with_shared_memory=sysv
         PM_REQUIRES_PMI=pmi2/poe
 elif test "${pamid_platform}" = "BGQ" ; then
+  with_shared_memory=mmap
   MPID_DEFAULT_CROSS_FILE=${master_top_srcdir}/src/mpid/pamid/cross/bgq8
   MPID_DEFAULT_PM=no
 fi

http://git.mpich.org/mpich.git/commitdiff/2ff6d4931312b4e17fe1c813ef6b2450732be88d

commit 2ff6d4931312b4e17fe1c813ef6b2450732be88d
Author: Michael Blocksome <blocksom at us.ibm.com>
Date:   Fri May 16 15:26:38 2014 -0500

    pamid: move pe-specific code to sysv function

diff --git a/src/mpid/pamid/include/mpidi_datatypes.h b/src/mpid/pamid/include/mpidi_datatypes.h
index b773ce7..26da181 100644
--- a/src/mpid/pamid/include/mpidi_datatypes.h
+++ b/src/mpid/pamid/include/mpidi_datatypes.h
@@ -36,6 +36,8 @@
 #include "pami.h"
 #include "mpidi_trace.h"
 
+#include "opa_primitives.h"
+
 #if (MPIU_HANDLE_ALLOCATION_METHOD == MPIU_HANDLE_ALLOCATION_THREAD_LOCAL) && defined(__BGQ__)
 struct MPID_Request;
 typedef struct
@@ -443,6 +445,12 @@ typedef struct MPIDI_Win_info
 
 typedef pthread_mutex_t MPIDI_SHM_MUTEX;
 
+typedef struct MPIDI_Win_shm_ctrl_t
+{
+  MPIDI_SHM_MUTEX mutex_lock;
+  OPA_int_t       active;
+} MPIDI_Win_shm_ctrl_t;
+
 typedef struct MPIDI_Win_shm_t
 {
     int allocated;                  /* flag: TRUE iff this window has a shared memory
diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index 1ecd9b5..10fba99 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -153,168 +153,206 @@ int GetPageSize(void *addr, ulong *pageSize)
   return 0;
 }
 
+void *
+MPID_getSharedSegment_sysv(MPID_Win * win)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int errflag = FALSE;
+    uint32_t shm_key;
+    int rank;
+    char *cp;
+    int shm_flag = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;
+    void * base_addr;
+
+    shm_key = (uint32_t) -1;
+
+    rank = win->comm_ptr->rank;
+
+    if (rank == 0) {
+#ifdef DYNAMIC_TASKING
+        /* generate an appropriate key */
+        if (!mpidi_dynamic_tasking) {
+            cp = getenv("MP_I_PMD_PID");
+            if (cp) {
+                shm_key = atoi(cp);
+                shm_key = shm_key & 0x07ffffff;
+                shm_key = shm_key | 0x80000000;
+            } else {
+                cp = getenv("MP_PARTITION");
+                if (cp ) {
+                    shm_key = atol(cp);
+                    shm_key = (shm_key << 16) + SHM_KEY_TAIL;
+                } else {
+                    TRACE_ERR("ERROR MP_PARTITION not set \n");
+                }
+            }
+        } else {
+            cp = getenv("MP_I_KEY_RANGE");
+            if (cp) {
+                sscanf(cp, "0x%x", &shm_key);
+                shm_key = shm_key | 0x80;
+            } else {
+                TRACE_ERR("ERROR MP_I_KEY_RANGE not set \n");
+            }
+        }
+#else
+        cp = getenv("MP_I_PMD_PID");
+        if (cp) {
+            shm_key = atoi(cp);
+            shm_key = shm_key & 0x07ffffff;
+            shm_key = shm_key | 0x80000000;
+        } else {
+            cp = getenv("MP_PARTITION");
+            if (cp ) {
+                shm_key = atol(cp);
+                shm_key = (shm_key << 16);
+#ifdef SHMCC_KEY_TAIL
+                shm_key += SHMCC_KEY_TAIL;
+#endif
+            } else {
+                TRACE_ERR("ERROR MP_PARTITION not set \n");
+            }
+        }
+#endif
+
+        MPID_assert(shm_key != -1);
+
+        win->mpid.shm->shm_id = shmget(shm_key, win->mpid.shm->segment_len, shm_flag);
+        MPIU_ERR_CHKANDJUMP((win->mpid.shm->shm_id == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
+
+        base_addr = (void *) shmat(win->mpid.shm->shm_id,0,0);
+        MPIU_ERR_CHKANDJUMP((base_addr == (void*) -1), mpi_errno,MPI_ERR_BUFFER, "**bufnull");
+
+        /* set mutex_lock address and initialize it */
+        win->mpid.shm->mutex_lock = (MPIDI_SHM_MUTEX *) base_addr;
+        MPIDI_SHM_MUTEX_INIT(win);
+
+        /* successfully created shm segment - shared the key with other tasks */
+        mpi_errno = MPIR_Bcast_impl((void *) &shm_key, sizeof(int), MPI_CHAR, 0, win->comm_ptr, &errflag);
+
+    } else { /* task other than task 0  */
+        mpi_errno = MPIR_Bcast_impl((void *) &shm_key,  sizeof(int), MPI_CHAR, 0, win->comm_ptr, &errflag);
+        MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+
+        win->mpid.shm->shm_id = shmget(shm_key, 0, 0);
+        if (win->mpid.shm->shm_id != -1) { /* shm segment is available */
+            base_addr = (void *) shmat(win->mpid.shm->shm_id,0,0);
+        }
+        win->mpid.shm->mutex_lock = (MPIDI_SHM_MUTEX *) base_addr;
+    }
+
+    win->mpid.shm->allocated = 1;
+
+fn_exit:
+    return base_addr;
+    /* --BEGIN ERROR HANDLING-- */
+fn_fail:
+    goto fn_exit;
+    /* --END ERROR HANDLING-- */
+}
+
 int
-MPID_getSharedSegment(MPI_Aint        size,
-                         int          disp_unit,
-                         MPID_Comm  * comm_ptr,
-                         void       **base_ptr,
-                         MPID_Win   **win_ptr,
-                         MPI_Aint      *pSize,
-                         int        *noncontig)
+MPID_getSharedSegment(MPI_Aint     size,
+                      int          disp_unit,
+                      MPID_Comm  * comm_ptr,
+                      MPID_Win   **win_ptr,
+                      MPI_Aint   * pSize,
+                      int        * noncontig)
 {
     int mpi_errno = MPI_SUCCESS;
-    void **base_pp = base_ptr;
     int i, comm_size, rank;
-    uint32_t shm_key; 
-    int  shm_id;
-    MPI_Aint *tmp_buf;
     int errflag = FALSE;
     MPI_Aint pageSize,pageSize2, len,new_size;
-    char *cp;
     MPID_Win  *win;
     int    padSize;
-    int shm_flag = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;
 
     win =  *win_ptr;
     comm_size = win->comm_ptr->local_size;
     rank = win->comm_ptr->rank;
-    tmp_buf = MPIU_Malloc( 2*comm_size*sizeof(MPI_Aint));
 
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     GetPageSize((void *) win_ptr, (ulong *) &pageSize);
     *pSize = pageSize;
     win->mpid.shm->segment_len = 0;
+
     if (comm_size == 1) {
-         if (size > 0) {
-             if (*noncontig) 
-                 new_size = MPIDI_ROUND_UP_PAGESIZE(size,pageSize);
-             else 
-                 new_size = size;
-             *base_pp = MPIU_Malloc(new_size);
-             #ifndef MPIDI_NO_ASSERT
-                     MPID_assert(*base_pp != NULL);
-             #else
-              MPIU_ERR_CHKANDJUMP((*base_pp == NULL), mpi_errno, MPI_ERR_BUFFER, "**bufnull");
-             #endif
-         } else if (size == 0) {
-                   *base_pp = NULL;
-         } else {
-               MPIU_ERR_CHKANDSTMT(size >=0 , mpi_errno, MPI_ERR_SIZE,return mpi_errno, "**rmasize");
-         }
+        /* Do not use shared memory when there is only one rank on the node */
+
+        if (size > 0) {
+            if (*noncontig)
+                new_size = MPIDI_ROUND_UP_PAGESIZE(size,pageSize);
+            else
+                new_size = size;
+
+            win->base = MPIU_Malloc(new_size);
+            MPIU_ERR_CHKANDJUMP((win->base == NULL), mpi_errno, MPI_ERR_BUFFER, "**bufnull");
+
+        } else if (size == 0) {
+            win->base = NULL;
+
+        } else {
+            /* 'size' must be >= 0 */
+            MPIU_ERR_CHKANDSTMT(size >=0 , mpi_errno, MPI_ERR_SIZE,return mpi_errno, "**rmasize");
+        }
+
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-         win->mpid.shm->segment_len = new_size;
-         win->mpid.info[rank].base_addr = *base_pp;
-         win->base = *base_pp;
-     } else {
-         tmp_buf[rank]   = (MPI_Aint) size;
-         mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
-                                         tmp_buf, 1 * sizeof(MPI_Aint), MPI_BYTE,
-                                         (*win_ptr)->comm_ptr, &errflag);
-         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
-         /* calculate total number of bytes needed */
-         for (i = 0; i < comm_size; ++i) {
-             len = tmp_buf[i];
-             if (*noncontig)
+
+        win->mpid.shm->segment_len = new_size;
+
+    } else {
+        /* allocate a temporary buffer to gather the 'size' of each buffer on
+         * the node to determine the amount of shared memory to allocate
+         */
+        MPI_Aint *tmp_buf;
+        tmp_buf = MPIU_Malloc (2*comm_size*sizeof(MPI_Aint));
+        tmp_buf[rank] = (MPI_Aint) size;
+        mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
+                                        tmp_buf, 1 * sizeof(MPI_Aint), MPI_BYTE,
+                                        (*win_ptr)->comm_ptr, &errflag);
+        if (mpi_errno) {
+            MPIU_Free(tmp_buf);
+            MPIU_ERR_POP(mpi_errno);
+        }
+
+        /* calculate total number of bytes needed */
+        for (i = 0; i < comm_size; ++i) {
+            len = tmp_buf[i];
+            if (*noncontig)
                 /* Round up to next page size */
-                 win->mpid.shm->segment_len += MPIDI_ROUND_UP_PAGESIZE(len,pageSize);
-             else
-                 win->mpid.shm->segment_len += len;
-          }
-          len = len + 128; /* needed for mutex_lock etc */
-          /* get shared segment   */
-
-          shm_key=-1;
-          if (rank == 0) {
-             #ifdef DYNAMIC_TASKING
-             /* generate an appropriate key */
-             if (!mpidi_dynamic_tasking) {
-                cp = getenv("MP_I_PMD_PID");
-                if (cp) {
-                    shm_key = atoi(cp);
-                    shm_key = shm_key & 0x07ffffff;
-                    shm_key = shm_key | 0x80000000;
-                 } else {
-                    cp = getenv("MP_PARTITION");
-                    if (cp ) {
-                       shm_key = atol(cp);
-                       shm_key = (shm_key << 16) + SHM_KEY_TAIL;
-                    } else {
-                       TRACE_ERR("ERROR MP_PARTITION not set \n"); 
-                    }
-                  }
-              } else {
-                cp = getenv("MP_I_KEY_RANGE");
-                if (cp) {
-                    sscanf(cp, "0x%x", &shm_key);
-                    shm_key = shm_key | 0x80;
-                } else {
-                    TRACE_ERR("ERROR MP_I_KEY_RANGE not set \n"); 
-                }
-               }
-              #else 
-              cp = getenv("MP_I_PMD_PID");
-              if (cp) {
-                  shm_key = atoi(cp);
-                  shm_key = shm_key & 0x07ffffff;
-                  shm_key = shm_key | 0x80000000;
-              } else {
-                  cp = getenv("MP_PARTITION");
-                  if (cp ) {
-                      shm_key = atol(cp);
-#ifdef __PE__
-                      shm_key = (shm_key << 16) + SHMCC_KEY_TAIL;
+                win->mpid.shm->segment_len += MPIDI_ROUND_UP_PAGESIZE(len,pageSize);
+            else
+                win->mpid.shm->segment_len += len;
+        }
+        MPIU_Free(tmp_buf);
+
+        /* The beginning of the shared memory allocation contains a control
+         * block before the data begins.
+         */
+        win->mpid.shm->segment_len += MPIDI_ROUND_UP_PAGESIZE(sizeof(MPIDI_Win_shm_ctrl_t),pageSize);
+
+        /* Get the shared segment which includes the control block header and
+         * data buffer - possibly padded if non-contiguous.
+         */
+#ifdef USE_SYSV_SHM
+        win->mpid.shm->base_addr =
+          MPID_getSharedSegment_sysv(win);
 #else
-                      shm_key = (shm_key << 16);
+        MPID_Abort();
 #endif
-                  } else {
-                      TRACE_ERR("ERROR MP_PARTITION not set \n"); 
-                  }
-               }
-              #endif
-              MPID_assert(shm_key != -1);
-              shm_id = shmget(shm_key, win->mpid.shm->segment_len, shm_flag);
-              MPIU_ERR_CHKANDJUMP((shm_id == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
-              win->mpid.shm->base_addr = (void *) shmat(shm_id,0,0);
-              MPIU_ERR_CHKANDJUMP((win->mpid.shm->base_addr == NULL), mpi_errno,MPI_ERR_BUFFER, "**bufnull");
-              GetPageSize((void *) win->mpid.shm->base_addr, (ulong*)&pageSize2);
-              MPID_assert(pageSize == pageSize2);
-              /* set mutex_lock address and initialize it   */
-              win->mpid.shm->mutex_lock = (pthread_mutex_t *) win->mpid.shm->base_addr;
-              win->mpid.shm->shm_count=(int *)((MPI_Aint) win->mpid.shm->mutex_lock + (MPI_Aint) sizeof(pthread_mutex_t));
-              MPIDI_SHM_MUTEX_INIT(win);
-              win->mpid.shm->allocated = 1;
-              /* successfully created shm segment */
-               mpi_errno = MPIR_Bcast_impl((void *) &shm_key, sizeof(int), MPI_CHAR, 0, comm_ptr, &errflag);
-             } else { /* task other than task 0  */
-               mpi_errno = MPIR_Bcast_impl((void *) &shm_key,  sizeof(int), MPI_CHAR, 0, comm_ptr, &errflag);
-               MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
-               MPID_assert(shm_key != -1);
-               shm_id = shmget(shm_key, 0, 0);
-               if (shm_id != -1) { /* shm segment is available */
-                   win->mpid.shm->base_addr = (void *) shmat(shm_id,0,0);
-                   win->mpid.shm->allocated = 1;
-                   MPIU_ERR_CHKANDJUMP((win->mpid.shm->base_addr == (void *) -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
-               } else { /* node leader failed, no need to try here */
-                  MPIU_ERR_CHKANDJUMP((shm_id == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
-               }
-               win->mpid.shm->mutex_lock = (pthread_mutex_t *) win->mpid.shm->base_addr;
-               win->mpid.shm->shm_count=(int *)((MPI_Aint) win->mpid.shm->mutex_lock + (MPI_Aint) sizeof(pthread_mutex_t));
-              }
-         win->mpid.shm->shm_id = shm_id;
-         OPA_fetch_and_add_int((OPA_int_t *) win->mpid.shm->shm_count,1);
-         while(*win->mpid.shm->shm_count != comm_size) MPIDI_QUICKSLEEP;  /* wait for all ranks complete shmat */
-         /* compute the base addresses of each process within the shared memory segment */
-        {
-         padSize=sizeof(pthread_mutex_t) + sizeof(OPA_int_t);
-         win->base = (void *) ((long) win->mpid.shm->base_addr + (long ) PAD_SIZE(padSize));
-         }
-          *base_pp = win->base;
-     }
+
+        /* increment the shared counter */
+        win->mpid.shm->shm_count=(int *)((MPI_Aint) win->mpid.shm->mutex_lock + (MPI_Aint) sizeof(MPIDI_SHM_MUTEX));
+        OPA_fetch_and_add_int((OPA_int_t *) win->mpid.shm->shm_count,1);
+
+        /* wait for all ranks complete */
+        while(*win->mpid.shm->shm_count != comm_size) MPIDI_QUICKSLEEP;
+
+        /* compute the base addresses of each process within the shared memory segment */
+        win->base = (void *) ((long) win->mpid.shm->base_addr + (long ) MPIDI_ROUND_UP_PAGESIZE(sizeof(MPIDI_Win_shm_ctrl_t),pageSize));
+    }
 
 fn_exit:
-    MPIU_Free(tmp_buf);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
 fn_fail:
@@ -375,7 +413,6 @@ MPID_Win_allocate_shared(MPI_Aint     size,
   MPID_Win    *win = NULL;
   int rank;
 
-  void **baseP = base_ptr;
   MPIDI_Win_info  *winfo;
   int         comm_size,i;
   int         noncontig=FALSE;
@@ -434,7 +471,7 @@ MPID_Win_allocate_shared(MPI_Aint     size,
 #else
   mpi_errno=CheckSpaceType(win_ptr,info,&noncontig);
   comm_size = (*win_ptr)->comm_ptr->local_size;
-  MPID_getSharedSegment(size, disp_unit,comm_ptr,baseP, win_ptr,&pageSize,&noncontig);
+  MPID_getSharedSegment(size, disp_unit,comm_ptr, win_ptr, &pageSize, &noncontig);
 
   mpi_errno = MPIDI_Win_allgather(size,win_ptr);
   if (mpi_errno != MPI_SUCCESS)
diff --git a/src/mpid/pamid/subconfigure.m4 b/src/mpid/pamid/subconfigure.m4
index f6128da..8b196fa 100644
--- a/src/mpid/pamid/subconfigure.m4
+++ b/src/mpid/pamid/subconfigure.m4
@@ -53,6 +53,7 @@ dnl Set a value for the maximum processor name.
 MPID_MAX_PROCESSOR_NAME=128
 PM_REQUIRES_PMI=pmi2
 if test "${pamid_platform}" = "PE" ; then
+  with_shared_memory=sysv
         PM_REQUIRES_PMI=pmi2/poe
 elif test "${pamid_platform}" = "BGQ" ; then
   MPID_DEFAULT_CROSS_FILE=${master_top_srcdir}/src/mpid/pamid/cross/bgq8

http://git.mpich.org/mpich.git/commitdiff/3018d28c8e320e8432ca45b135ffa98ebce35040

commit 3018d28c8e320e8432ca45b135ffa98ebce35040
Author: Michael Blocksome <blocksom at us.ibm.com>
Date:   Fri May 16 15:06:37 2014 -0500

    pamid: change macro to not reference a local variable

diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index 8392ea8..1ecd9b5 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -33,9 +33,8 @@
                                       /* in PMD (mp_pmd.c) as well.        */
 
 extern int mpidi_dynamic_tasking;
-#define MPIDI_PAGESIZE ((MPI_Aint)pageSize)
-#define MPIDI_PAGESIZE_MASK (~(MPIDI_PAGESIZE-1))
-#define MPIDI_ROUND_UP_PAGESIZE(x) ((((MPI_Aint)x)+(~MPIDI_PAGESIZE_MASK)) & MPIDI_PAGESIZE_MASK)
+#define MPIDI_PAGESIZE_MASK(y) (~(((MPI_Aint)y)-1))
+#define MPIDI_ROUND_UP_PAGESIZE(x,y) ((((MPI_Aint)x)+(~MPIDI_PAGESIZE_MASK(y))) & MPIDI_PAGESIZE_MASK(y))
 #define ALIGN_BOUNDARY 128     /* Align data structures to cache line */
 #define PAD_SIZE(s) (ALIGN_BOUNDARY - (sizeof(s) & (ALIGN_BOUNDARY-1)))
 
@@ -189,7 +188,7 @@ MPID_getSharedSegment(MPI_Aint        size,
     if (comm_size == 1) {
          if (size > 0) {
              if (*noncontig) 
-                 new_size = MPIDI_ROUND_UP_PAGESIZE(size);
+                 new_size = MPIDI_ROUND_UP_PAGESIZE(size,pageSize);
              else 
                  new_size = size;
              *base_pp = MPIU_Malloc(new_size);
@@ -219,7 +218,7 @@ MPID_getSharedSegment(MPI_Aint        size,
              len = tmp_buf[i];
              if (*noncontig)
                 /* Round up to next page size */
-                 win->mpid.shm->segment_len += MPIDI_ROUND_UP_PAGESIZE(len); 
+                 win->mpid.shm->segment_len += MPIDI_ROUND_UP_PAGESIZE(len,pageSize);
              else
                  win->mpid.shm->segment_len += len;
           }
@@ -447,7 +446,7 @@ MPID_Win_allocate_shared(MPI_Aint     size,
           if (size) {
               if (noncontig)  
                   /* Round up to next page size */
-                   win->mpid.info[i].base_addr =(void *) ((MPI_Aint) cur_base + (MPI_Aint) MPIDI_ROUND_UP_PAGESIZE(size));
+                   win->mpid.info[i].base_addr =(void *) ((MPI_Aint) cur_base + (MPI_Aint) MPIDI_ROUND_UP_PAGESIZE(size,pageSize));
                 else
                     win->mpid.info[i].base_addr = (void *) ((MPI_Aint) cur_base + (MPI_Aint) size);
                 cur_base = win->mpid.info[i].base_addr;

http://git.mpich.org/mpich.git/commitdiff/c892ac58e4ac7645429030d02cff648dcbd7f6d2

commit c892ac58e4ac7645429030d02cff648dcbd7f6d2
Author: Michael Blocksome <blocksom at us.ibm.com>
Date:   Fri May 16 14:18:03 2014 -0500

    pamid: remove unnecessary bgq specialization.

diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index 332cf47..8392ea8 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -41,38 +41,23 @@ extern int mpidi_dynamic_tasking;
 
 
 int CheckRankOnNode(MPID_Comm  * comm_ptr,int *onNode ) {
-      int comm_size,i;
-      int mpi_errno=PAMI_SUCCESS;
+    int comm_size, i;
+    int mpi_errno = PAMI_SUCCESS;
 
-      comm_size = comm_ptr->local_size;
+    comm_size = comm_ptr->local_size;
 
-      *onNode=1;
+    *onNode = 1;
 
-#ifdef __PE__
-        for (i=0; i< comm_size; i++) {
-          if (comm_ptr->intranode_table[i] == -1) {
-            *onNode=0;
+    for (i = 0; i < comm_size; i++) {
+        if (comm_ptr->intranode_table[i] == -1) {
+            *onNode = 0;
             break;
-          }
-      }
-#else
-#ifdef PAMIX_IS_LOCAL_TASK
-      for (i=0; i< comm_size; i++) {
-        if (!PAMIX_Task_is_local(comm_ptr->vcr[i]->taskid)) {
-          *onNode=0;
-          break;
         }
-      } 
-#else
-      if (comm_ptr->intranode_table == NULL)
-        *onNode = 0;
-#endif
-#endif
-
+     }
 
      if (*onNode== 0) {
-      MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_CONFLICT,
-                          return mpi_errno, "**rmaconflict");
+         MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_CONFLICT,
+                             return mpi_errno, "**rmaconflict");
      }
 
      return mpi_errno;

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/pamid/include/mpidi_datatypes.h           |   14 +-
 .../pamid/src/onesided/mpid_win_allocate_shared.c  |  462 +++++++++++---------
 .../pamid/src/onesided/mpid_win_fetch_and_op.c     |    2 +-
 src/mpid/pamid/src/onesided/mpid_win_free.c        |   19 +-
 src/mpid/pamid/src/onesided/mpidi_onesided.h       |    5 +-
 src/mpid/pamid/subconfigure.m4                     |    2 +
 6 files changed, 284 insertions(+), 220 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list