[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.1-3-ge6ddea1

Service Account noreply at mpich.org
Fri Jun 6 14:12:47 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  e6ddea13901a9f718af9108638f4755327442e14 (commit)
       via  66c07f53bd389af488adcf421f28a7d9e6660ce2 (commit)
       via  cd168292eed8ee98fd15554d6109fda6f860d150 (commit)
      from  d39b7f534d72ca862fea1ec80894f5633bc8d926 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/e6ddea13901a9f718af9108638f4755327442e14

commit e6ddea13901a9f718af9108638f4755327442e14
Author: Michael Blocksome <blocksom at us.ibm.com>
Date:   Thu Jun 5 23:10:15 2014 -0500

    pamid: remove blocking shmem mutex; remove shmem CAS/FOP optimizations
    
    The blocking pthread mutex in the shared memory window causes a deadlock
    on bgq - perhaps because the messaging state is not advanced while the
    thread is waiting for the mutex to be released.
    
    Removing this blocking mutex resolves the bgq failures:
    
     - rma/strided_putget_indexed_shared
     - rma/strided_getacc_indexed_shared
    
    With the calls to the blocking mutex removed, the CAS and FOP functions
    are not atomic. The solution is to remove the shared memory optimization
    and instead use the common (network) code path.
    
    Removing these shared memory optimizations from CAS/FOP resolves the
    bgq hangs:
    
     - rma/mutex_bench_shared

diff --git a/src/mpid/pamid/src/onesided/mpid_win_accumulate.c b/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
index 47fe880..26ed220 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
@@ -298,8 +298,6 @@ MPID_Accumulate(const void   *origin_addr,
        int len, one;
 
        ++win->mpid.sync.total;
-       MPIDI_SHM_MUTEX_LOCK(win);
-       shm_locked = 1;
        base = win->mpid.info[target_rank].base_addr;
        disp_unit = win->mpid.info[target_rank].disp_unit;
        dest_addr = (char *) base + disp_unit * target_disp;
@@ -311,10 +309,6 @@ MPID_Accumulate(const void   *origin_addr,
 
        (*uop)((void *) origin_addr, dest_addr, &one, &origin_datatype);
 
-       if (shm_locked) {
-           MPIDI_SHM_MUTEX_UNLOCK(win);
-           shm_locked = 0;
-       }
 
         MPIU_Free(req);
         ++win->mpid.sync.complete;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c b/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
index e75c921..d31b4be 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
@@ -109,46 +109,6 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
     }
 
   req->target.rank = target_rank;
-
-  if (target_rank == win->comm_ptr->rank || win->create_flavor == MPI_WIN_FLAVOR_SHARED)
-    {
-        void *base, *dest_addr;
-        int disp_unit;
-        int len;
-
-        ++win->mpid.sync.total;
-        if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
-            MPIDI_SHM_MUTEX_LOCK(win);
-            shm_locked = 1;
-
-            base = win->mpid.info[target_rank].base_addr;
-            disp_unit = win->disp_unit;
-        }
-	else if (win->create_flavor == MPI_WIN_FLAVOR_DYNAMIC) {
-	    base = NULL;
-	    disp_unit = win->disp_unit;
-	}
-        else {
-            base = win->mpid.info[target_rank].base_addr;
-            disp_unit = win->disp_unit;
-        }
-
-        dest_addr = (char *) base + disp_unit * target_disp;
-
-        MPID_Datatype_get_size_macro(datatype, len);
-        MPIU_Memcpy(result_addr, dest_addr, len);
-
-        if (MPIR_Compare_equal(compare_addr, dest_addr, datatype))
-            MPIU_Memcpy(dest_addr, origin_addr, len); 
-
-        if (shm_locked) {
-            MPIDI_SHM_MUTEX_UNLOCK(win);
-            shm_locked = 0;
-        }
-        MPIU_Free(req);
-       ++win->mpid.sync.complete;
-    } 
-  else {
     req->buffer           = (void *) ((uintptr_t) origin_addr + req->origin.dt.true_lb);
     req->user_buffer      = result_addr + req->origin.dt.true_lb;
     req->compare_buffer   = (void *) ((uintptr_t) compare_addr + req->origin.dt.true_lb);
@@ -198,7 +158,6 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
       {
       PAMI_Context_post(MPIDI_Context[0], &req->post_request, MPIDI_Atomic, req);
     }
-   }
 
 fn_fail:
   return mpi_errno;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
index 6343484..749165c 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
@@ -289,51 +289,6 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
 
   req->target.rank = target_rank;
 
-
-  if (target_rank == win->comm_ptr->rank || win->create_flavor == MPI_WIN_FLAVOR_SHARED)
-    {
-        MPI_User_function *uop;
-        void *base, *dest_addr;
-        int disp_unit;
-        int len, one;
-
-        ++win->mpid.sync.total;
-        if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
-            MPIDI_SHM_MUTEX_LOCK(win);
-            shm_locked = 1;
-            base = win->mpid.info[target_rank].base_addr;
-            disp_unit = win->mpid.info[target_rank].disp_unit;
-
-        }
-	else if (win->create_flavor == MPI_WIN_FLAVOR_DYNAMIC) {
-	    base = NULL;
-	    disp_unit = win->disp_unit;
-	}
-        else {
-            base = win->mpid.info[target_rank].base_addr;
-            disp_unit = win->mpid.info[target_rank].disp_unit;
-        }
-
-        dest_addr = (char *) base + disp_unit * target_disp;
-
-        MPID_Datatype_get_size_macro(datatype, len);
-        MPIU_Memcpy(result_addr, dest_addr, len);
-
-        uop = MPIR_OP_HDL_TO_FN(op);
-        one = 1;
-
-        (*uop)((void *) origin_addr, dest_addr, &one, &datatype);
-
-        if (shm_locked) {
-            MPIDI_SHM_MUTEX_UNLOCK(win);
-            shm_locked = 0;
-        }
-
-        MPIU_Free(req);
-
-        ++win->mpid.sync.complete;
-    }
-  else {
     req->compare_buffer = NULL;
     req->pami_op = pami_op;
     req->op = op;
@@ -376,7 +331,6 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
       PAMI_Context_post(MPIDI_Context[0], &req->post_request, MPIDI_Atomic, req);
 
     }
-  }
 
 fn_fail:
   return mpi_errno;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get.c b/src/mpid/pamid/src/onesided/mpid_win_get.c
index 7b83d4a..872f62f 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get.c
@@ -267,10 +267,6 @@ MPID_Get(void         *origin_addr,
       else
           target_addr = win->mpid.info[target_rank].base_addr + req->offset;
 
-      if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
-          MPIDI_SHM_MUTEX_LOCK(win);
-          shm_locked=1;
-      }
 
       /* The operation is not complete until the local copy is performed */
       mpi_errno = MPIR_Localcopy(target_addr,
@@ -279,10 +275,6 @@ MPID_Get(void         *origin_addr,
                                  origin_addr,
                                  origin_count,
                                  origin_datatype);
-      if (shm_locked) {
-          MPIDI_SHM_MUTEX_UNLOCK(win);
-          shm_locked=0;
-      }
 
       /* The instant this completion counter is set to zero another thread
        * may notice the change and begin freeing request resources. The
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c b/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
index 52855bd..f57eddb 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
@@ -40,12 +40,8 @@ MPIDI_Fetch_data_op(const void   * origin_addr,
         int disp_unit;
         int len, one;
 
-       if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
-           MPIDI_SHM_MUTEX_LOCK(win);
-           shm_locked = 1;
            base = win->mpid.info[target_rank].base_addr;
            disp_unit = win->mpid.info[target_rank].disp_unit;
-        }
         dest_addr = (char *) base + disp_unit * target_disp;
 
         MPID_Datatype_get_size_macro(origin_datatype, len);
@@ -56,10 +52,6 @@ MPIDI_Fetch_data_op(const void   * origin_addr,
             (*uop)((void *) origin_addr, dest_addr, &one, &origin_datatype);
         }
 
-        if (shm_locked) {
-            MPIDI_SHM_MUTEX_UNLOCK(win);
-            shm_locked = 0;
-        }
    fn_fail: return;
 }
 

http://git.mpich.org/mpich.git/commitdiff/66c07f53bd389af488adcf421f28a7d9e6660ce2

commit 66c07f53bd389af488adcf421f28a7d9e6660ce2
Author: Michael Blocksome <blocksom at us.ibm.com>
Date:   Thu Jun 5 15:58:05 2014 -0500

    pamid: set base addr to NULL if size is zero in win_allocate_shared

diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index e94eb48..cfd08a7 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -365,14 +365,14 @@ MPID_getSharedSegment(MPI_Aint     size,
         /* allocate a temporary buffer to gather the 'size' of each buffer on
          * the node to determine the amount of shared memory to allocate
          */
-        MPI_Aint *tmp_buf;
-        tmp_buf = MPIU_Malloc (2*comm_size*sizeof(MPI_Aint));
-        tmp_buf[rank] = (MPI_Aint) size;
+        MPI_Aint * size_array;
+        size_array = MPIU_Malloc (2*comm_size*sizeof(MPI_Aint));
+        size_array[rank] = (MPI_Aint) size;
         mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
-                                        tmp_buf, 1 * sizeof(MPI_Aint), MPI_BYTE,
+                                        size_array, 1 * sizeof(MPI_Aint), MPI_BYTE,
                                         (*win_ptr)->comm_ptr, &errflag);
         if (mpi_errno) {
-            MPIU_Free(tmp_buf);
+            MPIU_Free(size_array);
             MPIU_ERR_POP(mpi_errno);
         }
 
@@ -380,9 +380,9 @@ MPID_getSharedSegment(MPI_Aint     size,
         MPI_Aint actual_size;
         win->mpid.info[0].base_addr = NULL;
         for (i = 0; i < comm_size; ++i) {
-            win->mpid.info[i].base_size = tmp_buf[i];
+            win->mpid.info[i].base_size = size_array[i];
 
-            actual_size = (*noncontig)?MPIDI_ROUND_UP_PAGESIZE(tmp_buf[i],pageSize):tmp_buf[i];
+            actual_size = (*noncontig)?MPIDI_ROUND_UP_PAGESIZE(size_array[i],pageSize):size_array[i];
 
             win->mpid.shm->segment_len += actual_size;
 
@@ -394,7 +394,6 @@ MPID_getSharedSegment(MPI_Aint     size,
                 win->mpid.info[i+1].base_addr =
                     (void *) ((uintptr_t)win->mpid.info[i].base_addr + actual_size);
         }
-        MPIU_Free(tmp_buf);
 
         /* The beginning of the shared memory allocation contains a control
          * block before the data begins.
@@ -421,6 +420,12 @@ MPID_getSharedSegment(MPI_Aint     size,
                 (void *) ((uintptr_t)win->mpid.info[i].base_addr + (uintptr_t)win->base);
         }
 
+        for (i = 0; i < comm_size; ++i) {
+            if (size_array[i] == 0) win->mpid.info[i].base_addr = NULL;
+        }
+
+        MPIU_Free(size_array);
+
         /* increment the shared counter */
         OPA_fetch_and_add_int((OPA_int_t *) &win->mpid.shm->ctrl->shm_count,(int) 1);
 

http://git.mpich.org/mpich.git/commitdiff/cd168292eed8ee98fd15554d6109fda6f860d150

commit cd168292eed8ee98fd15554d6109fda6f860d150
Author: Michael Blocksome <blocksom at us.ibm.com>
Date:   Thu Jun 5 12:09:19 2014 -0500

    pamid: create memregions at an offset of the shared memory allocation
    
    For MPI_Win_allocate_shared(), the memory regions that are exchanged all
    pointed to the beginning of the shared memory allocation instead of at
    the portion of the shared memory allocated to each process. When
    MPI_Put(), for example, issues a PAMI_Rput() it uses an offset of zero
    to specify the beginning of the peer's window. As the exchanged memory
    regions for each peer rank were not created at the start of each peer
    rank's allocation, data corruption and other bad things occur.

diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index 0a63e9f..e94eb48 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -377,14 +377,22 @@ MPID_getSharedSegment(MPI_Aint     size,
         }
 
         /* calculate total number of bytes needed */
+        MPI_Aint actual_size;
+        win->mpid.info[0].base_addr = NULL;
         for (i = 0; i < comm_size; ++i) {
             win->mpid.info[i].base_size = tmp_buf[i];
-            len = tmp_buf[i];
-            if (*noncontig)
-                /* Round up to next page size */
-                win->mpid.shm->segment_len += MPIDI_ROUND_UP_PAGESIZE(len,pageSize);
-            else
-                win->mpid.shm->segment_len += len;
+
+            actual_size = (*noncontig)?MPIDI_ROUND_UP_PAGESIZE(tmp_buf[i],pageSize):tmp_buf[i];
+
+            win->mpid.shm->segment_len += actual_size;
+
+            /* Save the OFFSET to each rank's private shared memory area. This
+             * will be added to the BASE ADDRESS of the entire shared memory
+             * allocation to determine the virtual address.
+             */
+            if (i < comm_size-1)
+                win->mpid.info[i+1].base_addr =
+                    (void *) ((uintptr_t)win->mpid.info[i].base_addr + actual_size);
         }
         MPIU_Free(tmp_buf);
 
@@ -405,14 +413,19 @@ MPID_getSharedSegment(MPI_Aint     size,
 #endif
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
+        /* compute the base addresses of each process within the shared memory segment */
+        win->base = (void *) ((long) win->mpid.shm->base_addr + (long ) MPIDI_ROUND_UP_PAGESIZE((sizeof(MPIDI_Win_shm_ctrl_t) + ((comm_size+1) * sizeof(void *))),pageSize));
+
+        for (i = 0; i < comm_size; ++i) {
+            win->mpid.info[i].base_addr =
+                (void *) ((uintptr_t)win->mpid.info[i].base_addr + (uintptr_t)win->base);
+        }
+
         /* increment the shared counter */
         OPA_fetch_and_add_int((OPA_int_t *) &win->mpid.shm->ctrl->shm_count,(int) 1);
 
         /* wait for all ranks complete */
         while((int) win->mpid.shm->ctrl->shm_count != comm_size) MPIDI_QUICKSLEEP;
-
-        /* compute the base addresses of each process within the shared memory segment */
-        win->base = (void *) ((long) win->mpid.shm->base_addr + (long ) MPIDI_ROUND_UP_PAGESIZE((sizeof(MPIDI_Win_shm_ctrl_t) + ((comm_size+1) * sizeof(void *))),pageSize));
     }
 
 fn_exit:
@@ -508,26 +521,6 @@ MPID_Win_allocate_shared(MPI_Aint     size,
   if (mpi_errno != MPI_SUCCESS)
       return mpi_errno;
 
-  if (comm_size > 1) {
-      char *cur_base = (*win_ptr)->base;
-      for (i = 0; i < comm_size; ++i) {
-          if (win->mpid.info[i].base_size) {
-             if (i == 0) 
-                 win->mpid.info[i].base_addr = (void *) ((MPI_Aint) cur_base);
-             else {
-              if (noncontig)  
-                  /* Round up to next page size */
-                  win->mpid.info[i].base_addr =(void *) ((MPI_Aint) cur_base + (MPI_Aint) MPIDI_ROUND_UP_PAGESIZE((win->mpid.info[i-1].base_size),pageSize));
-              else
-                  win->mpid.info[i].base_addr = (void *) ((MPI_Aint) cur_base + (MPI_Aint) (win->mpid.info[i-1].base_size));
-              }
-              cur_base = win->mpid.info[i].base_addr;
-          } else {
-              win->mpid.info[i].base_addr = NULL; 
-          }
-      }
-  }
-
   *(void**) base_ptr = (void *) win->mpid.info[rank].base_addr;
 
   mpi_errno = MPIR_Barrier_impl(comm_ptr, &mpi_errno);
diff --git a/src/mpid/pamid/src/onesided/mpid_win_create.c b/src/mpid/pamid/src/onesided/mpid_win_create.c
index 7be9415..706da3c 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_create.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_create.c
@@ -123,7 +123,7 @@ MPIDI_Win_allgather( MPI_Aint size, MPID_Win **win_ptr )
 #ifdef USE_PAMI_RDMA
   if (size != 0)
     {
-      rc = PAMI_Memregion_create(MPIDI_Context[0], win->base, win->size, &length_out, &winfo->memregion);
+      rc = PAMI_Memregion_create(MPIDI_Context[0], win->mpid.info[rank].base_addr, win->mpid.info[rank].base_size, &length_out, &winfo->memregion);
 
       MPIU_ERR_CHKANDJUMP((rc != PAMI_SUCCESS), mpi_errno, MPI_ERR_OTHER, "**nomem");
       MPIU_ERR_CHKANDJUMP((win->size < length_out), mpi_errno, MPI_ERR_OTHER, "**nomem");
@@ -131,7 +131,7 @@ MPIDI_Win_allgather( MPI_Aint size, MPID_Win **win_ptr )
 #else
   if ( (!MPIDI_Process.mp_s_use_pami_get) && (size != 0) )
     {
-      rc = PAMI_Memregion_create(MPIDI_Context[0], win->base, win->size, &length_out, &winfo->memregion);
+      rc = PAMI_Memregion_create(MPIDI_Context[0], win->mpid.info[rank].base_addr, win->mpid.info[rank].base_size, &length_out, &winfo->memregion);
       if(rc == PAMI_SUCCESS)
         {
           winfo->memregion_used = 1;

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/pamid/src/onesided/mpid_win_accumulate.c  |    6 --
 .../pamid/src/onesided/mpid_win_allocate_shared.c  |   70 ++++++++++----------
 .../pamid/src/onesided/mpid_win_compare_and_swap.c |   41 ------------
 src/mpid/pamid/src/onesided/mpid_win_create.c      |    4 +-
 .../pamid/src/onesided/mpid_win_fetch_and_op.c     |   46 -------------
 src/mpid/pamid/src/onesided/mpid_win_get.c         |    8 --
 .../pamid/src/onesided/mpid_win_get_accumulate.c   |    8 --
 7 files changed, 36 insertions(+), 147 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list