[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1b1-57-gcab15c5

Thu Sep 26 00:44:28 CDT 2013

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  cab15c5a4e3edd108064b2a53c3ac43cbbdee000 (commit)
       via  278dfa647d069c0fd18f5778fe6710a783ec4050 (commit)
       via  4e3c88ece6ab89c754974a226c781a07a331e4f2 (commit)
       via  3668461a8a30ff433b7ab488cc93b466a9a55179 (commit)
       via  c3db96fde87ba3f1fa5f36953cbc0a9bc7f5086f (commit)
       via  103382f5a366e07eef48e614e0eb7b1490b7edb0 (commit)
       via  d075cc9d5b7898691919f80cf0bcccc595d010ec (commit)
       via  4b5a188aec3a043884505cd7f34b855e8238a3a3 (commit)
      from  a35fa10b8d70217662e8b1d1c04dfa39015ae757 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/cab15c5a4e3edd108064b2a53c3ac43cbbdee000

commit cab15c5a4e3edd108064b2a53c3ac43cbbdee000
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Sep 25 17:25:55 2013 -0500

    eagerly acquire lock in win_lock/lock_all for SHM operations.
    
    If SHM is allocated by MPI_Win_allocate and the target is on the same
    node as the origin, the origin needs to acquire the lock eagerly before
    it can perform any SHM RMA operations on the target's SHM region.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index ae60cf0..fbf609f 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1884,6 +1884,7 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDI_Win_target_state *target_state;
+    MPIDI_VC_t *orig_vc, *target_vc;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_LOCK);
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_LOCK);
@@ -1925,15 +1926,31 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr)
         mpi_errno = MPIDI_CH3I_Acquire_local_lock(win_ptr, lock_type);
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
     }
-    else if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+    else if (win_ptr->shm_allocated == TRUE) {
         /* Lock must be taken immediately for shared memory windows because of
          * load/store access */
 
-        mpi_errno = MPIDI_CH3I_Send_lock_msg(dest, lock_type, win_ptr);
-        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        if (win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
+            /* check if target is local and shared memory is allocated on window,
+               if so, we directly send lock request and wait for lock reply. */
+
+            /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
+               the same node. However, in ch3:sock, even if origin and target are on the same node, they do
+               not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
+               which is only set to TRUE when SHM region is allocated in nemesis.
+               In future we need to figure out a way to check if origin and target are in the same "SHM comm".
+            */
+            MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
+            MPIDI_Comm_get_vc(win_ptr->comm_ptr, dest, &target_vc);
+        }
 
-        mpi_errno = MPIDI_CH3I_Wait_for_lock_granted(win_ptr, dest);
-        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED || orig_vc->node_id == target_vc->node_id) {
+            mpi_errno = MPIDI_CH3I_Send_lock_msg(dest, lock_type, win_ptr);
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+            mpi_errno = MPIDI_CH3I_Wait_for_lock_granted(win_ptr, dest);
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        }
     }
     else if (MPIR_PARAM_CH3_RMA_LOCK_IMMED && ((assert & MPI_MODE_NOCHECK) == 0)) {
         /* TODO: Make this mode of operation available through an assert
@@ -2375,6 +2392,7 @@ fn_fail:
 int MPIDI_Win_lock_all(int assert, MPID_Win *win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
+    MPIDI_VC_t *orig_vc, *target_vc;
     int i;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_LOCK_ALL);
 
@@ -2400,23 +2418,46 @@ int MPIDI_Win_lock_all(int assert, MPID_Win *win_ptr)
     mpi_errno = MPIDI_CH3I_Acquire_local_lock(win_ptr, MPI_LOCK_SHARED);
     if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
 
-    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+    if (win_ptr->shm_allocated == TRUE) {
         /* Immediately lock all targets for load/store access */
 
         for (i = 0; i < MPIR_Comm_size(win_ptr->comm_ptr); i++) {
             /* Local process is already locked */
             if (i == win_ptr->comm_ptr->rank) continue;
 
-            mpi_errno = MPIDI_CH3I_Send_lock_msg(i, MPI_LOCK_SHARED, win_ptr);
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            if (win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
+                /* check if target is local and shared memory is allocated on window,
+                   if so, we directly send lock request and wait for lock reply. */
+
+                /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
+                   the same node. However, in ch3:sock, even if origin and target are on the same node, they do
+                   not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
+                   which is only set to TRUE when SHM region is allocated in nemesis.
+                   In future we need to figure out a way to check if origin and target are in the same "SHM comm".
+                */
+                MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
+                MPIDI_Comm_get_vc(win_ptr->comm_ptr, i, &target_vc);
+            }
+
+            if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED || orig_vc->node_id == target_vc->node_id) {
+                mpi_errno = MPIDI_CH3I_Send_lock_msg(i, MPI_LOCK_SHARED, win_ptr);
+                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            }
         }
 
         for (i = 0; i < MPIR_Comm_size(win_ptr->comm_ptr); i++) {
             /* Local process is already locked */
             if (i == win_ptr->comm_ptr->rank) continue;
 
-            mpi_errno = MPIDI_CH3I_Wait_for_lock_granted(win_ptr, i);
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            if (win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
+                MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
+                MPIDI_Comm_get_vc(win_ptr->comm_ptr, i, &target_vc);
+            }
+
+            if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED || orig_vc->node_id == target_vc->node_id) {
+                mpi_errno = MPIDI_CH3I_Wait_for_lock_granted(win_ptr, i);
+                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            }
         }
     }
 

http://git.mpich.org/mpich.git/commitdiff/278dfa647d069c0fd18f5778fe6710a783ec4050

commit 278dfa647d069c0fd18f5778fe6710a783ec4050
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Sep 25 17:16:21 2013 -0500

    perform SHM operations immediately.
    
    If SHM is allocated, perform each RMA operation immediately when it is
    issued instead of queuing it up.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_acc_ops.c b/src/mpid/ch3/src/ch3u_rma_acc_ops.c
index 82b1d2d..3d441c9 100644
--- a/src/mpid/ch3/src/ch3u_rma_acc_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_acc_ops.c
@@ -27,6 +27,7 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
     int dt_contig ATTRIBUTE((unused));
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     MPID_Datatype *dtp;
+    MPIDI_VC_t *orig_vc, *target_vc;
     MPIU_CHKLMEM_DECL(2);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET_ACCUMULATE);
 
@@ -59,8 +60,23 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(result_datatype, result_predefined);
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, target_predefined);
 
+    if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
+        /* check if target is local and shared memory is allocated on window,
+           if so, we directly perform this operation on shared memory region. */
+
+        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
+           the same node. However, in ch3:sock, even if origin and target are on the same node, they do
+           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
+           which is only set to TRUE when SHM region is allocated in nemesis.
+           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
+        */
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
+    }
+
     /* Do =! rank first (most likely branch?) */
-    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
+    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
+        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
     {
         mpi_errno = MPIDI_CH3I_Shm_get_acc_op(origin_addr, origin_count, origin_datatype,
                                               result_addr, result_count, result_datatype,
@@ -135,6 +151,7 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
 {
     int mpi_errno = MPI_SUCCESS;
     int rank;
+    MPIDI_VC_t *orig_vc, *target_vc;
 
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
@@ -152,13 +169,28 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
 
     rank = win_ptr->comm_ptr->rank;
 
+    if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
+        /* check if target is local and shared memory is allocated on window,
+           if so, we directly perform this operation on shared memory region. */
+
+        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
+           the same node. However, in ch3:sock, even if origin and target are on the same node, they do
+           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
+           which is only set to TRUE when SHM region is allocated in nemesis.
+           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
+        */
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
+    }
+
     /* The datatype must be predefined, and one of: C integer, Fortran integer,
      * Logical, Multi-language types, or Byte.  This is checked above the ADI,
      * so there's no need to check it again here. */
 
     /* FIXME: For shared memory windows, we should provide an implementation
      * that uses a processor atomic operation. */
-    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
+    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
+        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
     {
         mpi_errno = MPIDI_CH3I_Shm_cas_op(origin_addr, compare_addr, result_addr,
                                           datatype, target_rank, target_disp, win_ptr);
@@ -212,6 +244,7 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
 {
     int mpi_errno = MPI_SUCCESS;
     int rank;
+    MPIDI_VC_t *orig_vc, *target_vc;
 
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_FETCH_AND_OP);
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_FETCH_AND_OP);
@@ -229,12 +262,27 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
 
     rank = win_ptr->comm_ptr->rank;
 
+    if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
+        /* check if target is local and shared memory is allocated on window,
+           if so, we directly perform this operation on shared memory region. */
+
+        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
+           the same node. However, in ch3:sock, even if origin and target are on the same node, they do
+           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
+           which is only set to TRUE when SHM region is allocated in nemesis.
+           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
+        */
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
+    }
+
     /* The datatype and op must be predefined.  This is checked above the ADI,
      * so there's no need to check it again here. */
 
     /* FIXME: For shared memory windows, we should provide an implementation
      * that uses a processor atomic operation. */
-    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
+    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
+        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
     {
         mpi_errno = MPIDI_CH3I_Shm_fop_op(origin_addr, result_addr, datatype,
                                           target_rank, target_disp, op, win_ptr);
diff --git a/src/mpid/ch3/src/ch3u_rma_ops.c b/src/mpid/ch3/src/ch3u_rma_ops.c
index e7318e2..0d2c37a 100644
--- a/src/mpid/ch3/src/ch3u_rma_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_ops.c
@@ -124,6 +124,7 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
     MPID_Datatype *dtp;
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     MPIDI_msg_sz_t data_sz;
+    MPIDI_VC_t *orig_vc, *target_vc;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_PUT);
         
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_PUT);
@@ -148,8 +149,23 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
 
     rank = win_ptr->comm_ptr->rank;
     
+    if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
+        /* check if target is local and shared memory is allocated on window,
+           if so, we directly perform this operation on shared memory region. */
+
+        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
+           the same node. However, in ch3:sock, even if origin and target are on the same node, they do
+           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
+           which is only set to TRUE when SHM region is allocated in nemesis.
+           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
+        */
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
+    }
+
     /* If the put is a local operation, do it here */
-    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
+    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
+        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
     {
         mpi_errno = MPIDI_CH3I_Shm_put_op(origin_addr, origin_count, origin_datatype, target_rank,
                                           target_disp, target_count, target_datatype, win_ptr);
@@ -222,6 +238,7 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
     int dt_contig ATTRIBUTE((unused)), rank, predefined;
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     MPID_Datatype *dtp;
+    MPIDI_VC_t *orig_vc, *target_vc;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET);
         
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_GET);
@@ -245,9 +262,24 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
     }
 
     rank = win_ptr->comm_ptr->rank;
+
+    if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
+        /* check if target is local and shared memory is allocated on window,
+           if so, we directly perform this operation on shared memory region. */
+
+        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
+           the same node. However, in ch3:sock, even if origin and target are on the same node, they do
+           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
+           which is only set to TRUE when SHM region is allocated in nemesis.
+           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
+        */
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
+    }
     
     /* If the get is a local operation, do it here */
-    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
+    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
+        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
     {
         mpi_errno = MPIDI_CH3I_Shm_get_op(origin_addr, origin_count, origin_datatype, target_rank,
                                           target_disp, target_count, target_datatype, win_ptr);
@@ -318,6 +350,7 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
     int dt_contig ATTRIBUTE((unused)), rank, origin_predefined, target_predefined;
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     MPID_Datatype *dtp;
+    MPIDI_VC_t *orig_vc, *target_vc;
     MPIU_CHKLMEM_DECL(2);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_ACCUMULATE);
     
@@ -346,8 +379,23 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, origin_predefined);
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, target_predefined);
 
+    if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
+        /* check if target is local and shared memory is allocated on window,
+           if so, we directly perform this operation on shared memory region. */
+
+        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
+           the same node. However, in ch3:sock, even if origin and target are on the same node, they do
+           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
+           which is only set to TRUE when SHM region is allocated in nemesis.
+           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
+        */
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
+    }
+
     /* Do =! rank first (most likely branch?) */
-    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
+    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
+        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
     {
 	mpi_errno = MPIDI_CH3I_Shm_acc_op(origin_addr, origin_count, origin_datatype,
 					  target_rank, target_disp, target_count, target_datatype,

http://git.mpich.org/mpich.git/commitdiff/4e3c88ece6ab89c754974a226c781a07a331e4f2

commit 4e3c88ece6ab89c754974a226c781a07a331e4f2
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Sep 25 13:06:48 2013 -0500

    Delete MPIDI_CH3I_DO_SHM_OP macro.
    
    Originally, for SHM RMA operations, we created structures to queue them up and performed
    them lazily when closing the epoch. Because creating the queued structures causes significant
    performance overhead, we decided not to queue them up but to perform them immediately. Therefore
    the MPIDI_CH3I_DO_SHM_OP macro and some special checks on SHM operations (to count queued
    operations) are not needed anymore.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index a697250..ae60cf0 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -202,55 +202,6 @@ static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
     } while (0)
 
 
-/* Perform RMA operation asynchronously if window of processes on the same node
-   is allocated on shared memory */
-#define MPIDI_CH3I_DO_SHM_OP(op_ptr_, win_ptr_, err_)                                                                       \
-    do {                                                                                                                    \
-    switch ((op_ptr_)->type)                                                                                                \
-    {                                                                                                                       \
-        case (MPIDI_RMA_PUT):                                                                                               \
-            (err_) = MPIDI_CH3I_Shm_put_op((op_ptr_)->origin_addr, (op_ptr_)->origin_count, (op_ptr_)->origin_datatype,     \
-                                           (op_ptr_)->target_rank, (op_ptr_)->target_disp, (op_ptr_)->target_count,         \
-                                           (op_ptr_)->target_datatype, (win_ptr_));                                         \
-            if (err_) {MPIU_ERR_POP(err_);}                                                                                 \
-            break;                                                                                                          \
-        case (MPIDI_RMA_ACCUMULATE):                                                                                        \
-        case (MPIDI_RMA_ACC_CONTIG):                                                                                        \
-            (err_) = MPIDI_CH3I_Shm_acc_op((op_ptr_)->origin_addr, (op_ptr_)->origin_count, (op_ptr_)->origin_datatype,     \
-                                           (op_ptr_)->target_rank, (op_ptr_)->target_disp, (op_ptr_)->target_count,         \
-                                           (op_ptr_)->target_datatype, (op_ptr_)->op, (win_ptr_));                          \
-            if (err_) {MPIU_ERR_POP(err_);}                                                                                 \
-            break;                                                                                                          \
-        case (MPIDI_RMA_GET_ACCUMULATE):                                                                                    \
-            (err_) = MPIDI_CH3I_Shm_get_acc_op((op_ptr_)->origin_addr, (op_ptr_)->origin_count, (op_ptr_)->origin_datatype, \
-                                               (op_ptr_)->result_addr, (op_ptr_)->result_count, (op_ptr_)->result_datatype, \
-                                               (op_ptr_)->target_rank, (op_ptr_)->target_disp, (op_ptr_)->target_count,     \
-                                               (op_ptr_)->target_datatype, (op_ptr_)->op, (win_ptr_));                      \
-            if (err_) {MPIU_ERR_POP(err_);}                                                                                 \
-            break;                                                                                                          \
-        case (MPIDI_RMA_GET):                                                                                               \
-            (err_) = MPIDI_CH3I_Shm_get_op((op_ptr_)->origin_addr, (op_ptr_)->origin_count, (op_ptr_)->origin_datatype,     \
-                                           (op_ptr_)->target_rank, (op_ptr_)->target_disp, (op_ptr_)->target_count,         \
-                                           (op_ptr_)->target_datatype, (win_ptr_));                                         \
-            if (err_) {MPIU_ERR_POP(err_);}                                                                                 \
-            break;                                                                                                          \
-        case (MPIDI_RMA_COMPARE_AND_SWAP):                                                                                  \
-            (err_) = MPIDI_CH3I_Shm_cas_op((op_ptr_)->origin_addr, (op_ptr_)->compare_addr, (op_ptr_)->result_addr,         \
-                                           (op_ptr_)->origin_datatype, (op_ptr_)->target_rank, (op_ptr_)->target_disp,      \
-                                           (win_ptr_));                                                                     \
-            if (err_) {MPIU_ERR_POP(err_);}                                                                                 \
-            break;                                                                                                          \
-        case (MPIDI_RMA_FETCH_AND_OP):                                                                                      \
-            (err_) = MPIDI_CH3I_Shm_fop_op((op_ptr_)->origin_addr, (op_ptr_)->result_addr, (op_ptr_)->origin_datatype,      \
-                                           (op_ptr_)->target_rank, (op_ptr_)->target_disp, (op_ptr_)->op, (win_ptr_));      \
-            if (err_) {MPIU_ERR_POP(err_);}                                                                                 \
-            break;                                                                                                          \
-        default:                                                                                                            \
-            MPIU_ERR_SETANDJUMP(err_,MPI_ERR_OTHER,"**winInvalidOp");                                                       \
-    }                                                                                                                       \
-    } while (0)
-
-
 #undef FUNCNAME
 #define FUNCNAME MPIDI_Win_fence
 #undef FCNAME
@@ -329,7 +280,6 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
     {
 	int nRequest = 0;
 	int nRequestNew = 0;
-        MPIDI_VC_t *orig_vc, *target_vc;
 
         /* Ensure ordering of load/store operations. */
         if (win_ptr->shm_allocated == TRUE) {
@@ -363,15 +313,11 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
 	   ops from this process */
 	total_op_count = 0;
         curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
 	while (curr_ptr != NULL)
 	{
-            MPIDI_Comm_get_vc(win_ptr->comm_ptr, curr_ptr->target_rank, &target_vc);
-	    if (!(win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
-		total_op_count++;
-		rma_target_proc[curr_ptr->target_rank] = 1;
-		nops_to_proc[curr_ptr->target_rank]++;
-	    }
+	    total_op_count++;
+	    rma_target_proc[curr_ptr->target_rank] = 1;
+	    nops_to_proc[curr_ptr->target_rank]++;
 	    curr_ptr = curr_ptr->next;
 	}
 	
@@ -407,13 +353,6 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
         curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
 	while (curr_ptr != NULL)
 	{
-          MPIDI_Comm_get_vc(win_ptr->comm_ptr, curr_ptr->target_rank, &target_vc);
-          if (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id) {
-            MPIDI_CH3I_DO_SHM_OP(curr_ptr, win_ptr, mpi_errno);
-            MPIDI_CH3I_RMA_Ops_free_and_next(ops_list, &curr_ptr);
-          }
-          else {
-
             MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
 
 	    /* The completion counter at the target is decremented only on 
@@ -456,7 +395,6 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
 		    nRequestNew = nRequest;
 		}
 	    }
-	  } /* end of else */
 	}
 	MPIU_INSTR_DURATION_END(winfence_issue);
 
@@ -1573,7 +1511,6 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
     int start_grp_size, *ranks_in_start_grp, *ranks_in_win_grp, rank;
     int nRequest = 0;
     int nRequestNew = 0;
-    MPIDI_VC_t *orig_vc, *target_vc;
     MPIU_CHKLMEM_DECL(9);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_COMPLETE);
 
@@ -1686,14 +1623,10 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
 
     total_op_count = 0;
     curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
-    MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
     while (curr_ptr != NULL)
     {
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, curr_ptr->target_rank, &target_vc);
-	if (!(win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
-	    nops_to_proc[curr_ptr->target_rank]++;
-	    total_op_count++;
-	}
+	nops_to_proc[curr_ptr->target_rank]++;
+	total_op_count++;
 	curr_ptr = curr_ptr->next;
     }
 
@@ -1712,13 +1645,6 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
     curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
     while (curr_ptr != NULL)
     {
-      MPIDI_Comm_get_vc(win_ptr->comm_ptr, curr_ptr->target_rank, &target_vc);
-      if (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id) {
-        MPIDI_CH3I_DO_SHM_OP(curr_ptr, win_ptr, mpi_errno);
-        MPIDI_CH3I_RMA_Ops_free_and_next(ops_list, &curr_ptr);
-      }
-      else {
-
         MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
 
 	/* The completion counter at the target is decremented only on 
@@ -1755,7 +1681,6 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
 		nRequestNew = nRequest;
 	    }
 	}
-      }  /* end of else */
     }
     MPIU_INSTR_DURATION_END(wincomplete_issue);
         
@@ -2579,7 +2504,6 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
     MPIDI_RMA_Op_t *curr_ptr;
     MPI_Win source_win_handle = MPI_WIN_NULL, target_win_handle = MPI_WIN_NULL;
     int nRequest=0, nRequestNew=0;
-    MPIDI_VC_t *orig_vc, *target_vc;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_DO_PASSIVE_TARGET_RMA);
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_DO_PASSIVE_TARGET_RMA);
@@ -2589,21 +2513,6 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
                 (win_ptr->targets[target_rank].remote_lock_state == MPIDI_CH3_WIN_LOCK_CALLED &&
                  win_ptr->targets[target_rank].remote_lock_assert & MPI_MODE_NOCHECK));
 
-    MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
-    MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
-
-    /* if alloc_shm is enabled and target process is on the same node,
-       directly perform RMA operations at the origin side and remove them
-       from passive RMA operation list */
-    if (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id) {
-        curr_ptr = MPIDI_CH3I_RMA_Ops_head(&win_ptr->targets[target_rank].rma_ops_list);
-        while (curr_ptr != NULL) {
-            MPIU_Assert(curr_ptr->target_rank == target_rank);
-            MPIDI_CH3I_DO_SHM_OP(curr_ptr, win_ptr, mpi_errno);
-            MPIDI_CH3I_RMA_Ops_free_and_next(&win_ptr->targets[target_rank].rma_ops_list, &curr_ptr);
-        }
-    }
-
     if (win_ptr->targets[target_rank].remote_lock_mode == MPI_LOCK_EXCLUSIVE &&
         win_ptr->targets[target_rank].remote_lock_state != MPIDI_CH3_WIN_LOCK_CALLED) {
         /* Exclusive lock already held -- no need to wait for rma done pkt at

http://git.mpich.org/mpich.git/commitdiff/3668461a8a30ff433b7ab488cc93b466a9a55179

commit 3668461a8a30ff433b7ab488cc93b466a9a55179
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Sep 25 13:10:32 2013 -0500

    Delete the SHM checks around datatype refcounts of RMA operations.
    
    If we perform SHM ops immediately, we do not need the checks when
    incrementing datatype reference counts, because SHM ops have already
    been performed at that point and the checks apply only to the
    remaining operations.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_acc_ops.c b/src/mpid/ch3/src/ch3u_rma_acc_ops.c
index e8e3dea..82b1d2d 100644
--- a/src/mpid/ch3/src/ch3u_rma_acc_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_acc_ops.c
@@ -71,7 +71,6 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
     else {
         MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
         MPIDI_RMA_Op_t *new_ptr = NULL;
-        MPIDI_VC_t *orig_vc, *target_vc;
 
         /* Append the operation to the window's RMA ops queue */
         MPIU_INSTR_DURATION_START(rmaqueue_alloc);
@@ -98,29 +97,19 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
         new_ptr->op = op;
         MPIU_INSTR_DURATION_END(rmaqueue_set);
 
-	/* check if target is local and shared memory is allocated on window,
-	  if so, we do not need to increment reference counts on datatype. This is
-	  because this operation will be directly done on shared memory region, instead
-	  of sending and receiving through the progress engine, therefore datatype
-	  will not be referenced by the progress engine */
-
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
-	if (!(win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
-            /* if source or target datatypes are derived, increment their
-               reference counts */
-            if (!origin_predefined) {
-                MPID_Datatype_get_ptr(origin_datatype, dtp);
-                MPID_Datatype_add_ref(dtp);
-            }
-            if (!result_predefined) {
-                MPID_Datatype_get_ptr(result_datatype, dtp);
-                MPID_Datatype_add_ref(dtp);
-            }
-            if (!target_predefined) {
-                MPID_Datatype_get_ptr(target_datatype, dtp);
-                MPID_Datatype_add_ref(dtp);
-            }
+        /* if source or target datatypes are derived, increment their
+           reference counts */
+        if (!origin_predefined) {
+            MPID_Datatype_get_ptr(origin_datatype, dtp);
+            MPID_Datatype_add_ref(dtp);
+        }
+        if (!result_predefined) {
+            MPID_Datatype_get_ptr(result_datatype, dtp);
+            MPID_Datatype_add_ref(dtp);
+        }
+        if (!target_predefined) {
+            MPID_Datatype_get_ptr(target_datatype, dtp);
+            MPID_Datatype_add_ref(dtp);
         }
     }
 
diff --git a/src/mpid/ch3/src/ch3u_rma_ops.c b/src/mpid/ch3/src/ch3u_rma_ops.c
index c690907..e7318e2 100644
--- a/src/mpid/ch3/src/ch3u_rma_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_ops.c
@@ -159,7 +159,6 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
     {
         MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
         MPIDI_RMA_Op_t *new_ptr = NULL;
-        MPIDI_VC_t *orig_vc, *target_vc;
 
 	/* queue it up */
         MPIU_INSTR_DURATION_START(rmaqueue_alloc);
@@ -182,36 +181,20 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
 	new_ptr->target_datatype = target_datatype;
 	MPIU_INSTR_DURATION_END(rmaqueue_set);
 
-	/* check if target is local and shared memory is allocated on window,
-	  if so, we do not need to increment reference counts on datatype. This is
-	  because this operation will be directly done on shared memory region, instead
-	  of sending and receiving through the progress engine, therefore datatype
-	  will not be referenced by the progress engine */
-
-        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
-           the same node. However, in ch3:sock, even if origin and target are on the same node, they do
-           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
-           which is only set to TRUE when SHM region is allocated in nemesis.
-           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
-        */
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
-	if (!(win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
-	    /* if source or target datatypes are derived, increment their
-	       reference counts */
-	    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, predefined);
-	    if (!predefined)
-	    {
-	        MPID_Datatype_get_ptr(origin_datatype, dtp);
-	        MPID_Datatype_add_ref(dtp);
-	    }
-	    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, predefined);
-	    if (!predefined)
-	    {
-	        MPID_Datatype_get_ptr(target_datatype, dtp);
-	        MPID_Datatype_add_ref(dtp);
-	    }
-        }
+	/* if source or target datatypes are derived, increment their
+	   reference counts */
+	MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, predefined);
+	if (!predefined)
+	{
+	    MPID_Datatype_get_ptr(origin_datatype, dtp);
+	    MPID_Datatype_add_ref(dtp);
+	}
+	MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, predefined);
+	if (!predefined)
+	{
+	    MPID_Datatype_get_ptr(target_datatype, dtp);
+	    MPID_Datatype_add_ref(dtp);
+	}
     }
 
   fn_exit:
@@ -274,7 +257,6 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
     {
         MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
         MPIDI_RMA_Op_t *new_ptr = NULL;
-        MPIDI_VC_t *orig_vc, *target_vc;
 
 	/* queue it up */
         MPIU_INSTR_DURATION_START(rmaqueue_alloc);
@@ -294,30 +276,20 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
 	new_ptr->target_datatype = target_datatype;
 	MPIU_INSTR_DURATION_END(rmaqueue_set);
 	
-	/* check if target is local and shared memory is allocated on window,
-	  if so, we do not need to increment reference counts on datatype. This is
-	  because this operation will be directly done on shared memory region, instead
-	  of sending and receiving through the progress engine, therefore datatype
-	  will not be referenced by the progress engine */
-
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
-	if (!(win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
-	    /* if source or target datatypes are derived, increment their
-	       reference counts */
-	    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, predefined);
-	    if (!predefined)
-	    {
-	        MPID_Datatype_get_ptr(origin_datatype, dtp);
-	        MPID_Datatype_add_ref(dtp);
-	    }
-	    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, predefined);
-	    if (!predefined)
-	    {
-	        MPID_Datatype_get_ptr(target_datatype, dtp);
-	        MPID_Datatype_add_ref(dtp);
-	    }
-        }
+	/* if source or target datatypes are derived, increment their
+	   reference counts */
+	MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, predefined);
+	if (!predefined)
+	{
+	    MPID_Datatype_get_ptr(origin_datatype, dtp);
+	    MPID_Datatype_add_ref(dtp);
+	}
+	MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, predefined);
+	if (!predefined)
+	{
+	    MPID_Datatype_get_ptr(target_datatype, dtp);
+	    MPID_Datatype_add_ref(dtp);
+	}
     }
 
   fn_exit:
@@ -386,7 +358,6 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
     {
         MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
         MPIDI_RMA_Op_t *new_ptr = NULL;
-        MPIDI_VC_t *orig_vc, *target_vc;
 
 	/* queue it up */
         MPIU_INSTR_DURATION_START(rmaqueue_alloc);
@@ -427,28 +398,18 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
 	new_ptr->op = op;
 	MPIU_INSTR_DURATION_END(rmaqueue_set);
 	
-	/* check if target is local and shared memory is allocated on window,
-	  if so, we do not need to increment reference counts on datatype. This is
-	  because this operation will be directly done on shared memory region, instead
-	  of sending and receiving through the progress engine, therefore datatype
-	  will not be referenced by the progress engine */
-
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
-	if (!(win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
-	    /* if source or target datatypes are derived, increment their
-	       reference counts */
-	    if (!origin_predefined)
-	    {
-	        MPID_Datatype_get_ptr(origin_datatype, dtp);
-	        MPID_Datatype_add_ref(dtp);
-	    }
-	    if (!target_predefined)
-	    {
-	        MPID_Datatype_get_ptr(target_datatype, dtp);
-	        MPID_Datatype_add_ref(dtp);
-	    }
-        }
+	/* if source or target datatypes are derived, increment their
+	   reference counts */
+	if (!origin_predefined)
+	{
+	    MPID_Datatype_get_ptr(origin_datatype, dtp);
+	    MPID_Datatype_add_ref(dtp);
+	}
+	if (!target_predefined)
+	{
+	    MPID_Datatype_get_ptr(target_datatype, dtp);
+	    MPID_Datatype_add_ref(dtp);
+	}
     }
 
  fn_exit:

http://git.mpich.org/mpich.git/commitdiff/c3db96fde87ba3f1fa5f36953cbc0a9bc7f5086f

commit c3db96fde87ba3f1fa5f36953cbc0a9bc7f5086f
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Sep 25 17:38:02 2013 -0500

    change the condition of memory barrier.
    
    Change the condition for the full memory barrier when closing an epoch
    from *checking create_flavor* to *checking if SHM is allocated*,
    because *SHM is allocated* means that either create_flavor is SHARED
    or the alloc_shm optimization is enabled for MPI_Win_allocate.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 5d6d07c..a697250 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -332,7 +332,7 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
         MPIDI_VC_t *orig_vc, *target_vc;
 
         /* Ensure ordering of load/store operations. */
-        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+        if (win_ptr->shm_allocated == TRUE) {
            OPA_read_write_barrier();
         }
 
@@ -1593,7 +1593,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
     comm_size = comm_ptr->local_size;
         
     /* Ensure ordering of load/store operations. */
-    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+    if (win_ptr->shm_allocated == TRUE) {
         OPA_read_write_barrier();
     }
 
@@ -1890,7 +1890,7 @@ int MPIDI_Win_wait(MPID_Win *win_ptr)
     } 
 
     /* Ensure ordering of load/store operations. */
-    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+    if (win_ptr->shm_allocated == TRUE) {
         OPA_read_write_barrier();
     }
 
@@ -1934,7 +1934,7 @@ int MPIDI_Win_test(MPID_Win *win_ptr, int *flag)
             win_ptr->epoch_state = MPIDI_EPOCH_NONE;
 
         /* Ensure ordering of load/store operations. */
-        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+        if (win_ptr->shm_allocated == TRUE) {
             OPA_read_write_barrier();
         }
     }
@@ -2056,7 +2056,7 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
     }
 
     /* Ensure ordering of load/store operations. */
-    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+    if (win_ptr->shm_allocated == TRUE) {
         OPA_read_write_barrier();
     }
 

http://git.mpich.org/mpich.git/commitdiff/103382f5a366e07eef48e614e0eb7b1490b7edb0

commit 103382f5a366e07eef48e614e0eb7b1490b7edb0
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Sep 25 17:17:30 2013 -0500

    fix bug: delete memory barriers in win_lock/lock_all.
    
    We don't need the full memory barrier when opening an epoch: the
    ordering of modifications to the same window location is protected by
    the full memory barrier when closing the epoch. Users can modify a
    window location only within an RMA epoch.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 47e8764..5d6d07c 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -2004,8 +2004,6 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr)
         /* Lock must be taken immediately for shared memory windows because of
          * load/store access */
 
-        OPA_read_write_barrier();
-
         mpi_errno = MPIDI_CH3I_Send_lock_msg(dest, lock_type, win_ptr);
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
@@ -2479,7 +2477,6 @@ int MPIDI_Win_lock_all(int assert, MPID_Win *win_ptr)
 
     if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
         /* Immediately lock all targets for load/store access */
-        OPA_read_write_barrier();
 
         for (i = 0; i < MPIR_Comm_size(win_ptr->comm_ptr); i++) {
             /* Local process is already locked */

http://git.mpich.org/mpich.git/commitdiff/d075cc9d5b7898691919f80cf0bcccc595d010ec

commit d075cc9d5b7898691919f80cf0bcccc595d010ec
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Sep 25 17:09:59 2013 -0500

    fix bug: delete memory barriers in win_post/start.
    
    We don't need the full memory barrier when opening an epoch: the
    ordering of modifications to the same window location is protected by
    the full memory barrier when closing the epoch. Users can modify a
    window location only within an RMA epoch.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index cea6063..47e8764 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1393,11 +1393,6 @@ int MPIDI_Win_post(MPID_Group *post_grp_ptr, int assert, MPID_Win *win_ptr)
 	MPIU_INSTR_DURATION_END(winpost_clearlock);
     }
         
-    /* Ensure ordering of load/store operations. */
-    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
-        OPA_read_write_barrier();
-    }
-
     post_grp_size = post_grp_ptr->size;
         
     /* initialize the completion counter */
@@ -1550,11 +1545,6 @@ int MPIDI_Win_start(MPID_Group *group_ptr, int assert, MPID_Win *win_ptr)
 	MPIU_INSTR_DURATION_END(winstart_clearlock);
     }
     
-    /* Ensure ordering of load/store operations. */
-    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
-        OPA_read_write_barrier();
-    }
-
     win_ptr->start_group_ptr = group_ptr;
     MPIR_Group_add_ref( group_ptr );
     win_ptr->start_assert = assert;

http://git.mpich.org/mpich.git/commitdiff/4b5a188aec3a043884505cd7f34b855e8238a3a3

commit 4b5a188aec3a043884505cd7f34b855e8238a3a3
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Sep 25 17:36:56 2013 -0500

    fix bug: add a memory barrier in win_fence.
    
    Do a memory barrier when the window is allocated by
    MPI_Win_allocate_shared, if this fence is (1) not called with
    MPI_MODE_NOPRECEDE; (2) not the very first fence; (3) not following a
    fence called with MPI_MODE_NOSUCCEED.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index afe4796..cea6063 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -330,6 +330,12 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
 	int nRequest = 0;
 	int nRequestNew = 0;
         MPIDI_VC_t *orig_vc, *target_vc;
+
+        /* Ensure ordering of load/store operations. */
+        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+           OPA_read_write_barrier();
+        }
+
 	MPIU_INSTR_DURATION_START(winfence_rs);
 	/* This is the second or later fence. Do all the preceding RMA ops. */
 	comm_ptr = win_ptr->comm_ptr;

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/src/ch3u_rma_acc_ops.c |   91 ++++++++++++-----
 src/mpid/ch3/src/ch3u_rma_ops.c     |  173 +++++++++++++++++---------------
 src/mpid/ch3/src/ch3u_rma_sync.c    |  187 ++++++++++++-----------------------
 3 files changed, 220 insertions(+), 231 deletions(-)


hooks/post-receive
-- 
MPICH primary repository