[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2a2-80-g52531f7

Service Account noreply at mpich.org
Tue Dec 16 10:28:22 CST 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  52531f77b3370587d116c664b0a1eefe807fe068 (commit)
       via  17202c7c36929e377d508c59f6d0c9e75a282506 (commit)
       via  a44c53d609a4d5fe846c07fe6b92b259329acb20 (commit)
       via  68984ea90d9ad3e90991164d9974da03e64a553e (commit)
       via  c07eeba487e3b29469e1d38b9974a8196c93471e (commit)
       via  7b537b01e6327baef0bcc80987befbb4573bb2a4 (commit)
       via  8098f89c63f8e2d9d301b9a19f9ddf74c8e3b7e4 (commit)
       via  45afd1fdb5329bdbf047ac1d5b263c478585ea20 (commit)
       via  b8c9f31b707fe6485ac88705aa3a3caab2aa35d2 (commit)
       via  970226532fd89bd462402dc50bc6b7b62ebd27f6 (commit)
       via  03ebc97b2f35c4d291ef73ca19634f94cf74858d (commit)
       via  0167912040fde039796776bece438e9e06c17811 (commit)
       via  faae55ad0b81efc98c5850ecf770200ce975d08c (commit)
       via  e36203c35142e3841dfd85134d72bc2731958a8f (commit)
       via  385f0aae7d184464bb37121a78d7739b61ab5caf (commit)
       via  2b53ff6973a9dff00da382aadaa3459da557a5c4 (commit)
       via  1962d3b12679e012c74566da6167dff31946ca12 (commit)
       via  7c533ef3cf618df723da3dc7fba84be3c15442cf (commit)
       via  54af207cae91a9b3c6b1a74484198476d517253d (commit)
       via  886b1d8ddc400fe3e7b314194e487fc370ed31c5 (commit)
       via  89d8f6c19de179e993dc993e4b5931bd10c4ce73 (commit)
       via  ff6e5f9b52c6eecfcc24c1b443770751e36225e0 (commit)
       via  264be641c733a3a6a6d465c1e888eb68374567d6 (commit)
       via  6b56d44a209a75c685673fafd738f160dfc6400a (commit)
       via  6f8c3e59c2b8d01096e3b264af6ae3c056a6c5ce (commit)
       via  fb6a441b9fd125f4ec9b183262d917d4d1d34850 (commit)
       via  04d151901aef08d003b6b9b432fcab91007874de (commit)
       via  097c96285a88f1276ac5f474fd354a1b68421591 (commit)
       via  3a05784f1cb7b9e3ccb1d7d65c73671d886ad41d (commit)
       via  cc158ff2bd0b80bea572493e41565b9a657c2726 (commit)
       via  2493e98b996d7a1ba9a5e5434666cbea1d89779d (commit)
       via  87acbbbe5201f207d3d210c3340741748243e993 (commit)
       via  4739df598e83186eaa25db33606cc00193385e58 (commit)
       via  c73451c01fc27d8a4d2198ef7042d1df713893a1 (commit)
       via  e12376fd3763874907a177792b8fd81bb23aa9d1 (commit)
       via  e92b774663d92158ddcd6514033fb930a43a9bf9 (commit)
       via  7b1a5e2dfd6985e21c0f79c329eeb3d92303c461 (commit)
       via  b155e7e0f8bc8f65420fda64146f03b949230d96 (commit)
       via  389aab1673b9e54b9172985f69931bf1d731f3d9 (commit)
       via  a544067b0bf0396525200d22c323d7fb71a18175 (commit)
      from  ef22b67d3cc87300cbd3ffc44e6914207ddc44d2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/52531f77b3370587d116c664b0a1eefe807fe068

commit 52531f77b3370587d116c664b0a1eefe807fe068
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 15 15:46:55 2014 -0600

    Simplify epoch checking in Win_lock
    
    When lock_epoch_count != 0, we only need to check whether access_state
    is PER_TARGET in Win_lock.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index f2d562e..a28392a 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -957,10 +957,7 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win * win_ptr)
                             mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
     }
     else {
-        MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_NONE &&
-                            win_ptr->states.access_state != MPIDI_RMA_FENCE_ISSUED &&
-                            win_ptr->states.access_state != MPIDI_RMA_FENCE_GRANTED &&
-                            win_ptr->states.access_state != MPIDI_RMA_PER_TARGET,
+        MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET,
                             mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
     }
 

http://git.mpich.org/mpich.git/commitdiff/17202c7c36929e377d508c59f6d0c9e75a282506

commit 17202c7c36929e377d508c59f6d0c9e75a282506
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 15 13:42:04 2014 -0600

    Bug-fix: store lock entry pointer in request even if data is dropped
    
    When data is dropped but the lock is queued, we should still store
    the lock entry in the current request, so that we can try to acquire
    the lock once all data has been received and dropped.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 19320fd..6d9594d 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -424,6 +424,7 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
             req->dev.recv_data_sz = recv_data_sz;
             req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
             req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
+            req->dev.lock_queue_entry = new_ptr;
 
             MPIDI_CH3_PKT_RMA_GET_IMMED_LEN((*pkt), immed_len, mpi_errno);
             if (immed_len > 0) {
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index f1e1352..c2e90b6 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -1463,8 +1463,7 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *vc,
 
     /* Note that if we decided to drop op data, here we just need to complete this
        request; otherwise we try to get the lock again in this handler. */
-    if (rreq->dev.drop_data == FALSE) {
-    MPIU_Assert(lock_queue_entry != NULL);
+    if (rreq->dev.lock_queue_entry != NULL) {
 
     /* Mark all data received in lock queue entry */
     lock_queue_entry->all_data_recved = 1;

http://git.mpich.org/mpich.git/commitdiff/a44c53d609a4d5fe846c07fe6b92b259329acb20

commit a44c53d609a4d5fe846c07fe6b92b259329acb20
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 15 13:40:06 2014 -0600

    Bug-fix: first dequeue lock entry from queue, then perform op in this entry
    
    Here we should first dequeue the current lock queue entry from the
    lock queue and then perform the operation in it. This is because
    performing the op in the current lock entry may trigger the
    release_lock() function, which goes on to check the lock queue again.
    If we did not remove the current entry from the queue, release_lock()
    would try to process it a second time, which leads to incorrect
    execution.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index b57bfa5..f1e1352 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -1408,14 +1408,13 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
                 if (lock_entry->all_data_recved) {
                 MPIDI_CH3_PKT_RMA_GET_LOCK_TYPE(lock_entry->pkt, requested_lock, mpi_errno);
                 if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, requested_lock) == 1) {
-                    /* perform this OP */
+                    /* dequeue entry from lock queue */
+                    MPL_LL_DELETE(win_ptr->lock_queue, win_ptr->lock_queue_tail, lock_entry);
 
+                    /* perform this OP */
                     mpi_errno = perform_op_in_lock_queue(win_ptr, lock_entry);
                     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-                    /* dequeue entry from lock queue */
-                    MPL_LL_DELETE(win_ptr->lock_queue, win_ptr->lock_queue_tail, lock_entry);
-
                     /* free this entry */
                     mpi_errno = MPIDI_CH3I_Win_lock_entry_free(win_ptr, lock_entry);
                     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
@@ -1476,13 +1475,13 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *vc,
     MPID_Win_get_ptr(target_win_handle, win_ptr);
 
     if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, requested_lock) == 1) {
+        /* dequeue entry from lock queue */
+        MPL_LL_DELETE(win_ptr->lock_queue, win_ptr->lock_queue_tail, lock_queue_entry);
+
         /* perform this OP */
         mpi_errno = perform_op_in_lock_queue(win_ptr, lock_queue_entry);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        /* dequeue entry from lock queue */
-        MPL_LL_DELETE(win_ptr->lock_queue, win_ptr->lock_queue_tail, lock_queue_entry);
-
         /* free this entry */
         mpi_errno = MPIDI_CH3I_Win_lock_entry_free(win_ptr, lock_queue_entry);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

http://git.mpich.org/mpich.git/commitdiff/68984ea90d9ad3e90991164d9974da03e64a553e

commit 68984ea90d9ad3e90991164d9974da03e64a553e
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Fri Dec 12 17:37:40 2014 -0600

    Change function name: set_lock_sync_counter -> handle_lock_ack
    
    No reviewer.

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
index 0c5ec7c..0a11fcc 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
@@ -2160,7 +2160,7 @@ int MPID_nem_ib_PktHandler_GetResp(MPIDI_VC_t * vc,
 
     /* decrement ack_counter on target */
     if (get_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+        mpi_errno = handle_lock_ack(win_ptr, target_rank,
                                           get_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 8b5bb30..19320fd 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -498,7 +498,7 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
 }
 
 
-static inline int set_lock_sync_counter(MPID_Win *win_ptr, int target_rank,
+static inline int handle_lock_ack(MPID_Win *win_ptr, int target_rank,
                                         MPIDI_CH3_Pkt_flags_t flags)
 {
     MPIDI_RMA_Target_t *t = NULL;
@@ -654,7 +654,7 @@ static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
     MPIR_T_PVAR_TIMER_START(RMA, rma_winlock_getlocallock);
 
     if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 1) {
-        mpi_errno = set_lock_sync_counter(win_ptr, win_ptr->comm_ptr->rank,
+        mpi_errno = handle_lock_ack(win_ptr, win_ptr->comm_ptr->rank,
                                           MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
@@ -670,7 +670,7 @@ static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
 
         new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, &pkt);
         if (new_ptr == NULL) {
-            mpi_errno = set_lock_sync_counter(win_ptr, win_ptr->comm_ptr->rank,
+            mpi_errno = handle_lock_ack(win_ptr, win_ptr->comm_ptr->rank,
                                               MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
             goto fn_exit;
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 1449bb7..b57bfa5 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -1292,7 +1292,7 @@ static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_ent
 
         MPIDI_CH3_Pkt_lock_t *lock_pkt = &(lock_entry->pkt.lock);
         if (lock_pkt->origin_rank == win_ptr->comm_ptr->rank) {
-            mpi_errno = set_lock_sync_counter(win_ptr, lock_pkt->origin_rank,
+            mpi_errno = handle_lock_ack(win_ptr, lock_pkt->origin_rank,
                                               MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index fa69d51..76ac30d 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -1016,7 +1016,7 @@ int MPIDI_CH3_PktHandler_CASResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
     /* decrement ack_counter on this target */
     if (cas_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+        mpi_errno = handle_lock_ack(win_ptr, target_rank,
                                           cas_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -1184,7 +1184,7 @@ int MPIDI_CH3_PktHandler_FOPResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
     /* decrement ack_counter */
     if (fop_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+        mpi_errno = handle_lock_ack(win_ptr, target_rank,
                                           fop_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -1240,7 +1240,7 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     /* decrement ack_counter on target */
     if (get_accum_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+        mpi_errno = handle_lock_ack(win_ptr, target_rank,
                                           get_accum_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -1377,7 +1377,7 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
     /* decrement ack_counter on target */
     if (get_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+        mpi_errno = handle_lock_ack(win_ptr, target_rank,
                                           get_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -1465,7 +1465,7 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     MPID_Win_get_ptr(lock_ack_pkt->source_win_handle, win_ptr);
 
-    mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+    mpi_errno = handle_lock_ack(win_ptr, target_rank,
                                       lock_ack_pkt->flags);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -1500,7 +1500,7 @@ int MPIDI_CH3_PktHandler_LockOpAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     MPID_Win_get_ptr(lock_op_ack_pkt->source_win_handle, win_ptr);
 
-    mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+    mpi_errno = handle_lock_ack(win_ptr, target_rank,
                                       lock_op_ack_pkt->flags);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 

http://git.mpich.org/mpich.git/commitdiff/c07eeba487e3b29469e1d38b9974a8196c93471e

commit c07eeba487e3b29469e1d38b9974a8196c93471e
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Fri Dec 12 16:35:22 2014 -0600

    Add check for no_locks info in set_info function.
    
    If the user sets no_locks to true, we do not need
    to allocate the passive lock request pool and the
    lock data pool on the window.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_win_fns.c b/src/mpid/ch3/src/ch3u_win_fns.c
index 8d372ed..166915b 100644
--- a/src/mpid/ch3/src/ch3u_win_fns.c
+++ b/src/mpid/ch3/src/ch3u_win_fns.c
@@ -294,6 +294,23 @@ int MPIDI_Win_set_info(MPID_Win *win, MPID_Info *info)
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_WIN_SET_INFO);
 
     /********************************************************/
+    /************** check for info no_locks *****************/
+    /********************************************************/
+
+    if (info != NULL) {
+        int info_flag = 0;
+        char info_value[MPI_MAX_INFO_VAL+1];
+        MPIR_Info_get_impl(info, "no_locks", MPI_MAX_INFO_VAL,
+                           info_value, &info_flag);
+        if (info_flag) { /* key present: "true" sets no_locks, "false" clears it */
+            if (!strncmp(info_value, "true", strlen("true")))
+                win->info_args.no_locks = 1;
+            if (!strncmp(info_value, "false", strlen("false")))
+                win->info_args.no_locks = 0;
+        }
+    }
+
+    /********************************************************/
     /*************** check for info alloc_shm ***************/
     /********************************************************/
 
diff --git a/src/mpid/ch3/src/mpid_rma.c b/src/mpid/ch3/src/mpid_rma.c
index 6518a9a..0c1a646 100644
--- a/src/mpid/ch3/src/mpid_rma.c
+++ b/src/mpid/ch3/src/mpid_rma.c
@@ -372,8 +372,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
         (*win_ptr)->slots[i].target_list_tail = NULL;
     }
 
-    /* FIXME: we can optimize by letting the user to pass WIN INFO hint if they will not use passive target,
-       in such case we do not need to allocate window pool for lock entries. */
+    if (!(*win_ptr)->info_args.no_locks) {
     MPIU_CHKPMEM_MALLOC((*win_ptr)->lock_entry_pool_start, MPIDI_RMA_Lock_entry_t *,
                         sizeof(MPIDI_RMA_Lock_entry_t) * MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE,
                         mpi_errno, "RMA lock entry pool");
@@ -383,6 +382,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
         MPL_LL_APPEND((*win_ptr)->lock_entry_pool, (*win_ptr)->lock_entry_pool_tail,
                       &((*win_ptr)->lock_entry_pool_start[i]));
     }
+    }
 
     /* enqueue window into the global list */
     MPIU_CHKPMEM_MALLOC(win_elem, MPIDI_RMA_Win_list_t *, sizeof(MPIDI_RMA_Win_list_t), mpi_errno,
diff --git a/src/mpid/ch3/src/mpidi_rma.c b/src/mpid/ch3/src/mpidi_rma.c
index d05c3c5..408070d 100644
--- a/src/mpid/ch3/src/mpidi_rma.c
+++ b/src/mpid/ch3/src/mpidi_rma.c
@@ -239,7 +239,9 @@ int MPIDI_Win_free(MPID_Win ** win_ptr)
     MPIU_Free((*win_ptr)->op_pool_start);
     MPIU_Free((*win_ptr)->target_pool_start);
     MPIU_Free((*win_ptr)->slots);
+    if (!(*win_ptr)->info_args.no_locks) {
     MPIU_Free((*win_ptr)->lock_entry_pool_start);
+    }
     MPIU_Assert((*win_ptr)->current_lock_data_bytes == 0);
 
     /* Free the attached buffer for windows created with MPI_Win_allocate() */

http://git.mpich.org/mpich.git/commitdiff/7b537b01e6327baef0bcc80987befbb4573bb2a4

commit 7b537b01e6327baef0bcc80987befbb4573bb2a4
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Fri Dec 12 16:00:56 2014 -0600

    Setting infos in win_init function.
    
    No reviewer.

diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
index f8f5f27..efd16cb 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
@@ -279,24 +279,6 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPID_Info *
         goto fn_exit;
     }
 
-    /* If create flavor is MPI_WIN_FLAVOR_ALLOCATE, alloc_shared_noncontig is set to 1 by default. */
-    if ((*win_ptr)->create_flavor == MPI_WIN_FLAVOR_ALLOCATE)
-        (*win_ptr)->info_args.alloc_shared_noncontig = 1;
-
-    /* Check if we are allowed to allocate space non-contiguously */
-    if (info != NULL) {
-        int alloc_shared_nctg_flag = 0;
-        char alloc_shared_nctg_value[MPI_MAX_INFO_VAL+1];
-        MPIR_Info_get_impl(info, "alloc_shared_noncontig", MPI_MAX_INFO_VAL,
-                           alloc_shared_nctg_value, &alloc_shared_nctg_flag);
-        if (alloc_shared_nctg_flag == 1) {
-            if (!strncmp(alloc_shared_nctg_value, "true", strlen("true")))
-                (*win_ptr)->info_args.alloc_shared_noncontig = 1;
-            if (!strncmp(alloc_shared_nctg_value, "false", strlen("false")))
-                (*win_ptr)->info_args.alloc_shared_noncontig = 0;
-        }
-    }
-
     /* see if we can allocate all windows contiguously */
     noncontig = (*win_ptr)->info_args.alloc_shared_noncontig;
 
diff --git a/src/mpid/ch3/src/ch3u_win_fns.c b/src/mpid/ch3/src/ch3u_win_fns.c
index 5339c4f..8d372ed 100644
--- a/src/mpid/ch3/src/ch3u_win_fns.c
+++ b/src/mpid/ch3/src/ch3u_win_fns.c
@@ -51,9 +51,6 @@ int MPIDI_CH3U_Win_create_gather( void *base, MPI_Aint size, int disp_unit,
     comm_size = (*win_ptr)->comm_ptr->local_size;
     rank      = (*win_ptr)->comm_ptr->rank;
 
-    /* RMA handlers should be set before calling this function */
-    mpi_errno = (*win_ptr)->RMAFns.Win_set_info(*win_ptr, info);
-
     MPIR_T_PVAR_TIMER_START(RMA, rma_wincreate_allgather);
     /* allocate memory for the base addresses, disp_units, and
        completion counters of all processes */
@@ -296,7 +293,50 @@ int MPIDI_Win_set_info(MPID_Win *win, MPID_Info *info)
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_WIN_SET_INFO);
 
-    /* No op, info arguments are ignored by default */
+    /********************************************************/
+    /*************** check for info alloc_shm ***************/
+    /********************************************************/
+
+    if (win->create_flavor == MPI_WIN_FLAVOR_CREATE)
+        win->info_args.alloc_shm = FALSE;
+    if (win->create_flavor == MPI_WIN_FLAVOR_ALLOCATE ||
+        win->create_flavor == MPI_WIN_FLAVOR_SHARED)
+        win->info_args.alloc_shm = TRUE;
+
+    if (info != NULL) {
+        int info_flag = 0;
+        char info_value[MPI_MAX_INFO_VAL+1];
+        MPIR_Info_get_impl(info, "alloc_shm", MPI_MAX_INFO_VAL,
+                           info_value, &info_flag);
+        if (info_flag) {
+            if (!strncmp(info_value, "true", sizeof("true")))
+                win->info_args.alloc_shm = TRUE;
+            if (!strncmp(info_value, "false", sizeof("false")))
+                win->info_args.alloc_shm = FALSE;
+        }
+    }
+
+    if (win->create_flavor == MPI_WIN_FLAVOR_DYNAMIC)
+        win->info_args.alloc_shm = FALSE;
+
+    /********************************************************/
+    /******* check for info alloc_shared_noncontig **********/
+    /********************************************************/
+
+    if (win->create_flavor == MPI_WIN_FLAVOR_ALLOCATE)
+        win->info_args.alloc_shared_noncontig = 1;
+    if (info != NULL) {
+        int info_flag = 0;
+        char info_value[MPI_MAX_INFO_VAL+1];
+        MPIR_Info_get_impl(info, "alloc_shared_noncontig", MPI_MAX_INFO_VAL,
+                           info_value, &info_flag);
+        if (info_flag) {
+            if (!strncmp(info_value, "true", strlen("true")))
+                win->info_args.alloc_shared_noncontig = 1;
+            if (!strncmp(info_value, "false", strlen("false")))
+                win->info_args.alloc_shared_noncontig = 0;
+        }
+    }
 
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_WIN_SET_INFO);
diff --git a/src/mpid/ch3/src/mpid_rma.c b/src/mpid/ch3/src/mpid_rma.c
index 4683ee0..6518a9a 100644
--- a/src/mpid/ch3/src/mpid_rma.c
+++ b/src/mpid/ch3/src/mpid_rma.c
@@ -43,7 +43,7 @@ MPIU_THREADSAFE_INIT_DECL(initRMAoptions);
 
 MPIDI_RMA_Win_list_t *MPIDI_RMA_Win_list = NULL, *MPIDI_RMA_Win_list_tail = NULL;
 
-static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
+static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model, MPID_Info *info,
                     MPID_Comm * comm_ptr, MPID_Win ** win_ptr);
 
 
@@ -110,22 +110,12 @@ int MPID_Win_create(void *base, MPI_Aint size, int disp_unit, MPID_Info * info,
     }
 
     mpi_errno =
-        win_init(size, disp_unit, MPI_WIN_FLAVOR_CREATE, MPI_WIN_UNIFIED, comm_ptr, win_ptr);
+        win_init(size, disp_unit, MPI_WIN_FLAVOR_CREATE, MPI_WIN_UNIFIED, info, comm_ptr, win_ptr);
     if (mpi_errno)
         MPIU_ERR_POP(mpi_errno);
 
     (*win_ptr)->base = base;
 
-    /* FOR CREATE, alloc_shm info is default to set to FALSE */
-    (*win_ptr)->info_args.alloc_shm = FALSE;
-    if (info != NULL) {
-        int alloc_shm_flag = 0;
-        char shm_alloc_value[MPI_MAX_INFO_VAL+1];
-        MPIR_Info_get_impl(info, "alloc_shm", MPI_MAX_INFO_VAL, shm_alloc_value, &alloc_shm_flag);
-        if ((alloc_shm_flag == 1) && (!strncmp(shm_alloc_value, "true", sizeof("true"))))
-            (*win_ptr)->info_args.alloc_shm = TRUE;
-    }
-
     mpi_errno = MPIDI_CH3U_Win_fns.create(base, size, disp_unit, info, comm_ptr, win_ptr);
     if (mpi_errno)
         MPIU_ERR_POP(mpi_errno);
@@ -149,22 +139,11 @@ int MPID_Win_allocate(MPI_Aint size, int disp_unit, MPID_Info * info,
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_ALLOCATE);
 
     mpi_errno =
-        win_init(size, disp_unit, MPI_WIN_FLAVOR_ALLOCATE, MPI_WIN_UNIFIED, comm_ptr, win_ptr);
+        win_init(size, disp_unit, MPI_WIN_FLAVOR_ALLOCATE, MPI_WIN_UNIFIED, info, comm_ptr, win_ptr);
     if (mpi_errno != MPI_SUCCESS) {
         MPIU_ERR_POP(mpi_errno);
     }
 
-    /* FOR ALLOCATE, alloc_shm info is default to set to TRUE */
-    (*win_ptr)->info_args.alloc_shm = TRUE;
-
-    if (info != NULL) {
-        int alloc_shm_flag = 0;
-        char shm_alloc_value[MPI_MAX_INFO_VAL + 1];
-        MPIR_Info_get_impl(info, "alloc_shm", MPI_MAX_INFO_VAL, shm_alloc_value, &alloc_shm_flag);
-        if ((alloc_shm_flag == 1) && (!strncmp(shm_alloc_value, "false", sizeof("false"))))
-            (*win_ptr)->info_args.alloc_shm = FALSE;
-    }
-
     mpi_errno = MPIDI_CH3U_Win_fns.allocate(size, disp_unit, info, comm_ptr, baseptr, win_ptr);
     if (mpi_errno != MPI_SUCCESS) {
         MPIU_ERR_POP(mpi_errno);
@@ -190,7 +169,7 @@ int MPID_Win_create_dynamic(MPID_Info * info, MPID_Comm * comm_ptr, MPID_Win **
 
     mpi_errno = win_init(0 /* spec defines size to be 0 */ ,
                          1 /* spec defines disp_unit to be 1 */ ,
-                         MPI_WIN_FLAVOR_DYNAMIC, MPI_WIN_UNIFIED, comm_ptr, win_ptr);
+                         MPI_WIN_FLAVOR_DYNAMIC, MPI_WIN_UNIFIED, info, comm_ptr, win_ptr);
 
     if (mpi_errno)
         MPIU_ERR_POP(mpi_errno);
@@ -263,22 +242,10 @@ int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPID_Info * info, MPI
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);
 
     mpi_errno =
-        win_init(size, disp_unit, MPI_WIN_FLAVOR_SHARED, MPI_WIN_UNIFIED, comm_ptr, win_ptr);
+        win_init(size, disp_unit, MPI_WIN_FLAVOR_SHARED, MPI_WIN_UNIFIED, info, comm_ptr, win_ptr);
     if (mpi_errno)
         MPIU_ERR_POP(mpi_errno);
 
-    /* FOR ALLOCATE_SHARED, alloc_shm info is default to set to TRUE */
-    (*win_ptr)->info_args.alloc_shm = TRUE;
-
-    if (info != NULL) {
-        int alloc_shm_flag = 0;
-        char shm_alloc_value[MPI_MAX_INFO_VAL + 1];
-        MPIR_Info_get_impl(info, "alloc_shm", MPI_MAX_INFO_VAL, shm_alloc_value, &alloc_shm_flag);
-        /* if value of 'alloc_shm' info is not set to true, throw an error */
-        if (alloc_shm_flag == 1 && strncmp(shm_alloc_value, "true", sizeof("true")))
-            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**infoval");
-    }
-
     mpi_errno =
         MPIDI_CH3U_Win_fns.allocate_shared(size, disp_unit, info, comm_ptr, base_ptr, win_ptr);
     if (mpi_errno != MPI_SUCCESS)
@@ -294,7 +261,7 @@ int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPID_Info * info, MPI
 #define FUNCNAME win_init
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
+static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model, MPID_Info *info,
                     MPID_Comm * comm_ptr, MPID_Win ** win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
@@ -369,6 +336,13 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     (*win_ptr)->info_args.alloc_shared_noncontig = 0;
     (*win_ptr)->info_args.alloc_shm = FALSE;
 
+    /* Set function pointers on window */
+    MPID_WIN_FTABLE_SET_DEFAULTS(win_ptr);
+
+    /* Set info_args on window based on info provided by user */
+    mpi_errno = (*win_ptr)->RMAFns.Win_set_info((*win_ptr), info);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
     MPIU_CHKPMEM_MALLOC((*win_ptr)->op_pool_start, MPIDI_RMA_Op_t *,
                         sizeof(MPIDI_RMA_Op_t) * MPIR_CVAR_CH3_RMA_OP_WIN_POOL_SIZE, mpi_errno,
                         "RMA op pool");
@@ -410,8 +384,6 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
                       &((*win_ptr)->lock_entry_pool_start[i]));
     }
 
-    MPID_WIN_FTABLE_SET_DEFAULTS(win_ptr);
-
     /* enqueue window into the global list */
     MPIU_CHKPMEM_MALLOC(win_elem, MPIDI_RMA_Win_list_t *, sizeof(MPIDI_RMA_Win_list_t), mpi_errno,
                         "Window list element");

http://git.mpich.org/mpich.git/commitdiff/8098f89c63f8e2d9d301b9a19f9ddf74c8e3b7e4

commit 8098f89c63f8e2d9d301b9a19f9ddf74c8e3b7e4
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Fri Dec 12 10:30:23 2014 -0600

    Avoid memcpy if we decided to drop current op data.
    
    No reviewer.

diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_progress.c b/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
index 21fb2d2..f022651 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
@@ -738,7 +738,9 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, MPIDI_msg_sz_t buflen)
             {
                 size_t iov_len = iov->MPID_IOV_LEN;
 		MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "        %d", (int)iov_len);
-                MPIU_Memcpy (iov->MPID_IOV_BUF, buf, iov_len);
+                if (rreq->dev.drop_data == FALSE) {
+                    MPIU_Memcpy (iov->MPID_IOV_BUF, buf, iov_len);
+                }
 
                 buflen -= iov_len;
                 buf    += iov_len;
@@ -751,7 +753,9 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, MPIDI_msg_sz_t buflen)
                 if (buflen > 0)
                 {
 		    MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "        " MPIDI_MSG_SZ_FMT, buflen);
-                    MPIU_Memcpy (iov->MPID_IOV_BUF, buf, buflen);
+                    if (rreq->dev.drop_data == FALSE) {
+                        MPIU_Memcpy (iov->MPID_IOV_BUF, buf, buflen);
+                    }
                     iov->MPID_IOV_BUF = (void *)((char *)iov->MPID_IOV_BUF + buflen);
                     iov->MPID_IOV_LEN -= buflen;
                     buflen = 0;
diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index 90c1f46..233a7eb 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -345,6 +345,7 @@ extern MPIDI_Process_t MPIDI_Process;
     (sreq_)->dev.match.parts.context_id = comm->context_id + context_offset;	\
     (sreq_)->dev.user_buf = (void *) buf;			\
     (sreq_)->dev.user_count = count;				\
+    (sreq_)->dev.drop_data = FALSE;                             \
     (sreq_)->dev.datatype = datatype;				\
     (sreq_)->dev.datatype_ptr	   = NULL;                      \
     (sreq_)->dev.segment_ptr	   = NULL;                      \
@@ -384,6 +385,7 @@ extern MPIDI_Process_t MPIDI_Process;
     (rreq_)->dev.iov_offset   = 0;                              \
     (rreq_)->dev.OnDataAvail	   = NULL;                      \
     (rreq_)->dev.OnFinal	   = NULL;                      \
+    (rreq_)->dev.drop_data = FALSE;                             \
      MPIDI_CH3_REQUEST_INIT(rreq_);\
 }
 
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index 86ff728..85f24d9 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -374,6 +374,7 @@ typedef struct MPIDI_Request {
     void        *user_buf;
     int          user_count;
     MPI_Datatype datatype;
+    int drop_data;
 
     /* segment, segment_first, and segment_size are used when processing 
        non-contiguous datatypes */
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 44b9167..8b5bb30 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -417,6 +417,7 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
 
         /* fill in area in req that will be used in Receive_data_found() */
         if (lock_discarded || data_discarded) {
+            req->dev.drop_data = TRUE;
             req->dev.user_buf = NULL;
             req->dev.user_count = target_count;
             req->dev.datatype = target_dtp;
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
index 328479f..c2ae4a7 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
@@ -148,7 +148,9 @@ int MPIDI_CH3U_Receive_data_found(MPID_Request *rreq, char *buf, MPIDI_msg_sz_t
         {
             MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"Copying contiguous data to user buffer");
             /* copy data out of the receive buffer */
-            MPIU_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
+            if (rreq->dev.drop_data == FALSE) {
+                MPIU_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
+            }
             *buflen = data_sz;
             *complete = TRUE;
         }
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 671febf..1449bb7 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -1462,6 +1462,9 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *vc,
     /* This handler is triggered when we received all data of a lock queue
        entry */
 
+    /* Note that if we decided to drop op data, here we just need to complete this
+       request; otherwise we try to get the lock again in this handler. */
+    if (rreq->dev.drop_data == FALSE) {
     MPIU_Assert(lock_queue_entry != NULL);
 
     /* Mark all data received in lock queue entry */
@@ -1487,6 +1490,7 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *vc,
     /* If try acquiring lock failed, just leave the lock queue entry in the queue with
        all_data_recved marked as 1, release_lock() function will traverse the queue
        and find entry with all_data_recved being 1 to grant the lock. */
+    }
 
     /* mark receive data transfer as complete and decrement CC in receive
        request */
diff --git a/src/mpid/ch3/src/ch3u_request.c b/src/mpid/ch3/src/ch3u_request.c
index 7547ce5..7e795b2 100644
--- a/src/mpid/ch3/src/ch3u_request.c
+++ b/src/mpid/ch3/src/ch3u_request.c
@@ -93,6 +93,7 @@ MPID_Request * MPID_Request_create(void)
         req->dev.OnFinal           = NULL;
         req->dev.user_buf          = NULL;
         req->dev.final_user_buf    = NULL;
+        req->dev.drop_data         = FALSE;
 #ifdef MPIDI_CH3_REQUEST_INIT
 	MPIDI_CH3_REQUEST_INIT(req);
 #endif

http://git.mpich.org/mpich.git/commitdiff/45afd1fdb5329bdbf047ac1d5b263c478585ea20

commit 45afd1fdb5329bdbf047ac1d5b263c478585ea20
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 8 16:09:44 2014 -0600

    Support handling different LOCK ACKs
    
    No reviewer.

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
index c342a56..0c5ec7c 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
@@ -2160,7 +2160,8 @@ int MPID_nem_ib_PktHandler_GetResp(MPIDI_VC_t * vc,
 
     /* decrement ack_counter on target */
     if (get_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank);
+        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+                                          get_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     if (get_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK) {
diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index a0dc6bf..90c1f46 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -1826,6 +1826,8 @@ int MPIDI_CH3_PktHandler_Lock( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *,
 			      MPIDI_msg_sz_t *, MPID_Request ** );
 int MPIDI_CH3_PktHandler_LockAck( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *,
 				      MPIDI_msg_sz_t *, MPID_Request ** );
+int MPIDI_CH3_PktHandler_LockOpAck( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *,
+                                    MPIDI_msg_sz_t *, MPID_Request ** );
 int MPIDI_CH3_PktHandler_Unlock( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *,
                                  MPIDI_msg_sz_t *, MPID_Request ** );
 int MPIDI_CH3_PktHandler_Flush( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *,
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index c3c444a..385c99f 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -90,6 +90,7 @@ typedef enum {
     MPIDI_CH3_PKT_FLUSH,
     MPIDI_CH3_PKT_DECR_AT_COUNTER,
     MPIDI_CH3_PKT_LOCK_ACK,
+    MPIDI_CH3_PKT_LOCK_OP_ACK,
     MPIDI_CH3_PKT_FLUSH_ACK,
     /* RMA Packets end here */
     MPIDI_CH3_PKT_FLOW_CNTL_UPDATE,     /* FIXME: Unused */
@@ -378,6 +379,63 @@ MPIDI_CH3_PKT_DEFS
         }                                                               \
     }
 
+#define MPIDI_CH3_PKT_RMA_ERASE_FLAGS(pkt_, err_)                       \
+    {                                                                   \
+        err_ = MPI_SUCCESS;                                             \
+        switch((pkt_).type) {                                           \
+        case (MPIDI_CH3_PKT_PUT):                                       \
+            (pkt_).put.flags = MPIDI_CH3_PKT_FLAG_NONE;                 \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_GET):                                       \
+            (pkt_).get.flags = MPIDI_CH3_PKT_FLAG_NONE;                 \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_ACCUMULATE):                                \
+            (pkt_).accum.flags = MPIDI_CH3_PKT_FLAG_NONE;               \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_GET_ACCUM):                                 \
+            (pkt_).get_accum.flags = MPIDI_CH3_PKT_FLAG_NONE;           \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_CAS):                                       \
+            (pkt_).cas.flags = MPIDI_CH3_PKT_FLAG_NONE;                 \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_FOP):                                       \
+            (pkt_).fop.flags = MPIDI_CH3_PKT_FLAG_NONE;                 \
+            break;                                                      \
+        default:                                                        \
+            MPIU_ERR_SETANDJUMP1(err_, MPI_ERR_OTHER, "**invalidpkt", "**invalidpkt %d", (pkt_).type); \
+        }                                                               \
+    }
+
+#define MPIDI_CH3_PKT_RMA_GET_SOURCE_WIN_HANDLE(pkt_, win_hdl_, err_)   \
+    {                                                                   \
+        err_ = MPI_SUCCESS;                                             \
+        switch((pkt_).type) {                                           \
+        case (MPIDI_CH3_PKT_PUT):                                       \
+            win_hdl_ = (pkt_).put.source_win_handle;                    \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_GET):                                       \
+            win_hdl_ = (pkt_).get.source_win_handle;                    \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_ACCUMULATE):                                \
+            win_hdl_ = (pkt_).accum.source_win_handle;                  \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_GET_ACCUM):                                 \
+            win_hdl_ = (pkt_).get_accum.source_win_handle;              \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_CAS):                                       \
+            win_hdl_ = (pkt_).cas.source_win_handle;                    \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_FOP):                                       \
+            win_hdl_ = (pkt_).fop.source_win_handle;                    \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_LOCK):                                      \
+            win_hdl_ = (pkt_).lock.source_win_handle;                   \
+            break;                                                      \
+        default:                                                        \
+            MPIU_ERR_SETANDJUMP1(err_, MPI_ERR_OTHER, "**invalidpkt", "**invalidpkt %d", (pkt_).type); \
+        }                                                               \
+    }
+
 #define MPIDI_CH3_PKT_RMA_GET_TARGET_WIN_HANDLE(pkt_, win_hdl_, err_)   \
     {                                                                   \
         err_ = MPI_SUCCESS;                                             \
@@ -641,7 +699,7 @@ typedef struct MPIDI_CH3_Pkt_decr_at_counter {
 
 /*********************************************************************************/
 /* RMA control response packet (from target to origin, including LOCK_ACK,       */
-/* FLUSH_ACK)                                                                    */
+/* LOCK_OP_ACK, FLUSH_ACK)                                                       */
 /*********************************************************************************/
 
 typedef struct MPIDI_CH3_Pkt_lock_ack {
@@ -651,6 +709,13 @@ typedef struct MPIDI_CH3_Pkt_lock_ack {
     int target_rank;
 } MPIDI_CH3_Pkt_lock_ack_t;
 
+typedef struct MPIDI_CH3_Pkt_lock_op_ack {
+    MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
+    MPI_Win source_win_handle;
+    int target_rank;
+} MPIDI_CH3_Pkt_lock_op_ack_t;
+
 typedef struct MPIDI_CH3_Pkt_flush_ack {
     MPIDI_CH3_Pkt_type_t type;
     MPI_Win source_win_handle;
@@ -699,6 +764,7 @@ typedef union MPIDI_CH3_Pkt {
     MPIDI_CH3_Pkt_flush_t flush;
     MPIDI_CH3_Pkt_decr_at_counter_t decr_at_cnt;
     MPIDI_CH3_Pkt_lock_ack_t lock_ack;
+    MPIDI_CH3_Pkt_lock_op_ack_t lock_op_ack;
     MPIDI_CH3_Pkt_flush_ack_t flush_ack;
     /* RMA packets end here */
     MPIDI_CH3_Pkt_close_t close;
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index daf9542..44b9167 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -142,6 +142,48 @@ static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_p
     return mpi_errno;
 }
 
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Send_lock_op_ack_pkt
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+static inline int MPIDI_CH3I_Send_lock_op_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr,
+                                                  MPIDI_CH3_Pkt_flags_t flags,
+                                                  MPI_Win source_win_handle)
+{
+    MPIDI_CH3_Pkt_t upkt;
+    MPIDI_CH3_Pkt_lock_op_ack_t *lock_op_ack_pkt = &upkt.lock_op_ack;
+    MPID_Request *req = NULL;
+    int mpi_errno;
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
+
+    /* send lock op ack packet */
+    MPIDI_Pkt_init(lock_op_ack_pkt, MPIDI_CH3_PKT_LOCK_OP_ACK);
+    lock_op_ack_pkt->source_win_handle = source_win_handle;
+    lock_op_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
+    lock_op_ack_pkt->flags = flags;
+
+    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
+                     (MPIU_DBG_FDEST, "sending lock op ack pkt on vc=%p, source_win_handle=%#08x",
+                      vc, lock_op_ack_pkt->source_win_handle));
+
+    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
+    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_op_ack_pkt, sizeof(*lock_op_ack_pkt), &req);
+    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
+    if (mpi_errno) {
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
+    }
+
+    if (req != NULL) {
+        MPID_Request_release(req);
+    }
+
+  fn_fail:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
+    return mpi_errno;
+}
+
 
 #undef FUNCNAME
 #define FUNCNAME MPIDI_CH3I_Send_flush_ack_pkt
@@ -269,25 +311,33 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                       MPID_Request **reqp)
 {
     MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
+    MPIDI_CH3_Pkt_flags_t flag;
+    MPI_Win source_win_handle;
+    int lock_discarded = 0, data_discarded = 0;
     int mpi_errno = MPI_SUCCESS;
 
     (*reqp) = NULL;
 
     new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, pkt);
-    if (new_ptr == NULL) {
-        /* FIXME: we run out of resources of lock requests, needs to
-           send LOCK DISCARDED packet back to origin */
+    if (new_ptr != NULL) {
+        MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
+    }
+    else {
+        lock_discarded = 1;
     }
-    MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
 
     if (pkt->type == MPIDI_CH3_PKT_LOCK ||
         pkt->type == MPIDI_CH3_PKT_GET ||
         pkt->type == MPIDI_CH3_PKT_FOP ||
         pkt->type == MPIDI_CH3_PKT_CAS) {
-        new_ptr->all_data_recved = 1;
+
         /* return bytes of data processed in this pkt handler */
         (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
-        goto fn_exit;
+
+        if (new_ptr != NULL)
+            new_ptr->all_data_recved = 1;
+
+        goto issue_ack;
     }
     else {
         MPI_Aint type_size = 0;
@@ -310,18 +360,55 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
         recv_data_sz = type_size * target_count;
 
         if (recv_data_sz <= MPIDI_RMA_IMMED_BYTES) {
-            /* all data fits in packet header */
-            new_ptr->all_data_recved = 1;
+
             /* return bytes of data processed in this pkt handler */
             (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
-            goto fn_exit;
+
+            if (new_ptr != NULL)
+                new_ptr->all_data_recved = 1;
+
+            goto issue_ack;
         }
 
-        /* allocate tmp buffer to recieve data. */
-        new_ptr->data = MPIU_Malloc(recv_data_sz);
-        if (new_ptr->data == NULL) {
-            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
-                                 recv_data_sz);
+        if (new_ptr != NULL) {
+            if (win_ptr->current_lock_data_bytes + recv_data_sz
+                < MPIR_CVAR_CH3_RMA_LOCK_DATA_BYTES) {
+                new_ptr->data = MPIU_Malloc(recv_data_sz);
+            }
+
+            if (new_ptr->data == NULL) {
+                /* Note that there are two possible reasons for new_ptr->data to be NULL:
+                 * (1) win_ptr->current_lock_data_bytes + recv_data_sz >= MPIR_CVAR_CH3_RMA_LOCK_DATA_BYTES;
+                 * (2) MPIU_Malloc(recv_data_sz) failed.
+                 * In either case we cannot allocate memory for the lock data, so we give up
+                 * buffering the data; however, we still buffer the lock request.
+                 */
+                MPIDI_CH3_Pkt_t new_pkt;
+                MPIDI_CH3_Pkt_lock_t *lock_pkt = &new_pkt.lock;
+                MPI_Win target_win_handle;
+                int lock_type, origin_rank;
+
+                MPIDI_CH3_PKT_RMA_GET_TARGET_WIN_HANDLE((*pkt), target_win_handle, mpi_errno);
+                MPIDI_CH3_PKT_RMA_GET_SOURCE_WIN_HANDLE((*pkt), source_win_handle, mpi_errno);
+                MPIDI_CH3_PKT_RMA_GET_ORIGIN_RANK((*pkt), origin_rank, mpi_errno);
+                MPIDI_CH3_PKT_RMA_GET_LOCK_TYPE((*pkt), lock_type, mpi_errno);
+
+                MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
+                lock_pkt->target_win_handle = target_win_handle;
+                lock_pkt->source_win_handle = source_win_handle;
+                lock_pkt->lock_type = lock_type;
+                lock_pkt->origin_rank = origin_rank;
+
+                /* replace original pkt with lock pkt */
+                new_ptr->pkt = new_pkt;
+                new_ptr->all_data_recved = 1;
+
+                data_discarded = 1;
+            }
+            else {
+                win_ptr->current_lock_data_bytes += recv_data_sz;
+                new_ptr->data_size = recv_data_sz;
+            }
         }
 
         /* create request to receive upcoming requests */
@@ -329,27 +416,44 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
         MPIU_Object_set_ref(req, 1);
 
         /* fill in area in req that will be used in Receive_data_found() */
-        req->dev.user_buf = new_ptr->data;
-        req->dev.user_count = target_count;
-        req->dev.datatype = target_dtp;
-        req->dev.recv_data_sz = recv_data_sz;
-        req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
-        req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
-        req->dev.lock_queue_entry = new_ptr;
-
-        MPIDI_CH3_PKT_RMA_GET_IMMED_LEN((*pkt), immed_len, mpi_errno);
-        MPIDI_CH3_PKT_RMA_GET_IMMED_DATA_PTR((*pkt), immed_data, mpi_errno);
-
-        if (immed_len > 0) {
-            /* see if we can receive some data from packet header */
-            MPIU_Memcpy(req->dev.user_buf, immed_data, (size_t)immed_len);
-            req->dev.user_buf = (void*)((char*)req->dev.user_buf + immed_len);
-            req->dev.recv_data_sz -= immed_len;
+        if (lock_discarded || data_discarded) {
+            req->dev.user_buf = NULL;
+            req->dev.user_count = target_count;
+            req->dev.datatype = target_dtp;
+            req->dev.recv_data_sz = recv_data_sz;
+            req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
+            req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
+
+            MPIDI_CH3_PKT_RMA_GET_IMMED_LEN((*pkt), immed_len, mpi_errno);
+            if (immed_len > 0) {
+                req->dev.recv_data_sz -= immed_len;
+            }
+            data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
+            data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
+            MPIU_Assert(req->dev.recv_data_sz > 0);
+        }
+        else {
+            req->dev.user_buf = new_ptr->data;
+            req->dev.user_count = target_count;
+            req->dev.datatype = target_dtp;
+            req->dev.recv_data_sz = recv_data_sz;
+            req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
+            req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
+            req->dev.lock_queue_entry = new_ptr;
+
+            MPIDI_CH3_PKT_RMA_GET_IMMED_LEN((*pkt), immed_len, mpi_errno);
+            MPIDI_CH3_PKT_RMA_GET_IMMED_DATA_PTR((*pkt), immed_data, mpi_errno);
+
+            if (immed_len > 0) {
+                /* see if we can receive some data from packet header */
+                MPIU_Memcpy(req->dev.user_buf, immed_data, (size_t)immed_len);
+                req->dev.user_buf = (void*)((char*)req->dev.user_buf + immed_len);
+                req->dev.recv_data_sz -= immed_len;
+            }
+            data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
+            data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
+            MPIU_Assert(req->dev.recv_data_sz > 0);
         }
-
-        data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
-        data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
-        MPIU_Assert(req->dev.recv_data_sz > 0);
 
         mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
@@ -361,13 +465,31 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
             mpi_errno = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(vc, req, &complete);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
             if (complete) {
-                goto fn_exit;
+                goto issue_ack;
             }
         }
 
         (*reqp) = req;
     }
 
+ issue_ack:
+    MPIDI_CH3_PKT_RMA_GET_SOURCE_WIN_HANDLE((*pkt), source_win_handle, mpi_errno);
+    if (pkt->type == MPIDI_CH3_PKT_LOCK) {
+        if (lock_discarded) flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED;
+        else flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED;
+
+        mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr, flag, source_win_handle);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    }
+    else {
+        if (lock_discarded) flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED;
+        else if (data_discarded) flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_DISCARDED;
+        else flag = MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED;
+
+        mpi_errno = MPIDI_CH3I_Send_lock_op_ack_pkt(vc, win_ptr, flag, source_win_handle);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    }
+
  fn_exit:
     return mpi_errno;
  fn_fail:
@@ -375,7 +497,8 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
 }
 
 
-static inline int set_lock_sync_counter(MPID_Win *win_ptr, int target_rank)
+static inline int set_lock_sync_counter(MPID_Win *win_ptr, int target_rank,
+                                        MPIDI_CH3_Pkt_flags_t flags)
 {
     MPIDI_RMA_Target_t *t = NULL;
     int mpi_errno = MPI_SUCCESS;
@@ -390,21 +513,125 @@ static inline int set_lock_sync_counter(MPID_Win *win_ptr, int target_rank)
         MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
         if (win_ptr->comm_ptr->rank == target_rank ||
             (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
-            win_ptr->outstanding_locks--;
-            MPIU_Assert(win_ptr->outstanding_locks >= 0);
+            if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
+                win_ptr->outstanding_locks--;
+                MPIU_Assert(win_ptr->outstanding_locks >= 0);
+            }
+            else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
+                /* re-send lock request message. */
+                mpi_errno = send_lock_msg(target_rank, MPI_LOCK_SHARED, win_ptr);
+                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+            }
             goto fn_exit;
         }
     }
     else if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
-        win_ptr->outstanding_locks--;
-        MPIU_Assert(win_ptr->outstanding_locks >= 0);
+        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
+            win_ptr->outstanding_locks--;
+            MPIU_Assert(win_ptr->outstanding_locks >= 0);
+        }
+        else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
+            /* re-send lock request message. */
+            mpi_errno = send_lock_msg(target_rank, MPI_LOCK_SHARED, win_ptr);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        }
         goto fn_exit;
     }
 
     mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     MPIU_Assert(t != NULL);
-    t->access_state = MPIDI_RMA_LOCK_GRANTED;
+
+    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED)
+        t->access_state = MPIDI_RMA_LOCK_GRANTED;
+
+    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED)
+        t->access_state = MPIDI_RMA_LOCK_CALLED;
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+static inline int adjust_op_piggybacked_with_lock (MPID_Win *win_ptr,
+                                                   int target_rank,
+                                                   MPIDI_CH3_Pkt_flags_t flags) {
+    MPIDI_RMA_Target_t *target = NULL;
+    MPIDI_RMA_Op_t *op = NULL;
+    MPIDI_CH3_Pkt_flags_t op_flags = MPIDI_CH3_PKT_FLAG_NONE;
+    int mpi_errno = MPI_SUCCESS;
+
+    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &target);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    MPIU_Assert(target != NULL);
+
+    op = target->pending_op_list;
+    if (op != NULL) MPIDI_CH3_PKT_RMA_GET_FLAGS(op->pkt, op_flags, mpi_errno);
+
+    if (op_flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
+        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED ||
+            flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED) {
+            if (!op->request) {
+                if (op->ureq) {
+                    /* Complete user request and release the ch3 ref */
+                    MPID_Request_set_completed(op->ureq);
+                    MPID_Request_release(op->ureq);
+                }
+
+                MPIDI_CH3I_RMA_Ops_free_elem(win_ptr, &(target->pending_op_list),
+                                             &(target->pending_op_list_tail), op);
+            }
+            else {
+                MPIDI_CH3I_RMA_Ops_unlink(&(target->pending_op_list),
+                                          &(target->pending_op_list_tail), op);
+                if (op->is_dt) {
+                    MPIDI_CH3I_RMA_Ops_append(&(target->dt_op_list),
+                                              &(target->dt_op_list_tail), op);
+                }
+                else if (op->pkt.type == MPIDI_CH3_PKT_PUT ||
+                         op->pkt.type == MPIDI_CH3_PKT_ACCUMULATE) {
+                    MPIDI_CH3I_RMA_Ops_append(&(target->write_op_list),
+                                              &(target->write_op_list_tail), op);
+                }
+                else {
+                    MPIDI_CH3I_RMA_Ops_append(&(target->read_op_list),
+                                              &(target->read_op_list_tail), op);
+                }
+
+                if (op->ureq) {
+                    if (MPID_Request_is_complete(op->request)) {
+                        /* Complete the user request and let the cleanup function
+                           release the ch3 ref */
+                        MPID_Request_set_completed(op->ureq);
+                    }
+                    else {
+                        /* Increase ref for completion handler */
+                        MPIU_Object_add_ref(op->ureq);
+                        op->request->dev.request_handle = op->ureq->handle;
+                        if (op->request->dev.OnDataAvail == NULL) {
+                            op->request->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReqOpsComplete;
+                        }
+                        op->request->dev.OnFinal = MPIDI_CH3_ReqHandler_ReqOpsComplete;
+                    }
+                }
+            }
+        }
+        else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_DISCARDED ||
+                 flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
+            /* We need to re-transmit this operation, so we destroy
+               the internal request and erase all flags in current
+               operation. */
+            if (op->request) {
+                MPIDI_CH3_Request_destroy(op->request);
+                op->request = NULL;
+                win_ptr->active_req_cnt--;
+            }
+            MPIDI_CH3_PKT_RMA_ERASE_FLAGS(op->pkt, mpi_errno);
+
+            target->next_op_to_issue = op;
+        }
+    }
 
  fn_exit:
     return mpi_errno;
@@ -426,7 +653,8 @@ static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
     MPIR_T_PVAR_TIMER_START(RMA, rma_winlock_getlocallock);
 
     if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 1) {
-        mpi_errno = set_lock_sync_counter(win_ptr, win_ptr->comm_ptr->rank);
+        mpi_errno = set_lock_sync_counter(win_ptr, win_ptr->comm_ptr->rank,
+                                          MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     else {
@@ -441,8 +669,10 @@ static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
 
         new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, &pkt);
         if (new_ptr == NULL) {
-            /* FIXME: we run out of resources of lock requests, needs to
-               send LOCK DISCARDED packet back to origin */
+            mpi_errno = set_lock_sync_counter(win_ptr, win_ptr->comm_ptr->rank,
+                                              MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
         }
         MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
 
@@ -609,7 +839,6 @@ static inline int check_piggyback_lock(MPID_Win *win_ptr, MPIDI_VC_t *vc,
             /* cannot acquire the lock, queue up this operation. */
             mpi_errno = enqueue_lock_origin(win_ptr, vc, pkt, buflen, reqp);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
             (*acquire_lock_fail) = 1;
         }
     }
@@ -633,9 +862,9 @@ static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
             if ((flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
                 (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
                 pkt_flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-            mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr,
-                                                     pkt_flags,
-                                                     source_win_handle);
+            mpi_errno = MPIDI_CH3I_Send_lock_op_ack_pkt(vc, win_ptr,
+                                                        pkt_flags,
+                                                        source_win_handle);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
             MPIDI_CH3_Progress_signal_completion();
         }
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
index 3bf484f..328479f 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
@@ -581,6 +581,8 @@ int MPIDI_CH3_PktHandler_Init( MPIDI_CH3_PktHandler_Fcn *pktArray[],
 	MPIDI_CH3_PktHandler_Lock;
     pktArray[MPIDI_CH3_PKT_LOCK_ACK] =
 	MPIDI_CH3_PktHandler_LockAck;
+    pktArray[MPIDI_CH3_PKT_LOCK_OP_ACK] =
+	MPIDI_CH3_PktHandler_LockOpAck;
     pktArray[MPIDI_CH3_PKT_UNLOCK] =
         MPIDI_CH3_PktHandler_Unlock;
     pktArray[MPIDI_CH3_PKT_FLUSH] =
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 5ef4bff..671febf 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -1292,7 +1292,8 @@ static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_ent
 
         MPIDI_CH3_Pkt_lock_t *lock_pkt = &(lock_entry->pkt.lock);
         if (lock_pkt->origin_rank == win_ptr->comm_ptr->rank) {
-            mpi_errno = set_lock_sync_counter(win_ptr, lock_pkt->origin_rank);
+            mpi_errno = set_lock_sync_counter(win_ptr, lock_pkt->origin_rank,
+                                              MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
         else {
diff --git a/src/mpid/ch3/src/ch3u_rma_oplist.c b/src/mpid/ch3/src/ch3u_rma_oplist.c
index 7d6a2c7..4261903 100644
--- a/src/mpid/ch3/src/ch3u_rma_oplist.c
+++ b/src/mpid/ch3/src/ch3u_rma_oplist.c
@@ -355,12 +355,25 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
         if (mpi_errno != MPI_SUCCESS)
             MPIU_ERR_POP(mpi_errno);
 
+        (*made_progress) = 1;
+
+        if (curr_op->request != NULL)
+            win_ptr->active_req_cnt++;
+
         if (curr_op->pkt.type == MPIDI_CH3_PKT_PUT ||
             curr_op->pkt.type == MPIDI_CH3_PKT_ACCUMULATE) {
             target->put_acc_issued = 1; /* set PUT_ACC_FLAG when sending
                                            PUT/ACC operation. */
         }
 
+        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
+            /* If this operation is piggybacked with LOCK,
+               do not move it out of pending list, and do
+               not complete the user request, because we
+               may need to re-transmit it. */
+            break;
+        }
+
         if (!curr_op->request) {
             if (curr_op->ureq) {
                 /* Complete user request and release the ch3 ref */
@@ -374,6 +387,7 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
         }
         else {
             /* Sending is not completed immediately. */
+
             MPIDI_CH3I_RMA_Ops_unlink(&(target->pending_op_list),
                                       &(target->pending_op_list_tail), curr_op);
             if (curr_op->is_dt) {
@@ -418,13 +432,10 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
                 }
                 curr_op->request->dev.OnFinal = MPIDI_CH3_ReqHandler_ReqOpsComplete;
             }
-            win_ptr->active_req_cnt++;
         }
 
         curr_op = target->next_op_to_issue;
 
-        (*made_progress) = 1;
-
     } /* end of while loop */
 
   fn_exit:
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index 1d1718c..fa69d51 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -1016,7 +1016,12 @@ int MPIDI_CH3_PktHandler_CASResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
     /* decrement ack_counter on this target */
     if (cas_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank);
+        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+                                          cas_resp_pkt->flags);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+        mpi_errno = adjust_op_piggybacked_with_lock(win_ptr, target_rank,
+                                                    cas_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     if (cas_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK) {
@@ -1179,7 +1184,12 @@ int MPIDI_CH3_PktHandler_FOPResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
     /* decrement ack_counter */
     if (fop_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank);
+        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+                                          fop_resp_pkt->flags);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+        mpi_errno = adjust_op_piggybacked_with_lock(win_ptr, target_rank,
+                                                    fop_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     if (fop_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK) {
@@ -1230,7 +1240,12 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     /* decrement ack_counter on target */
     if (get_accum_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank);
+        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+                                          get_accum_resp_pkt->flags);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+        mpi_errno = adjust_op_piggybacked_with_lock(win_ptr, target_rank,
+                                                    get_accum_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     if (get_accum_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK) {
@@ -1362,7 +1377,12 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
     /* decrement ack_counter on target */
     if (get_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank);
+        mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+                                          get_resp_pkt->flags);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+        mpi_errno = adjust_op_piggybacked_with_lock(win_ptr, target_rank,
+                                                    get_resp_pkt->flags);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     if (get_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK) {
@@ -1432,7 +1452,6 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIDI_CH3_Pkt_lock_ack_t *lock_ack_pkt = &pkt->lock_ack;
     MPID_Win *win_ptr = NULL;
     int target_rank = lock_ack_pkt->target_rank;
-    MPIDI_CH3_Pkt_flags_t flags = lock_ack_pkt->flags;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
 
@@ -1446,9 +1465,49 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     MPID_Win_get_ptr(lock_ack_pkt->source_win_handle, win_ptr);
 
-    mpi_errno = set_lock_sync_counter(win_ptr, target_rank);
+    mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+                                      lock_ack_pkt->flags);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
+    *rreqp = NULL;
+    MPIDI_CH3_Progress_signal_completion();
+
+    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_lock_ack);
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
+ fn_exit:
+    return MPI_SUCCESS;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3_PktHandler_LockOpAck
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPIDI_CH3_PktHandler_LockOpAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                   MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
+{
+    MPIDI_CH3_Pkt_lock_op_ack_t *lock_op_ack_pkt = &pkt->lock_op_ack;
+    MPID_Win *win_ptr = NULL;
+    int target_rank = lock_op_ack_pkt->target_rank;
+    MPIDI_CH3_Pkt_flags_t flags = lock_op_ack_pkt->flags;
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKOPACK);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKOPACK);
+
+    *buflen = sizeof(MPIDI_CH3_Pkt_t);
+
+    MPID_Win_get_ptr(lock_op_ack_pkt->source_win_handle, win_ptr);
+
+    mpi_errno = set_lock_sync_counter(win_ptr, target_rank,
+                                      lock_op_ack_pkt->flags);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+    mpi_errno = adjust_op_piggybacked_with_lock(win_ptr, target_rank,
+                                                lock_op_ack_pkt->flags);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK) {
         MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
         mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
@@ -1458,8 +1517,7 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     *rreqp = NULL;
     MPIDI_CH3_Progress_signal_completion();
 
-    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_lock_ack);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKOPACK);
  fn_exit:
     return MPI_SUCCESS;
  fn_fail:

http://git.mpich.org/mpich.git/commitdiff/b8c9f31b707fe6485ac88705aa3a3caab2aa35d2

commit b8c9f31b707fe6485ac88705aa3a3caab2aa35d2
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 15 16:11:21 2014 -0600

    Tracking current buffered lock data on each window
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpid_rma_lockqueue.h b/src/mpid/ch3/include/mpid_rma_lockqueue.h
index a2633aa..0e1919b 100644
--- a/src/mpid/ch3/include/mpid_rma_lockqueue.h
+++ b/src/mpid/ch3/include/mpid_rma_lockqueue.h
@@ -33,6 +33,7 @@ static inline MPIDI_RMA_Lock_entry_t *MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win *
         new_ptr->next = NULL;
         new_ptr->pkt = (*pkt);
         new_ptr->data = NULL;
+        new_ptr->data_size = 0;
         new_ptr->all_data_recved = 0;
     }
 
@@ -51,6 +52,7 @@ static inline int MPIDI_CH3I_Win_lock_entry_free(MPID_Win * win_ptr,
     int mpi_errno = MPI_SUCCESS;
 
     if (lock_entry->data != NULL) {
+        win_ptr->current_lock_data_bytes -= lock_entry->data_size;
         MPIU_Free(lock_entry->data);
     }
 
diff --git a/src/mpid/ch3/include/mpid_rma_types.h b/src/mpid/ch3/include/mpid_rma_types.h
index a43fa0d..9089b1a 100644
--- a/src/mpid/ch3/include/mpid_rma_types.h
+++ b/src/mpid/ch3/include/mpid_rma_types.h
@@ -132,6 +132,7 @@ typedef struct MPIDI_RMA_Lock_entry {
     struct MPIDI_RMA_Lock_entry *next;
     MPIDI_CH3_Pkt_t pkt;    /* all information for this request packet */
     void *data;             /* for queued PUTs / ACCs / GACCs, data is copied here */
+    int data_size;
     int all_data_recved;    /* indicate if all data has been received */
 } MPIDI_RMA_Lock_entry_t;
 
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index f17c628..86ff728 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -353,6 +353,7 @@ extern MPIDI_RMA_Pkt_orderings_t *MPIDI_RMA_Pkt_orderings;
     struct MPIDI_RMA_Lock_entry *lock_entry_pool_start;                  \
     struct MPIDI_RMA_Lock_entry *lock_entry_pool;                        \
     struct MPIDI_RMA_Lock_entry *lock_entry_pool_tail;                   \
+    int current_lock_data_bytes;                                         \
 
 #ifdef MPIDI_CH3_WIN_DECL
 #define MPID_DEV_WIN_DECL \
diff --git a/src/mpid/ch3/src/mpid_rma.c b/src/mpid/ch3/src/mpid_rma.c
index 5ef58bb..4683ee0 100644
--- a/src/mpid/ch3/src/mpid_rma.c
+++ b/src/mpid/ch3/src/mpid_rma.c
@@ -23,6 +23,18 @@ cvars:
         a linked list of target elements. The distribution of ranks among
         slots follows a round-robin pattern. Requires a positive value.
 
+    - name        : MPIR_CVAR_CH3_RMA_LOCK_DATA_BYTES
+      category    : CH3
+      type        : int
+      default     : 655360
+      class       : none
+      verbosity   : MPI_T_VERBOSITY_USER_BASIC
+      scope       : MPI_T_SCOPE_ALL_EQ
+      description : >-
+        Size (in bytes) of available lock data this window can provide. If
+        current buffered lock data is more than this value, the process will
+        drop the upcoming operation data. Requires a positive value.
+
 === END_MPI_T_CVAR_INFO_BLOCK ===
 */
 
@@ -346,6 +358,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     (*win_ptr)->lock_all_assert = 0;
     (*win_ptr)->lock_epoch_count = 0;
     (*win_ptr)->outstanding_locks = 0;
+    (*win_ptr)->current_lock_data_bytes = 0;
 
     /* Initialize the info flags */
     (*win_ptr)->info_args.no_locks = 0;
diff --git a/src/mpid/ch3/src/mpidi_rma.c b/src/mpid/ch3/src/mpidi_rma.c
index bae0595..d05c3c5 100644
--- a/src/mpid/ch3/src/mpidi_rma.c
+++ b/src/mpid/ch3/src/mpidi_rma.c
@@ -205,7 +205,8 @@ int MPIDI_Win_free(MPID_Win ** win_ptr)
        entering Win_free. */
     while ((*win_ptr)->current_lock_type != MPID_LOCK_NONE ||
            (*win_ptr)->at_completion_counter != 0 ||
-           (*win_ptr)->lock_queue != NULL) {
+           (*win_ptr)->lock_queue != NULL ||
+           (*win_ptr)->current_lock_data_bytes != 0) {
         mpi_errno = wait_progress_engine();
         if (mpi_errno != MPI_SUCCESS)
             MPIU_ERR_POP(mpi_errno);
@@ -239,6 +240,7 @@ int MPIDI_Win_free(MPID_Win ** win_ptr)
     MPIU_Free((*win_ptr)->target_pool_start);
     MPIU_Free((*win_ptr)->slots);
     MPIU_Free((*win_ptr)->lock_entry_pool_start);
+    MPIU_Assert((*win_ptr)->current_lock_data_bytes == 0);
 
     /* Free the attached buffer for windows created with MPI_Win_allocate() */
     if ((*win_ptr)->create_flavor == MPI_WIN_FLAVOR_ALLOCATE ||

http://git.mpich.org/mpich.git/commitdiff/970226532fd89bd462402dc50bc6b7b62ebd27f6

commit 970226532fd89bd462402dc50bc6b7b62ebd27f6
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 8 22:06:30 2014 -0600

    Delete MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK flag
    
    The behavior of the UNLOCK_ACK flag is exactly the same
    as the behavior of FLUSH_ACK, so here we just delete the
    UNLOCK_ACK flag and use the FLUSH_ACK flag for all FLUSH
    ACK packets.
    
    No reviewer.

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
index c5e8779..c342a56 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
@@ -2167,10 +2167,6 @@ int MPID_nem_ib_PktHandler_GetResp(MPIDI_VC_t * vc,
         mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
-    if (get_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK) {
-        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    }
 
     void *write_to_buf;
 
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 95078d0..c3c444a 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -117,13 +117,12 @@ typedef enum {
     MPIDI_CH3_PKT_FLAG_RMA_SHARED = 64,
     MPIDI_CH3_PKT_FLAG_RMA_EXCLUSIVE = 128,
     MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK = 256,
-    MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK = 512,
-    MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED = 1024,
-    MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED = 2048,
-    MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_DISCARDED = 4096,
-    MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED = 8192,
-    MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK = 16384,
-    MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP = 32768
+    MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED = 512,
+    MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED = 1024,
+    MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_DISCARDED = 2048,
+    MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED = 4096,
+    MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK = 8192,
+    MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP = 16384
 } MPIDI_CH3_Pkt_flags_t;
 
 typedef struct MPIDI_CH3_Pkt_send {
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 18cb59b..daf9542 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -630,10 +630,9 @@ static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
         /* This is PUT or ACC */
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
             MPIDI_CH3_Pkt_flags_t pkt_flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-            if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+            if ((flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+                (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
                 pkt_flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-            if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-                pkt_flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
             mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr,
                                                      pkt_flags,
                                                      source_win_handle);
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index b80c89f..5ef4bff 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -240,10 +240,9 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *vc,
     get_accum_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+    if ((rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+        (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
         get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-        get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
     get_accum_resp_pkt->immed_len = 0;
 
     MPID_Datatype_get_size_macro(rreq->dev.datatype, type_size);
@@ -596,10 +595,9 @@ int MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete( MPIDI_VC_t *vc,
     get_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+    if ((rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+        (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
         get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-        get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
     get_resp_pkt->immed_len = 0;
     
     sreq->dev.segment_ptr = MPID_Segment_alloc( );
@@ -896,10 +894,9 @@ static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
     get_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-    if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+    if ((get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+        (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
         get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-    if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-        get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
     get_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
     get_resp_pkt->source_win_handle = get_pkt->source_win_handle;
     get_resp_pkt->immed_len = 0;
@@ -1049,10 +1046,9 @@ static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Loc
     get_accum_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     if (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-    if (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+    if ((get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+        (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
         get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-    if (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-        get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
     get_accum_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
     get_accum_resp_pkt->source_win_handle = get_accum_pkt->source_win_handle;
     get_accum_resp_pkt->immed_len = 0;
@@ -1148,10 +1144,9 @@ static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
     fop_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+    if ((fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+        (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
         fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
     fop_resp_pkt->immed_len = fop_pkt->immed_len;
 
     /* copy data to resp pkt header */
@@ -1228,10 +1223,9 @@ static inline int perform_cas_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
     cas_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+    if ((cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+        (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
         cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-        cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
 
     /* Copy old value into the response packet */
     MPID_Datatype_get_size_macro(cas_pkt->datatype, len);
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index a70daec..1d1718c 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -408,10 +408,9 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         get_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
         if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
             get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-        if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+        if ((get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+            (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
             get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-        if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-            get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
         get_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
         get_resp_pkt->source_win_handle = get_pkt->source_win_handle;
         get_resp_pkt->immed_len = 0;
@@ -930,10 +929,9 @@ int MPIDI_CH3_PktHandler_CAS(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     cas_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+    if ((cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+        (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
         cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-        cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
 
     /* Copy old value into the response packet */
     MPID_Datatype_get_size_macro(cas_pkt->datatype, len);
@@ -1025,10 +1023,6 @@ int MPIDI_CH3_PktHandler_CASResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
         mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     }
-    if (cas_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK) {
-        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
-        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-    }
 
     MPID_Request_get_ptr(cas_resp_pkt->request_handle, req);
     MPID_Datatype_get_size_macro(req->dev.datatype, len);
@@ -1094,10 +1088,9 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     fop_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
-    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+    if ((fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) ||
+        (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
         fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
-    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
-        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
     fop_resp_pkt->immed_len = fop_pkt->immed_len;
 
     /* copy data to resp pkt header */
@@ -1193,10 +1186,6 @@ int MPIDI_CH3_PktHandler_FOPResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
         mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
-    if (fop_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK) {
-        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    }
 
     MPIDI_CH3U_Request_complete(req);
     *buflen = sizeof(MPIDI_CH3_Pkt_t);
@@ -1248,10 +1237,6 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
-    if (get_accum_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK) {
-        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    }
 
     data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
     data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
@@ -1384,10 +1369,6 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
         mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
-    if (get_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK) {
-        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    }
 
     data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
     data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
@@ -1473,11 +1454,6 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     }
-    if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK) {
-        MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
-        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
-        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-    }
 
     *rreqp = NULL;
     MPIDI_CH3_Progress_signal_completion();

http://git.mpich.org/mpich.git/commitdiff/03ebc97b2f35c4d291ef73ca19634f94cf74858d

commit 03ebc97b2f35c4d291ef73ca19634f94cf74858d
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 8 14:58:56 2014 -0600

    Modify ACK of op with both LOCK and UNLOCK (FLUSH) flags
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 2afbde6..95078d0 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -654,7 +654,6 @@ typedef struct MPIDI_CH3_Pkt_lock_ack {
 
 typedef struct MPIDI_CH3_Pkt_flush_ack {
     MPIDI_CH3_Pkt_type_t type;
-    MPIDI_CH3_Pkt_flags_t flags;
     MPI_Win source_win_handle;
     int target_rank;
 } MPIDI_CH3_Pkt_flush_ack_t;
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 85e0228..18cb59b 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -148,7 +148,6 @@ static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_p
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 static inline int MPIDI_CH3I_Send_flush_ack_pkt(MPIDI_VC_t *vc, MPID_Win *win_ptr,
-                                                MPIDI_CH3_Pkt_flags_t flags,
                                     MPI_Win source_win_handle)
 {
     MPIDI_CH3_Pkt_t upkt;
@@ -162,9 +161,6 @@ static inline int MPIDI_CH3I_Send_flush_ack_pkt(MPIDI_VC_t *vc, MPID_Win *win_pt
     MPIDI_Pkt_init(flush_ack_pkt, MPIDI_CH3_PKT_FLUSH_ACK);
     flush_ack_pkt->source_win_handle = source_win_handle;
     flush_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
-    flush_ack_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
-    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
-        flush_ack_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
 
     /* Because this is in a packet handler, it is already within a critical section */	
     /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
@@ -633,19 +629,25 @@ static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
     if (type == MPIDI_CH3_PKT_PUT || type == MPIDI_CH3_PKT_ACCUMULATE) {
         /* This is PUT or ACC */
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
-            if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) &&
-                !(flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)) {
-                mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr,
-                                                         MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED,
-                                                         source_win_handle);
-                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-                MPIDI_CH3_Progress_signal_completion();
-            }
+            MPIDI_CH3_Pkt_flags_t pkt_flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
+            if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+                pkt_flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
+            if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
+                pkt_flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
+            mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr,
+                                                     pkt_flags,
+                                                     source_win_handle);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+            MPIDI_CH3_Progress_signal_completion();
         }
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
-            mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr, flags,
-                                                      source_win_handle);
-            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)) {
+                /* If op is piggybacked with both LOCK and FLUSH,
+                   we only send LOCK ACK back, do not send FLUSH ACK. */
+                mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr,
+                                                          source_win_handle);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            }
             MPIDI_CH3_Progress_signal_completion();
         }
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
@@ -656,9 +658,13 @@ static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                 MPIDI_CH3_Progress_signal_completion();
         }
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
-            mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr, flags,
-                                                      source_win_handle);
-            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)) {
+                /* If op is piggybacked with both LOCK and UNLOCK,
+                   we only send LOCK ACK back, do not send FLUSH (UNLOCK) ACK. */
+                mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr,
+                                                          source_win_handle);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            }
             mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
             MPIDI_CH3_Progress_signal_completion();
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index 38d72d9..a70daec 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -1451,6 +1451,7 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIDI_CH3_Pkt_lock_ack_t *lock_ack_pkt = &pkt->lock_ack;
     MPID_Win *win_ptr = NULL;
     int target_rank = lock_ack_pkt->target_rank;
+    MPIDI_CH3_Pkt_flags_t flags = lock_ack_pkt->flags;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
 
@@ -1467,6 +1468,17 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     mpi_errno = set_lock_sync_counter(win_ptr, target_rank);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
+    if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK) {
+        MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
+        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    }
+    if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK) {
+        MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
+        mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    }
+
     *rreqp = NULL;
     MPIDI_CH3_Progress_signal_completion();
 
@@ -1501,11 +1513,6 @@ int MPIDI_CH3_PktHandler_FlushAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     MPID_Win_get_ptr(flush_ack_pkt->source_win_handle, win_ptr);
 
-    if (flush_ack_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
-        mpi_errno = set_lock_sync_counter(win_ptr, target_rank);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    }
-
     /* decrement ack_counter on target */
     mpi_errno = MPIDI_CH3I_RMA_Handle_flush_ack(win_ptr, target_rank);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -1583,7 +1590,7 @@ int MPIDI_CH3_PktHandler_Unlock(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
 
     if (!(unlock_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK)) {
-        mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr, MPIDI_CH3_PKT_FLAG_NONE,
+        mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr,
                                                   unlock_pkt->source_win_handle);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
@@ -1623,7 +1630,7 @@ int MPIDI_CH3_PktHandler_Flush(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     MPID_Win_get_ptr(flush_pkt->target_win_handle, win_ptr);
 
-    mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr, MPIDI_CH3_PKT_FLAG_NONE,
+    mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr,
                                               flush_pkt->source_win_handle);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 

http://git.mpich.org/mpich.git/commitdiff/0167912040fde039796776bece438e9e06c17811

commit 0167912040fde039796776bece438e9e06c17811
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Sat Dec 6 20:38:49 2014 -0800

    Modify send_lock_ack_pkt function to contain flags.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index e7b23c2..2afbde6 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -647,6 +647,7 @@ typedef struct MPIDI_CH3_Pkt_decr_at_counter {
 
 typedef struct MPIDI_CH3_Pkt_lock_ack {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     MPI_Win source_win_handle;
     int target_rank;
 } MPIDI_CH3_Pkt_lock_ack_t;
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 7420073..85e0228 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -103,7 +103,9 @@ static inline int send_unlock_msg(int dest, MPID_Win * win_ptr,
 #define FUNCNAME MPIDI_CH3I_Send_lock_ack_pkt
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr, MPI_Win source_win_handle)
+static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr,
+                                               MPIDI_CH3_Pkt_flags_t flags,
+                                               MPI_Win source_win_handle)
 {
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_lock_ack_t *lock_ack_pkt = &upkt.lock_ack;
@@ -117,6 +119,7 @@ static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_p
     MPIDI_Pkt_init(lock_ack_pkt, MPIDI_CH3_PKT_LOCK_ACK);
     lock_ack_pkt->source_win_handle = source_win_handle;
     lock_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
+    lock_ack_pkt->flags = flags;
 
     MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
                      (MPIU_DBG_FDEST, "sending lock ack pkt on vc=%p, source_win_handle=%#08x",
@@ -632,7 +635,9 @@ static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
             if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) &&
                 !(flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)) {
-                mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr, source_win_handle);
+                mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr,
+                                                         MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED,
+                                                         source_win_handle);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                 MPIDI_CH3_Progress_signal_completion();
             }
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 1b9f2ce..b80c89f 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -1306,6 +1306,7 @@ static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_ent
             MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr,
                                          lock_pkt->origin_rank, &vc);
             mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr,
+                                                     MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED,
                                               lock_pkt->source_win_handle);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
         }
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index b833c79..38d72d9 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -1328,7 +1328,8 @@ int MPIDI_CH3_PktHandler_Lock(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_pkt->lock_type) == 1) {
         /* send lock granted packet. */
-        mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr, lock_pkt->source_win_handle);
+        mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr, MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED,
+                                                 lock_pkt->source_win_handle);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     }
 

http://git.mpich.org/mpich.git/commitdiff/faae55ad0b81efc98c5850ecf770200ce975d08c

commit faae55ad0b81efc98c5850ecf770200ce975d08c
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Sat Dec 6 20:23:12 2014 -0800

    Add new pkt flags for different LOCK ACKs.
    
    Add new flags for four different kinds of LOCK ACKs:
    
    (1) LOCK_GRANTED: lock is granted on target.
    (2) LOCK_QUEUED_DATA_QUEUED: lock is not granted on target,
        but it is safely queued on target. If this lock request
        is sent with an RMA operation, the operation data is also
        safely queued on target.
    (3) LOCK_QUEUED_DATA_DISCARDED: lock is not granted on target,
        but it is safely queued on target. If this lock request
        is sent with an RMA operation, the operation data is discarded
        on target because the target is out of resources.
    (4) LOCK_DISCARDED: lock is not granted on target, and it is
        not queued up on target because the target is out of resources.
        If this lock request is sent with an RMA operation, the
        operation data is also discarded on target.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 1c36d6c..e7b23c2 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -119,8 +119,11 @@ typedef enum {
     MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK = 256,
     MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK = 512,
     MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED = 1024,
-    MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK = 2048,
-    MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP = 4096
+    MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_QUEUED = 2048,
+    MPIDI_CH3_PKT_FLAG_RMA_LOCK_QUEUED_DATA_DISCARDED = 4096,
+    MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED = 8192,
+    MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK = 16384,
+    MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP = 32768
 } MPIDI_CH3_Pkt_flags_t;
 
 typedef struct MPIDI_CH3_Pkt_send {

http://git.mpich.org/mpich.git/commitdiff/e36203c35142e3841dfd85134d72bc2731958a8f

commit e36203c35142e3841dfd85134d72bc2731958a8f
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 1 20:49:14 2014 -0600

    Change routine/pkt name from LOCK_GRANTED to LOCK_ACK
    
    Because we will send different kinds of LOCK ACKs (not
    just LOCK_GRANTED, but also LOCK_DISCARDED, for example),
    naming the related packets and functions "LOCK_GRANTED"
    is no longer appropriate. Here we rename them to "LOCK_ACK".
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index 76c52f2..a0dc6bf 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -1824,7 +1824,7 @@ int MPIDI_CH3_PktHandler_GetResp( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *,
 				 MPIDI_msg_sz_t *, MPID_Request ** );
 int MPIDI_CH3_PktHandler_Lock( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, 
 			      MPIDI_msg_sz_t *, MPID_Request ** );
-int MPIDI_CH3_PktHandler_LockGranted( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, 
+int MPIDI_CH3_PktHandler_LockAck( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *,
 				      MPIDI_msg_sz_t *, MPID_Request ** );
 int MPIDI_CH3_PktHandler_Unlock( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *,
                                  MPIDI_msg_sz_t *, MPID_Request ** );
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index ae40948..1c36d6c 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -89,7 +89,7 @@ typedef enum {
     MPIDI_CH3_PKT_UNLOCK,
     MPIDI_CH3_PKT_FLUSH,
     MPIDI_CH3_PKT_DECR_AT_COUNTER,
-    MPIDI_CH3_PKT_LOCK_GRANTED,
+    MPIDI_CH3_PKT_LOCK_ACK,
     MPIDI_CH3_PKT_FLUSH_ACK,
     /* RMA Packets end here */
     MPIDI_CH3_PKT_FLOW_CNTL_UPDATE,     /* FIXME: Unused */
@@ -638,15 +638,15 @@ typedef struct MPIDI_CH3_Pkt_decr_at_counter {
 } MPIDI_CH3_Pkt_decr_at_counter_t;
 
 /*********************************************************************************/
-/* RMA control response packet (from target to origin, including LOCK_GRANTED,   */
+/* RMA control response packet (from target to origin, including LOCK_ACK,       */
 /* FLUSH_ACK)                                                                    */
 /*********************************************************************************/
 
-typedef struct MPIDI_CH3_Pkt_lock_granted {
+typedef struct MPIDI_CH3_Pkt_lock_ack {
     MPIDI_CH3_Pkt_type_t type;
     MPI_Win source_win_handle;
     int target_rank;
-} MPIDI_CH3_Pkt_lock_granted_t;
+} MPIDI_CH3_Pkt_lock_ack_t;
 
 typedef struct MPIDI_CH3_Pkt_flush_ack {
     MPIDI_CH3_Pkt_type_t type;
@@ -696,7 +696,7 @@ typedef union MPIDI_CH3_Pkt {
     MPIDI_CH3_Pkt_unlock_t unlock;
     MPIDI_CH3_Pkt_flush_t flush;
     MPIDI_CH3_Pkt_decr_at_counter_t decr_at_cnt;
-    MPIDI_CH3_Pkt_lock_granted_t lock_granted;
+    MPIDI_CH3_Pkt_lock_ack_t lock_ack;
     MPIDI_CH3_Pkt_flush_ack_t flush_ack;
     /* RMA packets end here */
     MPIDI_CH3_Pkt_close_t close;
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 3afb723..7420073 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -100,30 +100,30 @@ static inline int send_unlock_msg(int dest, MPID_Win * win_ptr,
 
 
 #undef FUNCNAME
-#define FUNCNAME MPIDI_CH3I_Send_lock_granted_pkt
+#define FUNCNAME MPIDI_CH3I_Send_lock_ack_pkt
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static inline int MPIDI_CH3I_Send_lock_granted_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr, MPI_Win source_win_handle)
+static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr, MPI_Win source_win_handle)
 {
     MPIDI_CH3_Pkt_t upkt;
-    MPIDI_CH3_Pkt_lock_granted_t *lock_granted_pkt = &upkt.lock_granted;
+    MPIDI_CH3_Pkt_lock_ack_t *lock_ack_pkt = &upkt.lock_ack;
     MPID_Request *req = NULL;
     int mpi_errno;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
 
-    /* send lock granted packet */
-    MPIDI_Pkt_init(lock_granted_pkt, MPIDI_CH3_PKT_LOCK_GRANTED);
-    lock_granted_pkt->source_win_handle = source_win_handle;
-    lock_granted_pkt->target_rank = win_ptr->comm_ptr->rank;
+    /* send lock ack packet */
+    MPIDI_Pkt_init(lock_ack_pkt, MPIDI_CH3_PKT_LOCK_ACK);
+    lock_ack_pkt->source_win_handle = source_win_handle;
+    lock_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
 
     MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
-                     (MPIU_DBG_FDEST, "sending lock granted pkt on vc=%p, source_win_handle=%#08x",
-                      vc, lock_granted_pkt->source_win_handle));
+                     (MPIU_DBG_FDEST, "sending lock ack pkt on vc=%p, source_win_handle=%#08x",
+                      vc, lock_ack_pkt->source_win_handle));
 
     MPIU_THREAD_CS_ENTER(CH3COMM, vc);
-    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_granted_pkt, sizeof(*lock_granted_pkt), &req);
+    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_ack_pkt, sizeof(*lock_ack_pkt), &req);
     MPIU_THREAD_CS_EXIT(CH3COMM, vc);
     if (mpi_errno) {
         MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
@@ -134,7 +134,7 @@ static inline int MPIDI_CH3I_Send_lock_granted_pkt(MPIDI_VC_t * vc, MPID_Win * w
     }
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
 
     return mpi_errno;
 }
@@ -632,7 +632,7 @@ static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
             if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) &&
                 !(flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)) {
-                mpi_errno = MPIDI_CH3I_Send_lock_granted_pkt(vc, win_ptr, source_win_handle);
+                mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr, source_win_handle);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                 MPIDI_CH3_Progress_signal_completion();
             }
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
index 1a7bf4f..3bf484f 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
@@ -579,8 +579,8 @@ int MPIDI_CH3_PktHandler_Init( MPIDI_CH3_PktHandler_Fcn *pktArray[],
 	MPIDI_CH3_PktHandler_GetResp;
     pktArray[MPIDI_CH3_PKT_LOCK] =
 	MPIDI_CH3_PktHandler_Lock;
-    pktArray[MPIDI_CH3_PKT_LOCK_GRANTED] =
-	MPIDI_CH3_PktHandler_LockGranted;
+    pktArray[MPIDI_CH3_PKT_LOCK_ACK] =
+	MPIDI_CH3_PktHandler_LockAck;
     pktArray[MPIDI_CH3_PKT_UNLOCK] =
         MPIDI_CH3_PktHandler_Unlock;
     pktArray[MPIDI_CH3_PKT_FLUSH] =
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 11bdaf1..1b9f2ce 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -1305,7 +1305,7 @@ static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_ent
             MPIDI_VC_t *vc = NULL;
             MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr,
                                          lock_pkt->origin_rank, &vc);
-            mpi_errno = MPIDI_CH3I_Send_lock_granted_pkt(vc, win_ptr,
+            mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr,
                                               lock_pkt->source_win_handle);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
         }
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index df9ea36..b833c79 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -18,7 +18,7 @@ MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_get_accum_resp);
 MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_cas_resp);
 MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_fop_resp);
 MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_lock);
-MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_lock_granted);
+MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_lock_ack);
 MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_unlock);
 MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_flush);
 MPIR_T_PVAR_DOUBLE_TIMER_DECL(RMA, rma_rmapkt_flush_ack);
@@ -128,11 +128,11 @@ void MPIDI_CH3_RMA_Init_pkthandler_pvars(void)
     /* rma_rmapkt_lock_granted */
     MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
                                       MPI_DOUBLE,
-                                      rma_rmapkt_lock_granted,
+                                      rma_rmapkt_lock_ack,
                                       MPI_T_VERBOSITY_MPIDEV_DETAIL,
                                       MPI_T_BIND_NO_OBJECT,
                                       MPIR_T_PVAR_FLAG_READONLY,
-                                      "RMA", "RMA:PKTHANDLER for Lock-Granted (in seconds)");
+                                      "RMA", "RMA:PKTHANDLER for Lock-Ack (in seconds)");
 
     /* rma_rmapkt_unlock */
     MPIR_T_PVAR_TIMER_REGISTER_STATIC(RMA,
@@ -1328,7 +1328,7 @@ int MPIDI_CH3_PktHandler_Lock(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_pkt->lock_type) == 1) {
         /* send lock granted packet. */
-        mpi_errno = MPIDI_CH3I_Send_lock_granted_pkt(vc, win_ptr, lock_pkt->source_win_handle);
+        mpi_errno = MPIDI_CH3I_Send_lock_ack_pkt(vc, win_ptr, lock_pkt->source_win_handle);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -1441,27 +1441,27 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 }
 
 #undef FUNCNAME
-#define FUNCNAME MPIDI_CH3_PktHandler_LockGranted
+#define FUNCNAME MPIDI_CH3_PktHandler_LockAck
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPIDI_CH3_PktHandler_LockGranted(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                                      MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
 {
-    MPIDI_CH3_Pkt_lock_granted_t *lock_granted_pkt = &pkt->lock_granted;
+    MPIDI_CH3_Pkt_lock_ack_t *lock_ack_pkt = &pkt->lock_ack;
     MPID_Win *win_ptr = NULL;
-    int target_rank = lock_granted_pkt->target_rank;
+    int target_rank = lock_ack_pkt->target_rank;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKGRANTED);
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKGRANTED);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
 
-    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received lock granted pkt");
+    MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "received lock ack pkt");
 
-    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_lock_granted);
+    MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_lock_ack);
 
     *buflen = sizeof(MPIDI_CH3_Pkt_t);
 
-    MPID_Win_get_ptr(lock_granted_pkt->source_win_handle, win_ptr);
+    MPID_Win_get_ptr(lock_ack_pkt->source_win_handle, win_ptr);
 
     mpi_errno = set_lock_sync_counter(win_ptr, target_rank);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -1469,8 +1469,8 @@ int MPIDI_CH3_PktHandler_LockGranted(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     *rreqp = NULL;
     MPIDI_CH3_Progress_signal_completion();
 
-    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_lock_granted);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKGRANTED);
+    MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_lock_ack);
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
  fn_exit:
     return MPI_SUCCESS;
  fn_fail:
@@ -1712,10 +1712,10 @@ int MPIDI_CH3_PktPrint_FlushAck(FILE * fp, MPIDI_CH3_Pkt_t * pkt)
     return MPI_SUCCESS;
 }
 
-int MPIDI_CH3_PktPrint_LockGranted(FILE * fp, MPIDI_CH3_Pkt_t * pkt)
+int MPIDI_CH3_PktPrint_LockAck(FILE * fp, MPIDI_CH3_Pkt_t * pkt)
 {
-    MPIU_DBG_PRINTF((" type ......... MPIDI_CH3_PKT_LOCK_GRANTED\n"));
-    MPIU_DBG_PRINTF((" source ....... 0x%08X\n", pkt->lock_granted.source_win_handle));
+    MPIU_DBG_PRINTF((" type ......... MPIDI_CH3_PKT_LOCK_ACK\n"));
+    MPIU_DBG_PRINTF((" source ....... 0x%08X\n", pkt->lock_ack.source_win_handle));
     return MPI_SUCCESS;
 }
 #endif
diff --git a/src/mpid/ch3/src/mpidi_printf.c b/src/mpid/ch3/src/mpidi_printf.c
index 8f15f63..8b19ae6 100644
--- a/src/mpid/ch3/src/mpidi_printf.c
+++ b/src/mpid/ch3/src/mpidi_printf.c
@@ -146,8 +146,8 @@ void MPIDI_DBG_Print_packet(MPIDI_CH3_Pkt_t *pkt)
 	    case MPIDI_CH3_PKT_FLUSH_ACK:
 		MPIDI_CH3_PktPrint_FlushAck( stdout, pkt );
 		break;
-	    case MPIDI_CH3_PKT_LOCK_GRANTED:
-		MPIDI_CH3_PktPrint_LockGranted( stdout, pkt );
+	    case MPIDI_CH3_PKT_LOCK_ACK:
+		MPIDI_CH3_PktPrint_LockAck( stdout, pkt );
 		break;
 		/*
 	    case MPIDI_CH3_PKT_SHARED_LOCK_OPS_DONE:
@@ -339,10 +339,10 @@ const char *MPIDI_Pkt_GetDescString( MPIDI_CH3_Pkt_t *pkt )
 		       "RMA_DONE - 0x%08X", 
 		       pkt->flush_ack.source_win_handle );
 	break;
-    case MPIDI_CH3_PKT_LOCK_GRANTED:
+    case MPIDI_CH3_PKT_LOCK_ACK:
 	MPIU_Snprintf( pktmsg, sizeof(pktmsg), 
-		       "LOCK_GRANTED - 0x%08X", 
-		       pkt->lock_granted.source_win_handle );
+		       "LOCK_ACK - 0x%08X",
+		       pkt->lock_ack.source_win_handle );
 		break;
     case MPIDI_CH3_PKT_FLOW_CNTL_UPDATE:
 	MPIU_Snprintf( pktmsg, sizeof(pktmsg), 

http://git.mpich.org/mpich.git/commitdiff/385f0aae7d184464bb37121a78d7739b61ab5caf

commit 385f0aae7d184464bb37121a78d7739b61ab5caf
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 8 23:26:01 2014 -0600

    Bug-fix: add pkt type LOCK in GET_TARGET_WIN_HANDLE macro
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 2dc8114..ae40948 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -398,6 +398,9 @@ MPIDI_CH3_PKT_DEFS
         case (MPIDI_CH3_PKT_FOP):                                       \
             win_hdl_ = (pkt_).fop.target_win_handle;                    \
             break;                                                      \
+        case (MPIDI_CH3_PKT_LOCK):                                      \
+            win_hdl_ = (pkt_).lock.target_win_handle;                   \
+            break;                                                      \
         default:                                                        \
             MPIU_ERR_SETANDJUMP1(err_, MPI_ERR_OTHER, "**invalidpkt", "**invalidpkt %d", (pkt_).type); \
         }                                                               \

http://git.mpich.org/mpich.git/commitdiff/2b53ff6973a9dff00da382aadaa3459da557a5c4

commit 2b53ff6973a9dff00da382aadaa3459da557a5c4
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Sat Dec 6 17:38:12 2014 -0800

    Code-refactor: Move send_flush_msg function to header file.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 1bd1591..3afb723 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -223,6 +223,45 @@ static inline int send_decr_at_cnt_msg(int dst, MPID_Win * win_ptr)
 }
 
 
+#undef FUNCNAME
+#define FUNCNAME send_flush_msg
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+static inline int send_flush_msg(int dest, MPID_Win * win_ptr)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_CH3_Pkt_t upkt;
+    MPIDI_CH3_Pkt_flush_t *flush_pkt = &upkt.flush;
+    MPID_Request *req = NULL;
+    MPIDI_VC_t *vc;
+    MPIDI_STATE_DECL(MPID_STATE_SEND_FLUSH_MSG);
+    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_FLUSH_MSG);
+
+    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
+
+    MPIDI_Pkt_init(flush_pkt, MPIDI_CH3_PKT_FLUSH);
+    flush_pkt->target_win_handle = win_ptr->all_win_handles[dest];
+    flush_pkt->source_win_handle = win_ptr->handle;
+
+    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
+    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_pkt, sizeof(*flush_pkt), &req);
+    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
+    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
+
+    /* Release the request returned by iStartMsg */
+    if (req != NULL) {
+        MPID_Request_release(req);
+    }
+
+  fn_exit:
+    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_FLUSH_MSG);
+    return mpi_errno;
+    /* --BEGIN ERROR HANDLING-- */
+  fn_fail:
+    goto fn_exit;
+    /* --END ERROR HANDLING-- */
+}
+
 
 /* enqueue an unsatisfied origin in passive target at target side. */
 static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
diff --git a/src/mpid/ch3/src/ch3u_rma_oplist.c b/src/mpid/ch3/src/ch3u_rma_oplist.c
index 7b0f256..7d6a2c7 100644
--- a/src/mpid/ch3/src/ch3u_rma_oplist.c
+++ b/src/mpid/ch3/src/ch3u_rma_oplist.c
@@ -37,8 +37,6 @@ static inline int check_window_state(MPID_Win *win_ptr, int *made_progress);
 static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *target, int *made_progress);
 static inline int issue_ops_win(MPID_Win * win_ptr, int *made_progress);
 
-static int send_flush_msg(int dest, MPID_Win *win_ptr);
-
 /* check if we can switch window-wide state: FENCE_ISSUED, PSCW_ISSUED, LOCK_ALL_ISSUED */
 #undef FUNCNAME
 #define FUNCNAME check_window_state
@@ -826,43 +824,3 @@ int MPIDI_CH3I_RMA_Make_progress_global(int *made_progress)
   fn_fail:
     goto fn_exit;
 }
-
-
-#undef FUNCNAME
-#define FUNCNAME send_flush_msg
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int send_flush_msg(int dest, MPID_Win * win_ptr)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_CH3_Pkt_t upkt;
-    MPIDI_CH3_Pkt_flush_t *flush_pkt = &upkt.flush;
-    MPID_Request *req = NULL;
-    MPIDI_VC_t *vc;
-    MPIDI_STATE_DECL(MPID_STATE_SEND_FLUSH_MSG);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_FLUSH_MSG);
-
-    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
-
-    MPIDI_Pkt_init(flush_pkt, MPIDI_CH3_PKT_FLUSH);
-    flush_pkt->target_win_handle = win_ptr->all_win_handles[dest];
-    flush_pkt->source_win_handle = win_ptr->handle;
-
-    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
-    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_pkt, sizeof(*flush_pkt), &req);
-    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
-    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
-
-    /* Release the request returned by iStartMsg */
-    if (req != NULL) {
-        MPID_Request_release(req);
-    }
-
-  fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_FLUSH_MSG);
-    return mpi_errno;
-    /* --BEGIN ERROR HANDLING-- */
-  fn_fail:
-    goto fn_exit;
-    /* --END ERROR HANDLING-- */
-}

http://git.mpich.org/mpich.git/commitdiff/1962d3b12679e012c74566da6167dff31946ca12

commit 1962d3b12679e012c74566da6167dff31946ca12
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Dec 3 08:16:33 2014 -0800

    Re-organize progress engine functions.
    
    Rewrite progress engine functions as follows:
    
    Basic functions:
    
    (1) check_target_state: check to see if we can switch target state,
        issue synchronization messages if needed.
    (2) issue_ops_target: issue all pending operations to this target.
    (3) check_window_state: check to see if we can switch window state.
    (4) issue_ops_win: issue all pending operations on this window.
        Currently it internally calls check_target_state and
        issue_ops_target; this should be optimized in the future.
    
    Progress making functions:
    
    (1) Make_progress_target: make progress on one target, which
        internally calls check_target_state and issue_ops_target.
    (2) Make_progress_win: make progress on all targets on one window,
        which internally calls check_window_state and issue_ops_win.
    (3) Make_progress_global: make progress on all windows, which
        internally calls Make_progress_win.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpid_rma_oplist.h b/src/mpid/ch3/include/mpid_rma_oplist.h
index 6c5a9cd..663591d 100644
--- a/src/mpid/ch3/include/mpid_rma_oplist.h
+++ b/src/mpid/ch3/include/mpid_rma_oplist.h
@@ -115,7 +115,6 @@ static inline MPIDI_RMA_Target_t *MPIDI_CH3I_Win_target_alloc(MPID_Win * win_ptr
     e->access_state = MPIDI_RMA_NONE;
     e->lock_type = MPID_LOCK_NONE;
     e->lock_mode = 0;
-    e->outstanding_lock = 0;
     e->accumulated_ops_cnt = 0;
     e->disable_flush_local = 0;
     e->win_complete_flag = 0;
diff --git a/src/mpid/ch3/include/mpid_rma_types.h b/src/mpid/ch3/include/mpid_rma_types.h
index f01468a..a43fa0d 100644
--- a/src/mpid/ch3/include/mpid_rma_types.h
+++ b/src/mpid/ch3/include/mpid_rma_types.h
@@ -85,7 +85,6 @@ typedef struct MPIDI_RMA_Target {
     enum MPIDI_RMA_states access_state;
     int lock_type; /* NONE, SHARED, EXCLUSIVE */
     int lock_mode;              /* e.g., MODE_NO_CHECK */
-    int outstanding_lock;
     int accumulated_ops_cnt;
     int disable_flush_local;
     int win_complete_flag;
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 43a9ac8..1bd1591 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -339,21 +339,34 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
 
 static inline int set_lock_sync_counter(MPID_Win *win_ptr, int target_rank)
 {
+    MPIDI_RMA_Target_t *t = NULL;
     int mpi_errno = MPI_SUCCESS;
 
-    if (win_ptr->outstanding_locks > 0) {
+    MPIU_Assert(win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
+                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
+                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED);
+
+    if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
+        MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, win_ptr->comm_ptr->rank, &orig_vc);
+        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
+        if (win_ptr->comm_ptr->rank == target_rank ||
+            (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
+            win_ptr->outstanding_locks--;
+            MPIU_Assert(win_ptr->outstanding_locks >= 0);
+            goto fn_exit;
+        }
+    }
+    else if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
         win_ptr->outstanding_locks--;
         MPIU_Assert(win_ptr->outstanding_locks >= 0);
+        goto fn_exit;
     }
-    else {
-        MPIDI_RMA_Target_t *t = NULL;
-        mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
-        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-        MPIU_Assert(t != NULL);
 
-        t->outstanding_lock--;
-        MPIU_Assert(t->outstanding_lock == 0);
-    }
+    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    MPIU_Assert(t != NULL);
+    t->access_state = MPIDI_RMA_LOCK_GRANTED;
 
  fn_exit:
     return mpi_errno;
diff --git a/src/mpid/ch3/src/ch3u_rma_oplist.c b/src/mpid/ch3/src/ch3u_rma_oplist.c
index eaab99a..7b0f256 100644
--- a/src/mpid/ch3/src/ch3u_rma_oplist.c
+++ b/src/mpid/ch3/src/ch3u_rma_oplist.c
@@ -32,31 +32,30 @@ cvars:
 === END_MPI_T_CVAR_INFO_BLOCK ===
 */
 
+static inline int check_target_state(MPID_Win *win_ptr, MPIDI_RMA_Target_t *target, int *made_progress);
+static inline int check_window_state(MPID_Win *win_ptr, int *made_progress);
 static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *target, int *made_progress);
 static inline int issue_ops_win(MPID_Win * win_ptr, int *made_progress);
 
 static int send_flush_msg(int dest, MPID_Win *win_ptr);
 
+/* check if we can switch window-wide state: FENCE_ISSUED, PSCW_ISSUED, LOCK_ALL_ISSUED */
 #undef FUNCNAME
 #define FUNCNAME check_window_state
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static inline int check_window_state(MPID_Win *win_ptr, int *made_progress, int *cannot_issue)
+static inline int check_window_state(MPID_Win *win_ptr, int *made_progress)
 {
+    MPID_Request *fence_req_ptr = NULL;
     int i, mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_CHECK_WINDOW_STATE);
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_CHECK_WINDOW_STATE);
 
     (*made_progress) = 0;
-    (*cannot_issue) = 0;
 
-    if (win_ptr->states.access_state == MPIDI_RMA_NONE) {
-        (*cannot_issue) = 1;
-        goto fn_exit;
-    }
-    else if (win_ptr->states.access_state == MPIDI_RMA_FENCE_ISSUED) {
-        MPID_Request *fence_req_ptr = NULL;
+    switch (win_ptr->states.access_state) {
+    case MPIDI_RMA_FENCE_ISSUED:
         MPID_Request_get_ptr(win_ptr->fence_sync_req, fence_req_ptr);
         if (MPID_Request_is_complete(fence_req_ptr)) {
             win_ptr->states.access_state = MPIDI_RMA_FENCE_GRANTED;
@@ -68,12 +67,9 @@ static inline int check_window_state(MPID_Win *win_ptr, int *made_progress, int
 
             (*made_progress) = 1;
         }
-        else {
-            (*cannot_issue) = 1;
-            goto fn_exit;
-        }
-    }
-    else if (win_ptr->states.access_state == MPIDI_RMA_PSCW_ISSUED) {
+        break;
+
+    case MPIDI_RMA_PSCW_ISSUED:
         if (win_ptr->start_req == NULL) {
             /* for MPI_MODE_NOCHECK and all targets on SHM,
                we do not create PSCW requests on window. */
@@ -95,32 +91,34 @@ static inline int check_window_state(MPID_Win *win_ptr, int *made_progress, int
                     win_ptr->start_req[i] = MPI_REQUEST_NULL;
                 }
                 else {
-                    (*cannot_issue) = 1;
-                    goto fn_exit;
+                    break;
                 }
             }
-            MPIU_Assert(i == win_ptr->start_grp_size);
-            win_ptr->states.access_state = MPIDI_RMA_PSCW_GRANTED;
 
-            num_active_issued_win--;
-            MPIU_Assert(num_active_issued_win >= 0);
+            if (i == win_ptr->start_grp_size) {
+                win_ptr->states.access_state = MPIDI_RMA_PSCW_GRANTED;
 
-            (*made_progress) = 1;
+                num_active_issued_win--;
+                MPIU_Assert(num_active_issued_win >= 0);
 
-            MPIU_Free(win_ptr->start_req);
-            win_ptr->start_req = NULL;
+                (*made_progress) = 1;
+
+                MPIU_Free(win_ptr->start_req);
+                win_ptr->start_req = NULL;
+            }
         }
-    }
-    else if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
+        break;
+
+    case MPIDI_RMA_LOCK_ALL_ISSUED:
         if (win_ptr->outstanding_locks == 0) {
             win_ptr->states.access_state = MPIDI_RMA_LOCK_ALL_GRANTED;
             (*made_progress) = 1;
         }
-        else {
-            (*cannot_issue) = 1;
-            goto fn_exit;
-        }
-    }
+        break;
+
+    default:
+        break;
+    } /* end of switch */
 
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_CHECK_WINDOW_STATE);
@@ -132,175 +130,208 @@ static inline int check_window_state(MPID_Win *win_ptr, int *made_progress, int
 }
 
 
-
 #undef FUNCNAME
-#define FUNCNAME issue_ops_target
+#define FUNCNAME check_target_state
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *target,
-                                   int *made_progress)
+static inline int check_target_state(MPID_Win *win_ptr, MPIDI_RMA_Target_t *target,
+                                     int *made_progress)
 {
     int rank = win_ptr->comm_ptr->rank;
-    MPIDI_RMA_Op_t *curr_op = NULL;
-    int first_op;
     int mpi_errno = MPI_SUCCESS;
 
     (*made_progress) = 0;
 
-    if (win_ptr->non_empty_slots == 0 || target == NULL)
+    if (target == NULL)
         goto fn_exit;
 
-    /* check per-target state */
-    if (win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
-        win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
-        if (target->access_state == MPIDI_RMA_LOCK_CALLED) {
-            if (target->sync.sync_flag == MPIDI_RMA_SYNC_NONE ||
-                target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH_LOCAL ||
-                target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
-                if (target->pending_op_list != NULL &&
-                    target->pending_op_list->piggyback_lock_candidate) {
-                    /* Capable of piggybacking LOCK message with first operation. */
+    /* This check should only be performed when window-wide sync is finished, or
+       current sync is per-target sync. */
+    if (win_ptr->states.access_state == MPIDI_RMA_NONE ||
+        win_ptr->states.access_state == MPIDI_RMA_FENCE_ISSUED ||
+        win_ptr->states.access_state == MPIDI_RMA_PSCW_ISSUED ||
+        win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
+        goto fn_exit;
+    }
+
+    switch (target->access_state) {
+    case MPIDI_RMA_LOCK_CALLED:
+        if (target->sync.sync_flag == MPIDI_RMA_SYNC_NONE ||
+            target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH_LOCAL ||
+            target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
+            if (target->pending_op_list == NULL ||
+                !target->pending_op_list->piggyback_lock_candidate) {
+                /* issue lock request */
+                target->access_state = MPIDI_RMA_LOCK_ISSUED;
+                if (target->target_rank == rank) {
+                    mpi_errno = acquire_local_lock(win_ptr, target->lock_type);
+                    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                 }
                 else {
-                    target->access_state = MPIDI_RMA_LOCK_ISSUED;
-                    target->outstanding_lock++;
-                    MPIU_Assert(target->outstanding_lock == 1);
-                    if (target->target_rank == rank) {
-                        mpi_errno = acquire_local_lock(win_ptr, target->lock_type);
+                    mpi_errno = send_lock_msg(target->target_rank,
+                                              target->lock_type, win_ptr);
+                    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+                }
+
+                (*made_progress) = 1;
+            }
+        }
+        else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
+            if (target->pending_op_list == NULL) {
+                /* No RMA operation has ever been posted to this target,
+                   finish issuing, no need to acquire the lock. Cleanup
+                   function will clean it up. */
+                target->access_state = MPIDI_RMA_LOCK_GRANTED;
+
+                target->sync.outstanding_acks--;
+                MPIU_Assert(target->sync.outstanding_acks >= 0);
+
+                /* We are done with ending synchronization, unset target's sync_flag. */
+                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;
+
+                (*made_progress) = 1;
+            }
+            else {
+                /* if we reach WIN_UNLOCK and there is still operation existing
+                   in pending list, this operation must be the only operation
+                   and it is prepared to piggyback LOCK and UNLOCK. */
+                MPIU_Assert(target->pending_op_list->next == NULL);
+                MPIU_Assert(target->pending_op_list->piggyback_lock_candidate);
+            }
+        }
+        break;
+
+    case MPIDI_RMA_LOCK_GRANTED:
+    case MPIDI_RMA_NONE:
+        if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
+            if (target->pending_op_list == NULL) {
+                if (target->target_rank == rank) {
+                    target->sync.outstanding_acks--;
+                    MPIU_Assert(target->sync.outstanding_acks >= 0);
+                }
+                else {
+                    if (target->put_acc_issued) {
+                        mpi_errno = send_flush_msg(target->target_rank, win_ptr);
                         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                     }
                     else {
-                        mpi_errno = send_lock_msg(target->target_rank,
-                                                  target->lock_type, win_ptr);
-                        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+                        /* We did not issue PUT/ACC since the last
+                           synchronization call, therefore here we
+                           don't need ACK back */
+                        target->sync.outstanding_acks--;
+                        MPIU_Assert(target->sync.outstanding_acks >= 0);
                     }
-                    (*made_progress) = 1;
-                    goto fn_exit;
                 }
+
+                /* We are done with ending synchronization, unset target's sync_flag. */
+                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;
+
+                (*made_progress) = 1;
             }
-            else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
-                if (target->pending_op_list != NULL) {
-                    /* Capable of piggybacking LOCK message with first operation. */
-                    MPIU_Assert(target->pending_op_list->piggyback_lock_candidate);
-                }
-                else {
-                    /* No RMA operation has ever been posted to this target,
-                       finish issuing, no need to acquire the lock. Cleanup
-                       function will clean it up. */
+        }
+        else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
+            if (target->pending_op_list == NULL) {
+                if (target->target_rank == rank) {
                     target->sync.outstanding_acks--;
-                    MPIU_Assert(target->sync.outstanding_acks == 0);
-                    (*made_progress) = 1;
+                    MPIU_Assert(target->sync.outstanding_acks >= 0);
 
-                    /* Unset target's sync_flag. */
-                    target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;
-                    goto fn_exit;
+                    mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
+                    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                 }
-            }
-        }
-        else if (target->access_state == MPIDI_RMA_LOCK_ISSUED) {
-            if (target->outstanding_lock == 0) {
-                target->access_state = MPIDI_RMA_LOCK_GRANTED;
+                else {
+                    MPIDI_CH3_Pkt_flags_t flag = MPIDI_CH3_PKT_FLAG_NONE;
+                    if (!target->put_acc_issued) {
+                        /* We did not issue PUT/ACC since the last
+                           synchronization call, therefore here we
+                           don't need ACK back */
+                        target->sync.outstanding_acks--;
+                        MPIU_Assert(target->sync.outstanding_acks >= 0);
+
+                        flag = MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK;
+                    }
+                    mpi_errno = send_unlock_msg(target->target_rank, win_ptr, flag);
+                    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+                }
+
+                /* We are done with ending synchronization, unset target's sync_flag. */
+                target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;
+
                 (*made_progress) = 1;
             }
-            else
-                goto fn_exit;
         }
-    }
+        break;
 
-    MPIU_Assert(win_ptr->states.access_state == MPIDI_RMA_FENCE_GRANTED ||
-                win_ptr->states.access_state == MPIDI_RMA_PSCW_GRANTED ||
-                win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
-                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
-                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_GRANTED);
+    default:
+        break;
+    } /* end of switch */
 
-     if (win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
-        win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
-        MPIU_Assert(target->access_state == MPIDI_RMA_LOCK_CALLED ||
-                    target->access_state == MPIDI_RMA_LOCK_GRANTED);
-    }
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
 
-    /* Deal with when there is no operation in the list. */
-    if (target->pending_op_list == NULL) {
 
-        /* At this point, per-target state must be LOCK_GRANTED. */
-        if (win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
-            win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
-            MPIU_Assert(target->access_state == MPIDI_RMA_LOCK_GRANTED);
-        }
+#undef FUNCNAME
+#define FUNCNAME issue_ops_target
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *target,
+                                   int *made_progress)
+{
+    MPIDI_RMA_Op_t *curr_op = NULL;
+    MPIDI_CH3_Pkt_flags_t flags;
+    int first_op = 1, mpi_errno = MPI_SUCCESS;
 
-        if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
-            if (target->target_rank == rank) {
-                target->sync.outstanding_acks--;
-                MPIU_Assert(target->sync.outstanding_acks == 0);
-            }
-            else if (target->put_acc_issued == 0) {
-                /* We did not issue PUT/ACC since the last
-                   synchronization call, therefore we do
-                   not need to issue FLUSH here. */
-                target->sync.outstanding_acks--;
-                MPIU_Assert(target->sync.outstanding_acks >= 0);
-            }
-            else {
-                mpi_errno = send_flush_msg(target->target_rank, win_ptr);
-                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-            }
+    (*made_progress) = 0;
 
-            (*made_progress) = 1;
-            goto finish_issue;
-        }
-        else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
-            if (target->target_rank == rank) {
-                mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
-                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-                target->sync.outstanding_acks--;
-                MPIU_Assert(target->sync.outstanding_acks == 0);
-            }
-            else if (target->put_acc_issued == 0) {
-                /* We did not issue PUT/ACC since the last
-                   synchronization call, therefore here we
-                   don't need ACK back */
-                mpi_errno = send_unlock_msg(target->target_rank, win_ptr, MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK);
-                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    if (win_ptr->non_empty_slots == 0 || target == NULL)
+        goto fn_exit;
 
-                target->sync.outstanding_acks--;
-                MPIU_Assert(target->sync.outstanding_acks >= 0);
-            }
-            else {
-                mpi_errno = send_unlock_msg(target->target_rank, win_ptr, MPIDI_CH3_PKT_FLAG_NONE);
-                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-            }
+    /* Exit if window-wide sync is not finished */
+    if (win_ptr->states.access_state == MPIDI_RMA_NONE ||
+        win_ptr->states.access_state == MPIDI_RMA_FENCE_ISSUED ||
+        win_ptr->states.access_state == MPIDI_RMA_PSCW_ISSUED ||
+        win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED)
+        goto fn_exit;
 
-            (*made_progress) = 1;
-            goto finish_issue;
-        }
+    /* Exit if per-target sync is not finished */
+    if (win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
+        win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
+        if (target->access_state == MPIDI_RMA_LOCK_ISSUED)
+            goto fn_exit;
     }
 
     /* Issue out operations in the list. */
-    first_op = 1;
     curr_op = target->next_op_to_issue;
     while (curr_op != NULL) {
-        MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
 
-        if (target->access_state == MPIDI_RMA_LOCK_ISSUED)
-            goto fn_exit;
+        if (target->access_state == MPIDI_RMA_LOCK_ISSUED) {
+            /* It is possible that the previous OP+LOCK changes
+               lock state to LOCK_ISSUED. */
+            break;
+        }
 
         if (curr_op->next == NULL &&
             target->sync.sync_flag == MPIDI_RMA_SYNC_NONE &&
-                /* always issue if it is a request-based RMA,
-                 * otherwise a wait call before unlock will be blocked.*/
-                curr_op->ureq == NULL) {
-            /* skip last OP. */
-            goto finish_issue;
+            curr_op->ureq == NULL) {
+            /* Skip the last OP if sync_flag is NONE since we
+               want to leave it to the ending synchronization
+               so that we can piggyback LOCK / FLUSH.
+               However, if it is a request-based RMA, do not
+               skip it (otherwise a wait call before unlock
+               will be blocked). */
+            break;
         }
 
+        flags = MPIDI_CH3_PKT_FLAG_NONE;
+
         if (first_op) {
             /* piggyback on first OP. */
             if (target->access_state == MPIDI_RMA_LOCK_CALLED) {
                 MPIU_Assert(curr_op->piggyback_lock_candidate);
                 flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK;
                 target->access_state = MPIDI_RMA_LOCK_ISSUED;
-                target->outstanding_lock++;
-                MPIU_Assert(target->outstanding_lock == 1);
             }
             first_op = 0;
         }
@@ -315,6 +346,9 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
             else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
                 flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK;
             }
+
+            /* We are done with ending sync, unset target's sync_flag. */
+            target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;
         }
 
         target->next_op_to_issue = curr_op->next;
@@ -392,11 +426,8 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
         curr_op = target->next_op_to_issue;
 
         (*made_progress) = 1;
-    }
 
- finish_issue:
-    /* Unset target's sync_flag. */
-    target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;
+    } /* end of while loop */
 
   fn_exit:
     return mpi_errno;
@@ -411,7 +442,7 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
 static inline int issue_ops_win(MPID_Win *win_ptr, int *made_progress)
 {
     int mpi_errno = MPI_SUCCESS;
-    int start_slot, end_slot, i;
+    int start_slot, end_slot, i, idx;
     MPIDI_RMA_Target_t *target = NULL;
 
     (*made_progress) = 0;
@@ -419,28 +450,34 @@ static inline int issue_ops_win(MPID_Win *win_ptr, int *made_progress)
     if (win_ptr->non_empty_slots == 0)
         goto fn_exit;
 
-    MPIU_Assert(win_ptr->states.access_state == MPIDI_RMA_FENCE_GRANTED ||
-                win_ptr->states.access_state == MPIDI_RMA_PSCW_GRANTED ||
-                win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
-                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
-                win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_GRANTED);
+    /* Exit if window-wide sync is not finished */
+    if (win_ptr->states.access_state == MPIDI_RMA_NONE ||
+        win_ptr->states.access_state == MPIDI_RMA_FENCE_ISSUED ||
+        win_ptr->states.access_state == MPIDI_RMA_PSCW_ISSUED ||
+        win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED)
+        goto fn_exit;
+
+    /* FIXME: we should optimize the issuing pattern here. */
 
     start_slot = win_ptr->comm_ptr->rank % win_ptr->num_slots;
     end_slot = start_slot + win_ptr->num_slots;
-
     for (i = start_slot; i < end_slot; i++) {
-        int idx;
-        if (i >= win_ptr->num_slots) idx = i - win_ptr->num_slots;
-        else idx = i;
+        if (i < win_ptr->num_slots) idx = i;
+        else idx = i - win_ptr->num_slots;
 
         target = win_ptr->slots[idx].target_list;
         while (target != NULL) {
-            int temp = 0;
-            mpi_errno = issue_ops_target(win_ptr, target, &temp);
+            int temp_progress = 0;
+
+            /* check target state */
+            mpi_errno = check_target_state(win_ptr, target, &temp_progress);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+            if (temp_progress) (*made_progress) = 1;
 
-            if (temp)
-                (*made_progress) = 1;
+            /* issue operations to this target */
+            mpi_errno = issue_ops_target(win_ptr, target, &temp_progress);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+            if (temp_progress) (*made_progress) = 1;
 
             target = target->next;
         }
@@ -700,42 +737,30 @@ int MPIDI_CH3I_RMA_Cleanup_target_aggressive(MPID_Win * win_ptr, MPIDI_RMA_Targe
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPIDI_CH3I_RMA_Make_progress_target(MPID_Win * win_ptr, int target_rank, int *made_progress)
 {
+    int temp_progress = 0;
+    MPIDI_RMA_Target_t *target = NULL;
     int mpi_errno = MPI_SUCCESS;
-    int cannot_issue = 0, temp_progress = 0;
-    MPIDI_RMA_Slot_t *slot;
-    MPIDI_RMA_Target_t *target;
 
     (*made_progress) = 0;
 
-    if (win_ptr->num_slots < win_ptr->comm_ptr->local_size) {
-        slot = &(win_ptr->slots[target_rank % win_ptr->num_slots]);
-        for (target = slot->target_list;
-             target && target->target_rank != target_rank; target = target->next);
-    }
-    else {
-        slot = &(win_ptr->slots[target_rank]);
-        target = slot->target_list;
-    }
-
-    if (target != NULL) {
-
-        /* check window state */
-        mpi_errno = check_window_state(win_ptr, &temp_progress, &cannot_issue);
-        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-        if (temp_progress)
-            (*made_progress) = 1;
+    /* check window state */
+    mpi_errno = check_window_state(win_ptr, &temp_progress);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    if (temp_progress) (*made_progress) = 1;
 
-        if (cannot_issue)
-            goto fn_exit;
+    /* find target element */
+    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &target);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        mpi_errno = issue_ops_target(win_ptr, target, &temp_progress);
-        if (mpi_errno)
-            MPIU_ERR_POP(mpi_errno);
+    /* check target state */
+    mpi_errno = check_target_state(win_ptr, target, &temp_progress);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    if (temp_progress) (*made_progress) = 1;
 
-        if (temp_progress)
-            (*made_progress) = 1;
-    }
+    /* issue operations to this target */
+    mpi_errno = issue_ops_target(win_ptr, target, &temp_progress);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    if (temp_progress) (*made_progress) = 1;
 
   fn_exit:
     return mpi_errno;
@@ -750,27 +775,20 @@ int MPIDI_CH3I_RMA_Make_progress_target(MPID_Win * win_ptr, int target_rank, int
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPIDI_CH3I_RMA_Make_progress_win(MPID_Win * win_ptr, int *made_progress)
 {
-    int temp_progress = 0, cannot_issue = 0;
+    int temp_progress = 0;
     int mpi_errno = MPI_SUCCESS;
 
     (*made_progress) = 0;
 
     /* check window state */
-    mpi_errno = check_window_state(win_ptr, &temp_progress, &cannot_issue);
+    mpi_errno = check_window_state(win_ptr, &temp_progress);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    if (temp_progress) (*made_progress) = 1;
 
-    if (temp_progress)
-        (*made_progress) = 1;
-
-    if (cannot_issue)
-        goto fn_exit;
-
+    /* issue operations on window */
     mpi_errno = issue_ops_win(win_ptr, &temp_progress);
-    if (mpi_errno)
-        MPIU_ERR_POP(mpi_errno);
-
-    if (temp_progress)
-        (*made_progress) = 1;
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    if (temp_progress) (*made_progress) = 1;
 
   fn_exit:
     return mpi_errno;
@@ -786,35 +804,21 @@ int MPIDI_CH3I_RMA_Make_progress_win(MPID_Win * win_ptr, int *made_progress)
 int MPIDI_CH3I_RMA_Make_progress_global(int *made_progress)
 {
     MPIDI_RMA_Win_list_t *win_elem = MPIDI_RMA_Win_list;
-    int tmp = 0, cannot_issue = 0;
     int mpi_errno = MPI_SUCCESS;
 
     (*made_progress) = 0;
 
     for (win_elem = MPIDI_RMA_Win_list; win_elem; win_elem = win_elem->next) {
-        if (win_elem->win_ptr->states.access_state == MPIDI_RMA_FENCE_ISSUED ||
-            win_elem->win_ptr->states.access_state == MPIDI_RMA_PSCW_ISSUED ||
-            win_elem->win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
-            win_elem->win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
-            win_elem->win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED ||
-            win_elem->win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_GRANTED) {
-
-            /* check window state */
-            mpi_errno = check_window_state(win_elem->win_ptr, &tmp, &cannot_issue);
-            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-            if (tmp)
-                (*made_progress) = 1;
+        int temp_progress = 0;
 
-            if (cannot_issue)
-                continue;
+        if (win_elem->win_ptr->states.access_state == MPIDI_RMA_NONE ||
+            win_elem->win_ptr->states.access_state == MPIDI_RMA_FENCE_GRANTED ||
+            win_elem->win_ptr->states.access_state == MPIDI_RMA_PSCW_GRANTED)
+            continue;
 
-            mpi_errno = issue_ops_win(win_elem->win_ptr, &tmp);
-            if (mpi_errno)
-                MPIU_ERR_POP(mpi_errno);
-            if (tmp)
-                (*made_progress) = 1;
-        }
+        mpi_errno = MPIDI_CH3I_RMA_Make_progress_win(win_elem->win_ptr, &temp_progress);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        if (temp_progress) (*made_progress) = 1;
     }
 
   fn_exit:

http://git.mpich.org/mpich.git/commitdiff/7c533ef3cf618df723da3dc7fba84be3c15442cf

commit 7c533ef3cf618df723da3dc7fba84be3c15442cf
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Tue Dec 2 20:06:12 2014 -0800

    Modify struct name: replace "struct XXX" with "XXX_t"
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpid_rma_lockqueue.h b/src/mpid/ch3/include/mpid_rma_lockqueue.h
index 46e62fa..a2633aa 100644
--- a/src/mpid/ch3/include/mpid_rma_lockqueue.h
+++ b/src/mpid/ch3/include/mpid_rma_lockqueue.h
@@ -19,10 +19,10 @@ MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winlock_getlocallock);
 #define FUNCNAME MPIDI_CH3I_Win_lock_entry_alloc
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static inline MPIDI_RMA_Lock_entry *MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win * win_ptr,
-                                                                    MPIDI_CH3_Pkt_t *pkt)
+static inline MPIDI_RMA_Lock_entry_t *MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win * win_ptr,
+                                                                      MPIDI_CH3_Pkt_t *pkt)
 {
-    MPIDI_RMA_Lock_entry *new_ptr = NULL;
+    MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
 
     if (win_ptr->lock_entry_pool != NULL) {
         new_ptr = win_ptr->lock_entry_pool;
@@ -46,7 +46,7 @@ static inline MPIDI_RMA_Lock_entry *MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win * w
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 static inline int MPIDI_CH3I_Win_lock_entry_free(MPID_Win * win_ptr,
-                                                 MPIDI_RMA_Lock_entry *lock_entry)
+                                                 MPIDI_RMA_Lock_entry_t *lock_entry)
 {
     int mpi_errno = MPI_SUCCESS;
 
diff --git a/src/mpid/ch3/include/mpid_rma_oplist.h b/src/mpid/ch3/include/mpid_rma_oplist.h
index b0f3ec2..6c5a9cd 100644
--- a/src/mpid/ch3/include/mpid_rma_oplist.h
+++ b/src/mpid/ch3/include/mpid_rma_oplist.h
@@ -16,8 +16,8 @@ int MPIDI_CH3I_RMA_Cleanup_target_aggressive(MPID_Win * win_ptr, MPIDI_RMA_Targe
 int MPIDI_CH3I_RMA_Make_progress_target(MPID_Win * win_ptr, int target_rank, int *made_progress);
 int MPIDI_CH3I_RMA_Make_progress_win(MPID_Win * win_ptr, int *made_progress);
 
-extern struct MPIDI_RMA_Op *global_rma_op_pool, *global_rma_op_pool_tail, *global_rma_op_pool_start;
-extern struct MPIDI_RMA_Target *global_rma_target_pool, *global_rma_target_pool_tail, *global_rma_target_pool_start;
+extern MPIDI_RMA_Op_t *global_rma_op_pool, *global_rma_op_pool_tail, *global_rma_op_pool_start;
+extern MPIDI_RMA_Target_t *global_rma_target_pool, *global_rma_target_pool_tail, *global_rma_target_pool_start;
 
 MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_rmaqueue_alloc);
 
diff --git a/src/mpid/ch3/include/mpid_rma_types.h b/src/mpid/ch3/include/mpid_rma_types.h
index 62c2885..f01468a 100644
--- a/src/mpid/ch3/include/mpid_rma_types.h
+++ b/src/mpid/ch3/include/mpid_rma_types.h
@@ -134,7 +134,7 @@ typedef struct MPIDI_RMA_Lock_entry {
     MPIDI_CH3_Pkt_t pkt;    /* all information for this request packet */
     void *data;             /* for queued PUTs / ACCs / GACCs, data is copied here */
     int all_data_recved;    /* indicate if all data has been received */
-} MPIDI_RMA_Lock_entry;
+} MPIDI_RMA_Lock_entry_t;
 
 typedef MPIDI_RMA_Op_t *MPIDI_RMA_Ops_list_t;
 
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 930b75e..43a9ac8 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -230,7 +230,7 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                       MPIDI_msg_sz_t *buflen,
                                       MPID_Request **reqp)
 {
-    MPIDI_RMA_Lock_entry *new_ptr = NULL;
+    MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
     int mpi_errno = MPI_SUCCESS;
 
     (*reqp) = NULL;
@@ -382,7 +382,7 @@ static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
         /* Queue the lock information. */
         MPIDI_CH3_Pkt_t pkt;
         MPIDI_CH3_Pkt_lock_t *lock_pkt = &pkt.lock;
-        MPIDI_RMA_Lock_entry *new_ptr = NULL;
+        MPIDI_RMA_Lock_entry_t *new_ptr = NULL;
 
         MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
         lock_pkt->lock_type = lock_type;
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index e364673..11bdaf1 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -828,7 +828,7 @@ static int create_derived_datatype(MPID_Request *req, MPID_Datatype **dtp)
 }
 
 
-static inline int perform_put_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
+static inline int perform_put_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry_t *lock_entry)
 {
     MPIDI_CH3_Pkt_put_t *put_pkt = &((lock_entry->pkt).put);
     MPIDI_VC_t *vc = NULL;
@@ -860,7 +860,7 @@ static inline int perform_put_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
     goto fn_exit;
 }
 
-static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
+static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry_t *lock_entry)
 {
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_get_resp_t *get_resp_pkt = &upkt.get_resp;
@@ -955,7 +955,7 @@ static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
 }
 
 
-static inline int perform_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
+static inline int perform_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry_t *lock_entry)
 {
     MPIDI_CH3_Pkt_accum_t *acc_pkt = &((lock_entry->pkt).accum);
     MPIDI_VC_t *vc = NULL;
@@ -995,7 +995,7 @@ static inline int perform_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
 }
 
 
-static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
+static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry_t *lock_entry)
 {
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_get_accum_resp_t *get_accum_resp_pkt = &upkt.get_accum_resp;
@@ -1126,7 +1126,7 @@ static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Loc
 }
 
 
-static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
+static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry_t *lock_entry)
 {
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_fop_resp_t *fop_resp_pkt = &upkt.fop_resp;
@@ -1208,7 +1208,7 @@ static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
 }
 
 
-static inline int perform_cas_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
+static inline int perform_cas_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry_t *lock_entry)
 {
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_cas_resp_t *cas_resp_pkt = &upkt.cas_resp;
@@ -1288,7 +1288,7 @@ static inline int perform_cas_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
 }
 
 
-static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
+static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry_t *lock_entry)
 {
     int mpi_errno = MPI_SUCCESS;
 
@@ -1361,7 +1361,7 @@ static int entered_count = 0;
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
 {
-    MPIDI_RMA_Lock_entry *lock_entry, *lock_entry_next;
+    MPIDI_RMA_Lock_entry_t *lock_entry, *lock_entry_next;
     int requested_lock, mpi_errno = MPI_SUCCESS, temp_entered_count;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RELEASE_LOCK);
 
@@ -1405,7 +1405,7 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
 	       only that one. */
 
 	    /* FIXME: MT: All queue accesses need to be made atomic */
-            lock_entry = (MPIDI_RMA_Lock_entry *) win_ptr->lock_queue;
+            lock_entry = (MPIDI_RMA_Lock_entry_t *) win_ptr->lock_queue;
             while (lock_entry) {
                 lock_entry_next = lock_entry->next;
 
@@ -1457,7 +1457,7 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *vc,
     int requested_lock;
     MPI_Win target_win_handle;
     MPID_Win *win_ptr = NULL;
-    MPIDI_RMA_Lock_entry *lock_queue_entry = rreq->dev.lock_queue_entry;
+    MPIDI_RMA_Lock_entry_t *lock_queue_entry = rreq->dev.lock_queue_entry;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
 
diff --git a/src/mpid/ch3/src/ch3u_rma_oplist.c b/src/mpid/ch3/src/ch3u_rma_oplist.c
index 77de769..eaab99a 100644
--- a/src/mpid/ch3/src/ch3u_rma_oplist.c
+++ b/src/mpid/ch3/src/ch3u_rma_oplist.c
@@ -461,7 +461,7 @@ int MPIDI_CH3I_RMA_Free_ops_before_completion(MPID_Win * win_ptr)
 {
     MPIDI_RMA_Op_t *curr_op = NULL;
     MPIDI_RMA_Target_t *curr_target = NULL;
-    struct MPIDI_RMA_Op **op_list = NULL, **op_list_tail = NULL;
+    MPIDI_RMA_Op_t **op_list = NULL, **op_list_tail = NULL;
     int read_flag = 0;
     int i, made_progress = 0;
     int mpi_errno = MPI_SUCCESS;
diff --git a/src/mpid/ch3/src/mpid_rma.c b/src/mpid/ch3/src/mpid_rma.c
index 9d2e21c..5ef58bb 100644
--- a/src/mpid/ch3/src/mpid_rma.c
+++ b/src/mpid/ch3/src/mpid_rma.c
@@ -356,7 +356,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     (*win_ptr)->info_args.alloc_shared_noncontig = 0;
     (*win_ptr)->info_args.alloc_shm = FALSE;
 
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->op_pool_start, struct MPIDI_RMA_Op *,
+    MPIU_CHKPMEM_MALLOC((*win_ptr)->op_pool_start, MPIDI_RMA_Op_t *,
                         sizeof(MPIDI_RMA_Op_t) * MPIR_CVAR_CH3_RMA_OP_WIN_POOL_SIZE, mpi_errno,
                         "RMA op pool");
     (*win_ptr)->op_pool = NULL;
@@ -367,7 +367,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     }
 
     win_target_pool_size = MPIR_MIN(MPIR_CVAR_CH3_RMA_TARGET_WIN_POOL_SIZE, MPIR_Comm_size(win_comm_ptr));
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->target_pool_start, struct MPIDI_RMA_Target *,
+    MPIU_CHKPMEM_MALLOC((*win_ptr)->target_pool_start, MPIDI_RMA_Target_t *,
                         sizeof(MPIDI_RMA_Target_t) * win_target_pool_size,
                         mpi_errno, "RMA target pool");
     (*win_ptr)->target_pool = NULL;
@@ -378,7 +378,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     }
 
     (*win_ptr)->num_slots = MPIR_MIN(MPIR_CVAR_CH3_RMA_SLOTS_SIZE, MPIR_Comm_size(win_comm_ptr));
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->slots, struct MPIDI_RMA_Slot *,
+    MPIU_CHKPMEM_MALLOC((*win_ptr)->slots, MPIDI_RMA_Slot_t *,
                         sizeof(MPIDI_RMA_Slot_t) * (*win_ptr)->num_slots, mpi_errno, "RMA slots");
     for (i = 0; i < (*win_ptr)->num_slots; i++) {
         (*win_ptr)->slots[i].target_list = NULL;
@@ -387,8 +387,8 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
 
     /* FIXME: we can optimize by letting the user to pass WIN INFO hint if they will not use passive target,
        in such case we do not need to allocate window pool for lock entries. */
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->lock_entry_pool_start, struct MPIDI_RMA_Lock_entry *,
-                        sizeof(MPIDI_RMA_Lock_entry) * MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE,
+    MPIU_CHKPMEM_MALLOC((*win_ptr)->lock_entry_pool_start, MPIDI_RMA_Lock_entry_t *,
+                        sizeof(MPIDI_RMA_Lock_entry_t) * MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE,
                         mpi_errno, "RMA lock entry pool");
     (*win_ptr)->lock_entry_pool = NULL;
     (*win_ptr)->lock_entry_pool_tail = NULL;
diff --git a/src/mpid/ch3/src/mpidi_rma.c b/src/mpid/ch3/src/mpidi_rma.c
index 45acc76..bae0595 100644
--- a/src/mpid/ch3/src/mpidi_rma.c
+++ b/src/mpid/ch3/src/mpidi_rma.c
@@ -91,9 +91,9 @@ cvars:
 */
 
 
-struct MPIDI_RMA_Op *global_rma_op_pool = NULL, *global_rma_op_pool_tail = NULL, *global_rma_op_pool_start = NULL;
-struct MPIDI_RMA_Target *global_rma_target_pool = NULL, *global_rma_target_pool_tail = NULL, *global_rma_target_pool_start = NULL;
-struct MPIDI_RMA_Pkt_orderings *MPIDI_RMA_Pkt_orderings = NULL;
+MPIDI_RMA_Op_t *global_rma_op_pool = NULL, *global_rma_op_pool_tail = NULL, *global_rma_op_pool_start = NULL;
+MPIDI_RMA_Target_t *global_rma_target_pool = NULL, *global_rma_target_pool_tail = NULL, *global_rma_target_pool_start = NULL;
+MPIDI_RMA_Pkt_orderings_t *MPIDI_RMA_Pkt_orderings = NULL;
 
 #undef FUNCNAME
 #define FUNCNAME MPIDI_RMA_init
@@ -109,16 +109,16 @@ int MPIDI_RMA_init(void)
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_RMA_INIT);
 
-    MPIU_CHKPMEM_MALLOC(global_rma_op_pool_start, struct MPIDI_RMA_Op *,
-                        sizeof(struct MPIDI_RMA_Op) * MPIR_CVAR_CH3_RMA_OP_GLOBAL_POOL_SIZE,
+    MPIU_CHKPMEM_MALLOC(global_rma_op_pool_start, MPIDI_RMA_Op_t *,
+                        sizeof(MPIDI_RMA_Op_t) * MPIR_CVAR_CH3_RMA_OP_GLOBAL_POOL_SIZE,
                         mpi_errno, "RMA op pool");
     for (i = 0; i < MPIR_CVAR_CH3_RMA_OP_GLOBAL_POOL_SIZE; i++) {
         global_rma_op_pool_start[i].pool_type = MPIDI_RMA_POOL_GLOBAL;
         MPL_LL_APPEND(global_rma_op_pool, global_rma_op_pool_tail, &(global_rma_op_pool_start[i]));
     }
 
-    MPIU_CHKPMEM_MALLOC(global_rma_target_pool_start, struct MPIDI_RMA_Target *,
-                        sizeof(struct MPIDI_RMA_Target) * MPIR_CVAR_CH3_RMA_TARGET_GLOBAL_POOL_SIZE,
+    MPIU_CHKPMEM_MALLOC(global_rma_target_pool_start, MPIDI_RMA_Target_t *,
+                        sizeof(MPIDI_RMA_Target_t) * MPIR_CVAR_CH3_RMA_TARGET_GLOBAL_POOL_SIZE,
                         mpi_errno, "RMA target pool");
     for (i = 0; i < MPIR_CVAR_CH3_RMA_TARGET_GLOBAL_POOL_SIZE; i++) {
         global_rma_target_pool_start[i].pool_type = MPIDI_RMA_POOL_GLOBAL;

http://git.mpich.org/mpich.git/commitdiff/54af207cae91a9b3c6b1a74484198476d517253d

commit 54af207cae91a9b3c6b1a74484198476d517253d
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Dec 1 11:18:54 2014 -0600

    Change struct name from MPIDI_Win_lock_queue to MPIDI_RMA_Lock_entry
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpid_rma_lockqueue.h b/src/mpid/ch3/include/mpid_rma_lockqueue.h
index fed97fe..46e62fa 100644
--- a/src/mpid/ch3/include/mpid_rma_lockqueue.h
+++ b/src/mpid/ch3/include/mpid_rma_lockqueue.h
@@ -19,10 +19,10 @@ MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winlock_getlocallock);
 #define FUNCNAME MPIDI_CH3I_Win_lock_entry_alloc
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static inline MPIDI_Win_lock_queue *MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win * win_ptr,
+static inline MPIDI_RMA_Lock_entry *MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win * win_ptr,
                                                                     MPIDI_CH3_Pkt_t *pkt)
 {
-    MPIDI_Win_lock_queue *new_ptr = NULL;
+    MPIDI_RMA_Lock_entry *new_ptr = NULL;
 
     if (win_ptr->lock_entry_pool != NULL) {
         new_ptr = win_ptr->lock_entry_pool;
@@ -46,7 +46,7 @@ static inline MPIDI_Win_lock_queue *MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win * w
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 static inline int MPIDI_CH3I_Win_lock_entry_free(MPID_Win * win_ptr,
-                                                 MPIDI_Win_lock_queue *lock_entry)
+                                                 MPIDI_RMA_Lock_entry *lock_entry)
 {
     int mpi_errno = MPI_SUCCESS;
 
diff --git a/src/mpid/ch3/include/mpid_rma_types.h b/src/mpid/ch3/include/mpid_rma_types.h
index 81ff07f..62c2885 100644
--- a/src/mpid/ch3/include/mpid_rma_types.h
+++ b/src/mpid/ch3/include/mpid_rma_types.h
@@ -129,12 +129,12 @@ typedef struct MPIDI_RMA_Win_list {
 
 extern MPIDI_RMA_Win_list_t *MPIDI_RMA_Win_list, *MPIDI_RMA_Win_list_tail;
 
-typedef struct MPIDI_Win_lock_queue {
-    struct MPIDI_Win_lock_queue *next;
+typedef struct MPIDI_RMA_Lock_entry {
+    struct MPIDI_RMA_Lock_entry *next;
     MPIDI_CH3_Pkt_t pkt;    /* all information for this request packet */
     void *data;             /* for queued PUTs / ACCs / GACCs, data is copied here */
     int all_data_recved;    /* indicate if all data has been received */
-} MPIDI_Win_lock_queue;
+} MPIDI_RMA_Lock_entry;
 
 typedef MPIDI_RMA_Op_t *MPIDI_RMA_Ops_list_t;
 
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index f6da014..f17c628 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -315,8 +315,8 @@ extern MPIDI_RMA_Pkt_orderings_t *MPIDI_RMA_Pkt_orderings;
     volatile int current_lock_type;   /* current lock type on this window (as target)   \
                               * (none, shared, exclusive) */             \
     volatile int shared_lock_ref_cnt;                                    \
-    struct MPIDI_Win_lock_queue volatile *lock_queue;  /* list of unsatisfied locks */  \
-    struct MPIDI_Win_lock_queue volatile *lock_queue_tail; /* tail of unstaisfied locks. */ \
+    struct MPIDI_RMA_Lock_entry volatile *lock_queue;  /* list of unsatisfied locks */  \
+    struct MPIDI_RMA_Lock_entry volatile *lock_queue_tail; /* tail of unstaisfied locks. */ \
                                                                          \
     MPI_Aint *sizes;      /* array of sizes of all windows */            \
     struct MPIDI_Win_info_args info_args;                                \
@@ -350,9 +350,9 @@ extern MPIDI_RMA_Pkt_orderings_t *MPIDI_RMA_Pkt_orderings;
     int outstanding_locks; /* when issuing multiple lock requests in     \
                             MPI_WIN_LOCK_ALL, this counter keeps track   \
                             of number of locks not being granted yet. */ \
-    struct MPIDI_Win_lock_queue *lock_entry_pool_start;                  \
-    struct MPIDI_Win_lock_queue *lock_entry_pool;                        \
-    struct MPIDI_Win_lock_queue *lock_entry_pool_tail;                   \
+    struct MPIDI_RMA_Lock_entry *lock_entry_pool_start;                  \
+    struct MPIDI_RMA_Lock_entry *lock_entry_pool;                        \
+    struct MPIDI_RMA_Lock_entry *lock_entry_pool_tail;                   \
 
 #ifdef MPIDI_CH3_WIN_DECL
 #define MPID_DEV_WIN_DECL \
@@ -443,7 +443,7 @@ typedef struct MPIDI_Request {
     MPI_Win     target_win_handle;
     MPI_Win     source_win_handle;
     MPIDI_CH3_Pkt_flags_t flags; /* flags that were included in the original RMA packet header */
-    struct MPIDI_Win_lock_queue *lock_queue_entry;
+    struct MPIDI_RMA_Lock_entry *lock_queue_entry;
     MPI_Request resp_request_handle; /* Handle for get_accumulate response */
 
     MPIDI_REQUEST_SEQNUM
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index bae66d3..930b75e 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -230,7 +230,7 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                                       MPIDI_msg_sz_t *buflen,
                                       MPID_Request **reqp)
 {
-    MPIDI_Win_lock_queue *new_ptr = NULL;
+    MPIDI_RMA_Lock_entry *new_ptr = NULL;
     int mpi_errno = MPI_SUCCESS;
 
     (*reqp) = NULL;
@@ -382,7 +382,7 @@ static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
         /* Queue the lock information. */
         MPIDI_CH3_Pkt_t pkt;
         MPIDI_CH3_Pkt_lock_t *lock_pkt = &pkt.lock;
-        MPIDI_Win_lock_queue *new_ptr = NULL;
+        MPIDI_RMA_Lock_entry *new_ptr = NULL;
 
         MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
         lock_pkt->lock_type = lock_type;
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 73f67ef..e364673 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -828,7 +828,7 @@ static int create_derived_datatype(MPID_Request *req, MPID_Datatype **dtp)
 }
 
 
-static inline int perform_put_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+static inline int perform_put_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
 {
     MPIDI_CH3_Pkt_put_t *put_pkt = &((lock_entry->pkt).put);
     MPIDI_VC_t *vc = NULL;
@@ -860,7 +860,7 @@ static inline int perform_put_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_qu
     goto fn_exit;
 }
 
-static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
 {
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_get_resp_t *get_resp_pkt = &upkt.get_resp;
@@ -955,7 +955,7 @@ static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_qu
 }
 
 
-static inline int perform_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+static inline int perform_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
 {
     MPIDI_CH3_Pkt_accum_t *acc_pkt = &((lock_entry->pkt).accum);
     MPIDI_VC_t *vc = NULL;
@@ -995,7 +995,7 @@ static inline int perform_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_qu
 }
 
 
-static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
 {
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_get_accum_resp_t *get_accum_resp_pkt = &upkt.get_accum_resp;
@@ -1126,7 +1126,7 @@ static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_loc
 }
 
 
-static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
 {
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_fop_resp_t *fop_resp_pkt = &upkt.fop_resp;
@@ -1208,7 +1208,7 @@ static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_qu
 }
 
 
-static inline int perform_cas_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+static inline int perform_cas_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
 {
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_cas_resp_t *cas_resp_pkt = &upkt.cas_resp;
@@ -1288,7 +1288,7 @@ static inline int perform_cas_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_qu
 }
 
 
-static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_entry *lock_entry)
 {
     int mpi_errno = MPI_SUCCESS;
 
@@ -1361,7 +1361,7 @@ static int entered_count = 0;
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
 {
-    MPIDI_Win_lock_queue *lock_entry, *lock_entry_next;
+    MPIDI_RMA_Lock_entry *lock_entry, *lock_entry_next;
     int requested_lock, mpi_errno = MPI_SUCCESS, temp_entered_count;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RELEASE_LOCK);
 
@@ -1405,7 +1405,7 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
 	       only that one. */
 
 	    /* FIXME: MT: All queue accesses need to be made atomic */
-            lock_entry = (MPIDI_Win_lock_queue *) win_ptr->lock_queue;
+            lock_entry = (MPIDI_RMA_Lock_entry *) win_ptr->lock_queue;
             while (lock_entry) {
                 lock_entry_next = lock_entry->next;
 
@@ -1457,7 +1457,7 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *vc,
     int requested_lock;
     MPI_Win target_win_handle;
     MPID_Win *win_ptr = NULL;
-    MPIDI_Win_lock_queue *lock_queue_entry = rreq->dev.lock_queue_entry;
+    MPIDI_RMA_Lock_entry *lock_queue_entry = rreq->dev.lock_queue_entry;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
 
diff --git a/src/mpid/ch3/src/mpid_rma.c b/src/mpid/ch3/src/mpid_rma.c
index 5f1e345..9d2e21c 100644
--- a/src/mpid/ch3/src/mpid_rma.c
+++ b/src/mpid/ch3/src/mpid_rma.c
@@ -387,8 +387,8 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
 
     /* FIXME: we can optimize by letting the user to pass WIN INFO hint if they will not use passive target,
        in such case we do not need to allocate window pool for lock entries. */
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->lock_entry_pool_start, struct MPIDI_Win_lock_queue *,
-                        sizeof(MPIDI_Win_lock_queue) * MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE,
+    MPIU_CHKPMEM_MALLOC((*win_ptr)->lock_entry_pool_start, struct MPIDI_RMA_Lock_entry *,
+                        sizeof(MPIDI_RMA_Lock_entry) * MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE,
                         mpi_errno, "RMA lock entry pool");
     (*win_ptr)->lock_entry_pool = NULL;
     (*win_ptr)->lock_entry_pool_tail = NULL;

http://git.mpich.org/mpich.git/commitdiff/886b1d8ddc400fe3e7b314194e487fc370ed31c5

commit 886b1d8ddc400fe3e7b314194e487fc370ed31c5
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Tue Nov 25 11:21:30 2014 -0600

    Use window pool to manage lock requests
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpid_rma_lockqueue.h b/src/mpid/ch3/include/mpid_rma_lockqueue.h
index 9fe6fb2..fed97fe 100644
--- a/src/mpid/ch3/include/mpid_rma_lockqueue.h
+++ b/src/mpid/ch3/include/mpid_rma_lockqueue.h
@@ -19,35 +19,46 @@ MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winlock_getlocallock);
 #define FUNCNAME MPIDI_CH3I_Win_lock_entry_alloc
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static inline int MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win * win_ptr,
-                                                  MPIDI_CH3_Pkt_t *pkt,
-                                                  MPIDI_Win_lock_queue **lock_entry)
+static inline MPIDI_Win_lock_queue *MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win * win_ptr,
+                                                                    MPIDI_CH3_Pkt_t *pkt)
 {
     MPIDI_Win_lock_queue *new_ptr = NULL;
-    int mpi_errno = MPI_SUCCESS;
 
-    /* FIXME: we should use a lock entry queue to manage all this. */
+    if (win_ptr->lock_entry_pool != NULL) {
+        new_ptr = win_ptr->lock_entry_pool;
+        MPL_LL_DELETE(win_ptr->lock_entry_pool, win_ptr->lock_entry_pool_tail, new_ptr);
+    }
 
-    /* allocate lock queue entry */
-    MPIR_T_PVAR_TIMER_START(RMA, rma_lockqueue_alloc);
-    new_ptr = (MPIDI_Win_lock_queue *) MPIU_Malloc(sizeof(MPIDI_Win_lock_queue));
-    MPIR_T_PVAR_TIMER_END(RMA, rma_lockqueue_alloc);
-    if (!new_ptr) {
-        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
-                             "MPIDI_Win_lock_queue");
+    if (new_ptr != NULL) {
+        new_ptr->next = NULL;
+        new_ptr->pkt = (*pkt);
+        new_ptr->data = NULL;
+        new_ptr->all_data_recved = 0;
     }
 
-    new_ptr->next = NULL;
-    new_ptr->pkt = (*pkt);
-    new_ptr->data = NULL;
-    new_ptr->all_data_recved = 0;
+    return new_ptr;
+}
+
+/* MPIDI_CH3I_Win_lock_entry_free(): put a lock queue entry back to
+ * the global pool. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Win_lock_entry_free
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+static inline int MPIDI_CH3I_Win_lock_entry_free(MPID_Win * win_ptr,
+                                                 MPIDI_Win_lock_queue *lock_entry)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    if (lock_entry->data != NULL) {
+        MPIU_Free(lock_entry->data);
+    }
 
-    (*lock_entry) = new_ptr;
+    /* use PREPEND when return objects back to the pool
+       in order to improve cache performance */
+    MPL_LL_PREPEND(win_ptr->lock_entry_pool, win_ptr->lock_entry_pool_tail, lock_entry);
 
- fn_exit:
     return mpi_errno;
- fn_fail:
-    goto fn_exit;
 }
 
 #endif  /* MPID_RMA_ISSUE_H_INCLUDED */
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index c87e4d5..f6da014 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -350,6 +350,9 @@ extern MPIDI_RMA_Pkt_orderings_t *MPIDI_RMA_Pkt_orderings;
     int outstanding_locks; /* when issuing multiple lock requests in     \
                             MPI_WIN_LOCK_ALL, this counter keeps track   \
                             of number of locks not being granted yet. */ \
+    struct MPIDI_Win_lock_queue *lock_entry_pool_start;                  \
+    struct MPIDI_Win_lock_queue *lock_entry_pool;                        \
+    struct MPIDI_Win_lock_queue *lock_entry_pool_tail;                   \
 
 #ifdef MPIDI_CH3_WIN_DECL
 #define MPID_DEV_WIN_DECL \
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index cf4dfcd..bae66d3 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -235,9 +235,11 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
 
     (*reqp) = NULL;
 
-    mpi_errno = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, pkt, &new_ptr);
-    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-    MPIU_Assert(new_ptr != NULL);
+    new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, pkt);
+    if (new_ptr == NULL) {
+        /* FIXME: we run out of resources of lock requests, needs to
+           send LOCK DISCARDED packet back to origin */
+    }
     MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
 
     if (pkt->type == MPIDI_CH3_PKT_LOCK ||
@@ -386,9 +388,11 @@ static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
         lock_pkt->lock_type = lock_type;
         lock_pkt->origin_rank = win_ptr->comm_ptr->rank;
 
-        mpi_errno = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, &pkt, &new_ptr);
-        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-        MPIU_Assert(new_ptr != NULL);
+        new_ptr = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, &pkt);
+        if (new_ptr == NULL) {
+            /* FIXME: we run out of resources of lock requests, needs to
+               send LOCK DISCARDED packet back to origin */
+        }
         MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
 
         new_ptr->all_data_recved = 1;
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index e4a03de..73f67ef 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -1417,13 +1417,12 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
                     mpi_errno = perform_op_in_lock_queue(win_ptr, lock_entry);
                     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-                    /* free data buffer in lock queue entry */
-                    if (lock_entry->data != NULL)
-                        MPIU_Free(lock_entry->data);
-			    
                     /* dequeue entry from lock queue */
                     MPL_LL_DELETE(win_ptr->lock_queue, win_ptr->lock_queue_tail, lock_entry);
-                    MPIU_Free(lock_entry);
+
+                    /* free this entry */
+                    mpi_errno = MPIDI_CH3I_Win_lock_entry_free(win_ptr, lock_entry);
+                    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
                     /* if the granted lock is exclusive,
                        no need to continue */
@@ -1482,13 +1481,12 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *vc,
         mpi_errno = perform_op_in_lock_queue(win_ptr, lock_queue_entry);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        /* free data buffer in lock queue entry */
-        if (lock_queue_entry->data != NULL)
-            MPIU_Free(lock_queue_entry->data);
-
         /* dequeue entry from lock queue */
         MPL_LL_DELETE(win_ptr->lock_queue, win_ptr->lock_queue_tail, lock_queue_entry);
-        MPIU_Free(lock_queue_entry);
+
+        /* free this entry */
+        mpi_errno = MPIDI_CH3I_Win_lock_entry_free(win_ptr, lock_queue_entry);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     }
     /* If try acquiring lock failed, just leave the lock queue entry in the queue with
        all_data_recved marked as 1, release_lock() function will traverse the queue
diff --git a/src/mpid/ch3/src/mpid_rma.c b/src/mpid/ch3/src/mpid_rma.c
index 0eb2621..5f1e345 100644
--- a/src/mpid/ch3/src/mpid_rma.c
+++ b/src/mpid/ch3/src/mpid_rma.c
@@ -290,7 +290,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     MPID_Comm *win_comm_ptr;
     int win_target_pool_size;
     MPIDI_RMA_Win_list_t *win_elem;
-    MPIU_CHKPMEM_DECL(4);
+    MPIU_CHKPMEM_DECL(5);
     MPIDI_STATE_DECL(MPID_STATE_WIN_INIT);
 
     MPIDI_FUNC_ENTER(MPID_STATE_WIN_INIT);
@@ -385,6 +385,18 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
         (*win_ptr)->slots[i].target_list_tail = NULL;
     }
 
+    /* FIXME: we can optimize by letting the user to pass WIN INFO hint if they will not use passive target,
+       in such case we do not need to allocate window pool for lock entries. */
+    MPIU_CHKPMEM_MALLOC((*win_ptr)->lock_entry_pool_start, struct MPIDI_Win_lock_queue *,
+                        sizeof(MPIDI_Win_lock_queue) * MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE,
+                        mpi_errno, "RMA lock entry pool");
+    (*win_ptr)->lock_entry_pool = NULL;
+    (*win_ptr)->lock_entry_pool_tail = NULL;
+    for (i = 0; i < MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE; i++) {
+        MPL_LL_APPEND((*win_ptr)->lock_entry_pool, (*win_ptr)->lock_entry_pool_tail,
+                      &((*win_ptr)->lock_entry_pool_start[i]));
+    }
+
     MPID_WIN_FTABLE_SET_DEFAULTS(win_ptr);
 
     /* enqueue window into the global list */
diff --git a/src/mpid/ch3/src/mpidi_rma.c b/src/mpid/ch3/src/mpidi_rma.c
index 88ca0f8..45acc76 100644
--- a/src/mpid/ch3/src/mpidi_rma.c
+++ b/src/mpid/ch3/src/mpidi_rma.c
@@ -63,6 +63,30 @@ cvars:
         targets) that stores information about RMA targets that
         could not be issued immediatly.  Requires a positive value.
 
+    - name        : MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE
+      category    : CH3
+      type        : int
+      default     : 256
+      class       : none
+      verbosity   : MPI_T_VERBOSITY_USER_BASIC
+      scope       : MPI_T_SCOPE_ALL_EQ
+      description : >-
+        Size of the window-private RMA lock entries pool (in number of
+        lock entries) that stores information about RMA lock requests that
+        could not be satisfied immediatly.  Requires a positive value.
+
+    - name        : MPIR_CVAR_CH3_RMA_LOCK_ENTRY_GLOBAL_POOL_SIZE
+      category    : CH3
+      type        : int
+      default     : 16384
+      class       : none
+      verbosity   : MPI_T_VERBOSITY_USER_BASIC
+      scope       : MPI_T_SCOPE_ALL_EQ
+      description : >-
+        Size of the Global RMA lock entries pool (in number of
+        lock entries) that stores information about RMA lock requests that
+        could not be satisfied immediatly.  Requires a positive value.
+
 === END_MPI_T_CVAR_INFO_BLOCK ===
 */
 
@@ -214,6 +238,7 @@ int MPIDI_Win_free(MPID_Win ** win_ptr)
     MPIU_Free((*win_ptr)->op_pool_start);
     MPIU_Free((*win_ptr)->target_pool_start);
     MPIU_Free((*win_ptr)->slots);
+    MPIU_Free((*win_ptr)->lock_entry_pool_start);
 
     /* Free the attached buffer for windows created with MPI_Win_allocate() */
     if ((*win_ptr)->create_flavor == MPI_WIN_FLAVOR_ALLOCATE ||

http://git.mpich.org/mpich.git/commitdiff/89d8f6c19de179e993dc993e4b5931bd10c4ce73

commit 89d8f6c19de179e993dc993e4b5931bd10c4ce73
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 23:28:29 2014 -0600

    Set window epoch state at proper places in RMA calls.
    
    (1) Win_fence/Win_start: set access state right after we
        issue synchronization calls.
    (2) Win_post: set exposure state at beginning.
    (3) Win_wait/Win_test: set exposure state at end.
    (4) Win_lock/Win_lock_all: set access state at beginning.
    (5) Win_unlock/Win_unlock_all: set access state at end.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 452075b..f2d562e 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -344,6 +344,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
             mpi_errno = MPIR_Ibarrier_impl(win_ptr->comm_ptr, &(win_ptr->fence_sync_req));
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
+            /* Set window access state properly. */
             win_ptr->states.access_state = MPIDI_RMA_FENCE_ISSUED;
             num_active_issued_win++;
 
@@ -411,6 +412,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
        in this function call. */
     progress_engine_triggered = 1;
 
+    /* Set window access state properly. */
     if (assert & MPI_MODE_NOSUCCEED) {
         win_ptr->states.access_state = MPIDI_RMA_NONE;
     }
@@ -488,6 +490,7 @@ int MPIDI_Win_post(MPID_Group * post_grp_ptr, int assert, MPID_Win * win_ptr)
         OPA_read_write_barrier();
     }
 
+    /* Set window exposure state properly. */
     win_ptr->states.exposure_state = MPIDI_RMA_PSCW_EXPO;
 
     win_ptr->at_completion_counter += post_grp_ptr->size;
@@ -660,10 +663,11 @@ int MPIDI_Win_start(MPID_Group * group_ptr, int assert, MPID_Win * win_ptr)
         }
     }
 
+ finish_start:
+    /* Set window access state properly. */
     win_ptr->states.access_state = MPIDI_RMA_PSCW_ISSUED;
     num_active_issued_win++;
 
- finish_start:
     /* BEGINNING synchronization: the following counter should be zero. */
     MPIU_Assert(win_ptr->accumulated_ops_cnt == 0);
 
@@ -780,9 +784,10 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
     mpi_errno = MPIDI_CH3I_RMA_Cleanup_targets_win(win_ptr);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
+ finish_complete:
+    /* Set window access state properly. */
     win_ptr->states.access_state = MPIDI_RMA_NONE;
 
- finish_complete:
     /* free start group stored in window */
     MPIU_Free(win_ptr->start_ranks_in_win_grp);
     win_ptr->start_ranks_in_win_grp = NULL;
@@ -846,9 +851,10 @@ int MPIDI_Win_wait(MPID_Win * win_ptr)
         progress_engine_triggered = 1;
     }
 
+ finish_wait:
+    /* Set window exposure state properly. */
     win_ptr->states.exposure_state = MPIDI_RMA_NONE;
 
- finish_wait:
     if (!progress_engine_triggered) {
         /* In some cases (e.g. target is myself, or process on SHM),
            this function call does not go through the progress engine.
@@ -898,6 +904,7 @@ int MPIDI_Win_test(MPID_Win * win_ptr, int *flag)
 
     *flag = (win_ptr->at_completion_counter) ? 0 : 1;
     if (*flag) {
+        /* Set window exposure state properly. */
         win_ptr->states.exposure_state = MPIDI_RMA_NONE;
 
         /* Ensure ordering of load/store operations. */
@@ -967,6 +974,7 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win * win_ptr)
     /* Error handling is finished. */
 
     if (win_ptr->lock_epoch_count == 0) {
+        /* Set window access state properly. */
         win_ptr->states.access_state = MPIDI_RMA_PER_TARGET;
         num_passive_win++;
     }
@@ -1103,6 +1111,14 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
     } while (!remote_completed);
 
  finish_unlock:
+    win_ptr->lock_epoch_count--;
+    if (win_ptr->lock_epoch_count == 0) {
+        /* Set window access state properly. */
+        win_ptr->states.access_state = MPIDI_RMA_NONE;
+        num_passive_win--;
+        MPIU_Assert(num_passive_win >= 0);
+    }
+
     if (target != NULL) {
         /* ENDING synchronization: correctly decrement the following counter. */
         win_ptr->accumulated_ops_cnt -= target->accumulated_ops_cnt;
@@ -1115,13 +1131,6 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     }
 
-    win_ptr->lock_epoch_count--;
-    if (win_ptr->lock_epoch_count == 0) {
-        win_ptr->states.access_state = MPIDI_RMA_NONE;
-        num_passive_win--;
-        MPIU_Assert(num_passive_win >= 0);
-    }
-
     if (!progress_engine_triggered) {
         /* In some cases (e.g. target is myself, or process on SHM),
            this function call does not go through the progress engine.
@@ -1394,6 +1403,7 @@ int MPIDI_Win_lock_all(int assert, MPID_Win * win_ptr)
                         win_ptr->states.access_state != MPIDI_RMA_FENCE_GRANTED,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
 
+    /* Set window access state properly. */
     if (assert & MPI_MODE_NOCHECK)
         win_ptr->states.access_state = MPIDI_RMA_LOCK_ALL_GRANTED;
     else
@@ -1586,11 +1596,12 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
     mpi_errno = MPIDI_CH3I_RMA_Cleanup_targets_win(win_ptr);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
+ finish_unlock_all:
+    /* Set window access state properly. */
     win_ptr->states.access_state = MPIDI_RMA_NONE;
     num_passive_win--;
     MPIU_Assert(num_passive_win >= 0);
 
- finish_unlock_all:
     /* reset lock_all assert on window. */
     win_ptr->lock_all_assert = 0;
 

http://git.mpich.org/mpich.git/commitdiff/ff6e5f9b52c6eecfcc24c1b443770751e36225e0

commit ff6e5f9b52c6eecfcc24c1b443770751e36225e0
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 23:19:09 2014 -0600

    Reset/release window attributes in RMA sync calls.
    
    In Win_complete, release all requests on window; in
    Win_unlock_all, reset lock_all_assert on window.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index a0e800e..452075b 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -780,16 +780,14 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
     mpi_errno = MPIDI_CH3I_RMA_Cleanup_targets_win(win_ptr);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
+    win_ptr->states.access_state = MPIDI_RMA_NONE;
 
+ finish_complete:
     /* free start group stored in window */
     MPIU_Free(win_ptr->start_ranks_in_win_grp);
     win_ptr->start_ranks_in_win_grp = NULL;
-
     MPIU_Assert(win_ptr->start_req == NULL);
 
-    win_ptr->states.access_state = MPIDI_RMA_NONE;
-
- finish_complete:
     /* Make sure that all targets are freed. */
     MPIU_Assert(win_ptr->non_empty_slots == 0);
 
@@ -1588,14 +1586,14 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
     mpi_errno = MPIDI_CH3I_RMA_Cleanup_targets_win(win_ptr);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-
-    win_ptr->lock_all_assert = 0;
-
     win_ptr->states.access_state = MPIDI_RMA_NONE;
     num_passive_win--;
     MPIU_Assert(num_passive_win >= 0);
 
  finish_unlock_all:
+    /* reset lock_all assert on window. */
+    win_ptr->lock_all_assert = 0;
+
     /* Make sure that all targets are freed. */
     MPIU_Assert(win_ptr->non_empty_slots == 0);
 

http://git.mpich.org/mpich.git/commitdiff/264be641c733a3a6a6d465c1e888eb68374567d6

commit 264be641c733a3a6a6d465c1e888eb68374567d6
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 23:04:05 2014 -0600

    Bug-fix: always allocate ranks array in Win_start.
    
    We always need to allocate an array to store group ranks,
    even in the MPI_MODE_NOCHECK case, because we always
    need it in Win_complete.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index fd24a4f..a0e800e 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -583,6 +583,13 @@ int MPIDI_Win_start(MPID_Group * group_ptr, int assert, MPID_Win * win_ptr)
 
     win_ptr->start_grp_size = group_ptr->size;
 
+    MPIU_CHKPMEM_MALLOC(win_ptr->start_ranks_in_win_grp, int *,
+                        win_ptr->start_grp_size * sizeof(int),
+                        mpi_errno, "win_ptr->start_ranks_in_win_grp");
+
+    mpi_errno = fill_ranks_in_win_grp(win_ptr, group_ptr, win_ptr->start_ranks_in_win_grp);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
     if ((assert & MPI_MODE_NOCHECK) == 0) {
         int i, intra_cnt;
         MPI_Request *intra_start_req = NULL;
@@ -591,10 +598,6 @@ int MPIDI_Win_start(MPID_Group * group_ptr, int assert, MPID_Win * win_ptr)
         int rank = comm_ptr->rank;
 
         /* wait for messages from local processes */
-        MPIU_CHKPMEM_MALLOC(win_ptr->start_ranks_in_win_grp, int *, win_ptr->start_grp_size * sizeof(int),
-                            mpi_errno, "win_ptr->start_ranks_in_win_grp");
-        mpi_errno = fill_ranks_in_win_grp(win_ptr, group_ptr, win_ptr->start_ranks_in_win_grp);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
         /* post IRECVs */
         MPIU_CHKPMEM_MALLOC(win_ptr->start_req, MPI_Request *,

http://git.mpich.org/mpich.git/commitdiff/6b56d44a209a75c685673fafd738f160dfc6400a

commit 6b56d44a209a75c685673fafd738f160dfc6400a
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 22:58:54 2014 -0600

    Check if all targets are freed at end of RMA sync calls.
    
    For Win_fence, Win_complete and Win_unlock_all, check if
    all targets are freed at the end of function calls.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 2ff6033..fd24a4f 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -403,8 +403,6 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
     mpi_errno = MPIDI_CH3I_RMA_Cleanup_targets_win(win_ptr);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(win_ptr->non_empty_slots == 0);
-
     mpi_errno = MPIR_Barrier_impl(win_ptr->comm_ptr, &errflag);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
@@ -421,6 +419,9 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
     }
 
  finish_fence:
+    /* Make sure that all targets are freed. */
+    MPIU_Assert(win_ptr->non_empty_slots == 0);
+
     if (assert & MPI_MODE_NOPRECEDE) {
         /* BEGINNING synchronization: the following counter should be zero. */
         MPIU_Assert(win_ptr->accumulated_ops_cnt == 0);
@@ -776,7 +777,6 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
     mpi_errno = MPIDI_CH3I_RMA_Cleanup_targets_win(win_ptr);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(win_ptr->non_empty_slots == 0);
 
     /* free start group stored in window */
     MPIU_Free(win_ptr->start_ranks_in_win_grp);
@@ -787,6 +787,9 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
     win_ptr->states.access_state = MPIDI_RMA_NONE;
 
  finish_complete:
+    /* Make sure that all targets are freed. */
+    MPIU_Assert(win_ptr->non_empty_slots == 0);
+
     /* ENDING synchronization: correctly decrement the following counter. */
     win_ptr->accumulated_ops_cnt = 0;
 
@@ -1582,7 +1585,6 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
     mpi_errno = MPIDI_CH3I_RMA_Cleanup_targets_win(win_ptr);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(win_ptr->non_empty_slots == 0);
 
     win_ptr->lock_all_assert = 0;
 
@@ -1591,6 +1593,9 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
     MPIU_Assert(num_passive_win >= 0);
 
  finish_unlock_all:
+    /* Make sure that all targets are freed. */
+    MPIU_Assert(win_ptr->non_empty_slots == 0);
+
     /* ENDING synchronization: correctly decrement the following counter. */
     win_ptr->accumulated_ops_cnt = 0;
 

http://git.mpich.org/mpich.git/commitdiff/6f8c3e59c2b8d01096e3b264af6ae3c056a6c5ce

commit 6f8c3e59c2b8d01096e3b264af6ae3c056a6c5ce
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 23:15:34 2014 -0600

    Do memory barriers at proper places in RMA sync calls.
    
    We issue memory barriers at the proper places in RMA sync calls
    as follows, and remove unnecessary memory barriers:
    
    (1) Win_fence: very beginning and very end.
    (2) Win_post/Win_complete: very beginning.
    (3) Win_start/Win_wait/Win_test: very end.
    (4) Win_lock/Win_lock_all: very end.
    (5) Win_unlock/Win_unlock_all: very beginning.
    (6) Win_flush/Win_flush_local/Win_flush_all/Win_flush_local_all: very beginning.
    
    For the reasoning behind this, please refer to the comments
    at the beginning of src/mpid/ch3/src/ch3u_rma_sync.c.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 116ca8e..2ff6033 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -308,6 +308,11 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
                         win_ptr->states.exposure_state != MPIDI_RMA_NONE,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
 
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->shm_allocated == TRUE) {
+        OPA_read_write_barrier();
+    }
+
     if (assert & MPI_MODE_NOPRECEDE) {
         if (assert & MPI_MODE_NOSUCCEED) {
             goto finish_fence;
@@ -327,9 +332,6 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
             if (win_ptr->shm_allocated == TRUE) {
                 MPID_Comm *node_comm_ptr = win_ptr->comm_ptr->node_comm;
 
-                /* Ensure ordering of load/store operations. */
-                OPA_read_write_barrier();
-
                 mpi_errno = MPIR_Barrier_impl(node_comm_ptr, &errflag);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                 MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
@@ -337,9 +339,6 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
                 /* Mark that we triggered the progress engine
                    in this function call. */
                 progress_engine_triggered = 1;
-
-                /* Ensure ordering of load/store operations. */
-                OPA_read_write_barrier();
             }
 
             mpi_errno = MPIR_Ibarrier_impl(win_ptr->comm_ptr, &(win_ptr->fence_sync_req));
@@ -406,11 +405,6 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
 
     MPIU_Assert(win_ptr->non_empty_slots == 0);
 
-    /* Ensure ordering of load/store operations. */
-    if (win_ptr->shm_allocated == TRUE) {
-        OPA_read_write_barrier();
-    }
-
     mpi_errno = MPIR_Barrier_impl(win_ptr->comm_ptr, &errflag);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
@@ -419,11 +413,6 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
        in this function call. */
     progress_engine_triggered = 1;
 
-    /* Ensure ordering of load/store operations. */
-    if (win_ptr->shm_allocated == TRUE) {
-        OPA_read_write_barrier();
-    }
-
     if (assert & MPI_MODE_NOSUCCEED) {
         win_ptr->states.access_state = MPIDI_RMA_NONE;
     }
@@ -458,6 +447,11 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
         }
     }
 
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->shm_allocated == TRUE) {
+        OPA_read_write_barrier();
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FENCE);
     return mpi_errno;
@@ -488,15 +482,15 @@ int MPIDI_Win_post(MPID_Group * post_grp_ptr, int assert, MPID_Win * win_ptr)
     MPIU_ERR_CHKANDJUMP(win_ptr->states.exposure_state != MPIDI_RMA_NONE,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
 
-    win_ptr->states.exposure_state = MPIDI_RMA_PSCW_EXPO;
-
-    win_ptr->at_completion_counter += post_grp_ptr->size;
-
     /* Ensure ordering of load/store operations. */
     if (win_ptr->shm_allocated == TRUE) {
         OPA_read_write_barrier();
     }
 
+    win_ptr->states.exposure_state = MPIDI_RMA_PSCW_EXPO;
+
+    win_ptr->at_completion_counter += post_grp_ptr->size;
+
     if ((assert & MPI_MODE_NOCHECK) == 0) {
         MPI_Request *req;
         MPI_Status *status;
@@ -660,11 +654,6 @@ int MPIDI_Win_start(MPID_Group * group_ptr, int assert, MPID_Win * win_ptr)
             }
             /* --END ERROR HANDLING-- */
         }
-
-        if (win_ptr->shm_allocated == TRUE) {
-            /* Ensure ordering of load/store operations */
-            OPA_read_write_barrier();
-        }
     }
 
     win_ptr->states.access_state = MPIDI_RMA_PSCW_ISSUED;
@@ -676,6 +665,11 @@ int MPIDI_Win_start(MPID_Group * group_ptr, int assert, MPID_Win * win_ptr)
 
     MPIU_Assert(win_ptr->active_req_cnt == 0);
 
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->shm_allocated == TRUE) {
+        OPA_read_write_barrier();
+    }
+
  fn_exit:
     MPIU_CHKLMEM_FREEALL();
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_START);
@@ -709,6 +703,11 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
                         win_ptr->states.access_state != MPIDI_RMA_PSCW_GRANTED,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
 
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->shm_allocated == TRUE) {
+        OPA_read_write_barrier();
+    }
+
     if (win_ptr->states.access_state == MPIDI_RMA_PSCW_ISSUED) {
         while (win_ptr->states.access_state != MPIDI_RMA_PSCW_GRANTED) {
             mpi_errno = wait_progress_engine();
@@ -779,11 +778,6 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
 
     MPIU_Assert(win_ptr->non_empty_slots == 0);
 
-    /* Ensure ordering of load/store operations. */
-    if (win_ptr->shm_allocated == TRUE) {
-        OPA_read_write_barrier();
-    }
-
     /* free start group stored in window */
     MPIU_Free(win_ptr->start_ranks_in_win_grp);
     win_ptr->start_ranks_in_win_grp = NULL;
@@ -848,11 +842,6 @@ int MPIDI_Win_wait(MPID_Win * win_ptr)
         progress_engine_triggered = 1;
     }
 
-    /* Ensure ordering of load/store operations. */
-    if (win_ptr->shm_allocated == TRUE) {
-        OPA_read_write_barrier();
-    }
-
     win_ptr->states.exposure_state = MPIDI_RMA_NONE;
 
  finish_wait:
@@ -869,6 +858,11 @@ int MPIDI_Win_wait(MPID_Win * win_ptr)
             MPIU_ERR_POP(mpi_errno);
     }
 
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->shm_allocated == TRUE) {
+        OPA_read_write_barrier();
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_WAIT);
     return mpi_errno;
@@ -900,12 +894,12 @@ int MPIDI_Win_test(MPID_Win * win_ptr, int *flag)
 
     *flag = (win_ptr->at_completion_counter) ? 0 : 1;
     if (*flag) {
+        win_ptr->states.exposure_state = MPIDI_RMA_NONE;
+
         /* Ensure ordering of load/store operations. */
         if (win_ptr->shm_allocated == TRUE) {
             OPA_read_write_barrier();
         }
-
-        win_ptr->states.exposure_state = MPIDI_RMA_NONE;
     }
 
   fn_exit:
@@ -1277,6 +1271,11 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
 
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->shm_allocated) {
+        OPA_read_write_barrier();
+    }
+
     if (dest == MPI_PROC_NULL)
         goto finish_flush_local;
 
@@ -1433,17 +1432,17 @@ int MPIDI_Win_lock_all(int assert, MPID_Win * win_ptr)
         }
     }
 
-    /* Ensure ordering of load/store operations. */
-    if (win_ptr->shm_allocated == TRUE) {
-        OPA_read_write_barrier();
-    }
-
  finish_lock_all:
     /* BEGINNING synchronization: the following counter should be zero. */
     MPIU_Assert(win_ptr->accumulated_ops_cnt == 0);
 
     MPIU_Assert(win_ptr->active_req_cnt == 0);
 
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->shm_allocated == TRUE) {
+        OPA_read_write_barrier();
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_LOCK_ALL);
     return mpi_errno;
@@ -1736,6 +1735,11 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
 
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->shm_allocated == TRUE) {
+        OPA_read_write_barrier();
+    }
+
     /* Set sync_flag in sync struct. */
     for (i = 0; i < win_ptr->num_slots; i++) {
         curr_target = win_ptr->slots[i].target_list;

http://git.mpich.org/mpich.git/commitdiff/fb6a441b9fd125f4ec9b183262d917d4d1d34850

commit fb6a441b9fd125f4ec9b183262d917d4d1d34850
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 15:59:14 2014 -0600

    Poke progress engine in RMA sync call when needed
    
    In the ending RMA synchronization calls, we poke the
    progress engine once at the end if it was not already
    triggered earlier in the call. Some programs depend on
    incoming events being processed by the progress engine;
    if we never process them, the program may deadlock.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 683c00f..116ca8e 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -296,6 +296,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
     int local_completed = 0, remote_completed = 0;
     MPIDI_RMA_Target_t *curr_target = NULL;
     mpir_errflag_t errflag = MPIR_ERR_NONE;
+    int progress_engine_triggered = 0;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FENCE);
 
@@ -333,6 +334,10 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                 MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
+                /* Mark that we triggered the progress engine
+                   in this function call. */
+                progress_engine_triggered = 1;
+
                 /* Ensure ordering of load/store operations. */
                 OPA_read_write_barrier();
             }
@@ -352,6 +357,10 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     }
 
@@ -384,6 +393,10 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     } while (!remote_completed);
 
@@ -402,6 +415,10 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
     MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
+    /* Mark that we triggered the progress engine
+       in this function call. */
+    progress_engine_triggered = 1;
+
     /* Ensure ordering of load/store operations. */
     if (win_ptr->shm_allocated == TRUE) {
         OPA_read_write_barrier();
@@ -426,6 +443,21 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
 
     MPIU_Assert(win_ptr->active_req_cnt == 0);
 
+    if (!(assert & MPI_MODE_NOPRECEDE)) {
+        if (!progress_engine_triggered) {
+            /* In some cases (e.g. target is myself, or process on SHM),
+               this function call does not go through the progress engine.
+               Therefore, it is possible that this process never process
+               events coming from other processes. This may cause deadlock in
+               applications where the program execution on this process depends
+               on the happening of events from other processes. Here we poke
+               the progress engine once to avoid such issue.  */
+            mpi_errno = poke_progress_engine();
+            if (mpi_errno != MPI_SUCCESS)
+                MPIU_ERR_POP(mpi_errno);
+        }
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FENCE);
     return mpi_errno;
@@ -666,6 +698,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
     int local_completed = 0, remote_completed = 0;
     MPID_Comm *win_comm_ptr = win_ptr->comm_ptr;
     MPIDI_RMA_Target_t *curr_target;
+    int progress_engine_triggered = 0;
     int made_progress;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_COMPLETE);
 
@@ -681,6 +714,10 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     }
 
@@ -729,6 +766,10 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     } while (!remote_completed);
 
@@ -757,6 +798,19 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
 
     MPIU_Assert(win_ptr->active_req_cnt == 0);
 
+    if (!progress_engine_triggered) {
+        /* In some cases (e.g. target is myself, or process on SHM),
+           this function call does not go through the progress engine.
+           Therefore, it is possible that this process never process
+           events coming from other processes. This may cause deadlock in
+           applications where the program execution on this process depends
+           on the happening of events from other processes. Here we poke
+           the progress engine once to avoid such issue.  */
+        mpi_errno = poke_progress_engine();
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_COMPLETE);
     return mpi_errno;
@@ -774,6 +828,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPIDI_Win_wait(MPID_Win * win_ptr)
 {
+    int progress_engine_triggered = 0;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_WAIT);
 
@@ -787,6 +842,10 @@ int MPIDI_Win_wait(MPID_Win * win_ptr)
         mpi_errno = wait_progress_engine();
         if (mpi_errno != MPI_SUCCESS)
             MPIU_ERR_POP(mpi_errno);
+
+        /* Mark that we triggered the progress engine
+           in this function call. */
+        progress_engine_triggered = 1;
     }
 
     /* Ensure ordering of load/store operations. */
@@ -796,6 +855,20 @@ int MPIDI_Win_wait(MPID_Win * win_ptr)
 
     win_ptr->states.exposure_state = MPIDI_RMA_NONE;
 
+ finish_wait:
+    if (!progress_engine_triggered) {
+        /* In some cases (e.g. target is myself, or process on SHM),
+           this function call does not go through the progress engine.
+           Therefore, it is possible that this process never process
+           events coming from other processes. This may cause deadlock in
+           applications where the program execution on this process depends
+           on the happening of events from other processes. Here we poke
+           the progress engine once to avoid such issue.  */
+        mpi_errno = poke_progress_engine();
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_WAIT);
     return mpi_errno;
@@ -968,6 +1041,7 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
     int local_completed = 0, remote_completed = 0;
     MPIDI_RMA_Target_t *target = NULL;
     enum MPIDI_RMA_sync_types sync_flag;
+    int progress_engine_triggered = 0;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_UNLOCK);
 
@@ -984,17 +1058,6 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
     if (dest == MPI_PROC_NULL)
         goto finish_unlock;
 
-    /* When the process tries to acquire the lock on itself, it does not
-       go through the progress engine. Therefore, it is possible that
-       one process always grants the lock to itself but never process
-       events coming from other processes. This may cause deadlock in
-       applications where the program execution on target process depends
-       on the happening of events from other processes. Here we poke
-       the progress engine once to avoid such issue.  */
-    mpi_errno = poke_progress_engine();
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-
     /* Find or recreate target. */
     mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, dest, &target);
     if (mpi_errno != MPI_SUCCESS)
@@ -1034,6 +1097,10 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     } while (!remote_completed);
 
@@ -1057,6 +1124,19 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
         MPIU_Assert(num_passive_win >= 0);
     }
 
+    if (!progress_engine_triggered) {
+        /* In some cases (e.g. target is myself, or process on SHM),
+           this function call does not go through the progress engine.
+           Therefore, it is possible that this process never process
+           events coming from other processes. This may cause deadlock in
+           applications where the program execution on this process depends
+           on the happening of events from other processes. Here we poke
+           the progress engine once to avoid such issue.  */
+        mpi_errno = poke_progress_engine();
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_UNLOCK);
     return mpi_errno;
@@ -1077,6 +1157,7 @@ int MPIDI_Win_flush(int dest, MPID_Win *win_ptr)
     int local_completed = 0, remote_completed = 0;
     int rank = win_ptr->comm_ptr->rank;
     MPIDI_RMA_Target_t *target = NULL;
+    int progress_engine_triggered = 0;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FLUSH);
 
@@ -1096,17 +1177,6 @@ int MPIDI_Win_flush(int dest, MPID_Win *win_ptr)
     if (dest == MPI_PROC_NULL)
         goto finish_flush;
 
-    /* When the process tries to acquire the lock on itself, it does not
-       go through the progress engine. Therefore, it is possible that
-       one process always grants the lock to itself but never process
-       events coming from other processes. This may cause deadlock in
-       applications where the program execution on target process depends
-       on the happening of events from other processes. Here we poke
-       the progress engine once to avoid such issue.  */
-    mpi_errno = poke_progress_engine();
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-
     mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, dest, &target);
     if (mpi_errno != MPI_SUCCESS)
         MPIU_ERR_POP(mpi_errno);
@@ -1148,6 +1218,10 @@ int MPIDI_Win_flush(int dest, MPID_Win *win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     } while (!remote_completed);
 
@@ -1158,6 +1232,19 @@ int MPIDI_Win_flush(int dest, MPID_Win *win_ptr)
         target->accumulated_ops_cnt = 0;
     }
 
+    if (!progress_engine_triggered) {
+        /* In some cases (e.g. target is myself, or process on SHM),
+           this function call does not go through the progress engine.
+           Therefore, it is possible that this process never process
+           events coming from other processes. This may cause deadlock in
+           applications where the program execution on this process depends
+           on the happening of events from other processes. Here we poke
+           the progress engine once to avoid such issue.  */
+        mpi_errno = poke_progress_engine();
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FLUSH);
     return mpi_errno;
@@ -1178,6 +1265,7 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
     int local_completed = 0, remote_completed = 0;
     int rank = win_ptr->comm_ptr->rank;
     MPIDI_RMA_Target_t *target = NULL;
+    int progress_engine_triggered = 0;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL);
 
@@ -1192,17 +1280,6 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
     if (dest == MPI_PROC_NULL)
         goto finish_flush_local;
 
-    /* When the process tries to acquire the lock on itself, it does not
-       go through the progress engine. Therefore, it is possible that
-       one process always grants the lock to itself but never process
-       events coming from other processes. This may cause deadlock in
-       applications where the program execution on target process depends
-       on the happening of events from other processes. Here we poke
-       the progress engine once to avoid such issue.  */
-    mpi_errno = poke_progress_engine();
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-
     mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, dest, &target);
     if (mpi_errno != MPI_SUCCESS)
         MPIU_ERR_POP(mpi_errno);
@@ -1251,6 +1328,10 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     } while ((target->disable_flush_local && !remote_completed) ||
              (!target->disable_flush_local && !local_completed));
@@ -1265,6 +1346,19 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
         target->accumulated_ops_cnt = 0;
     }
 
+    if (!progress_engine_triggered) {
+        /* In some cases (e.g. target is myself, or process on SHM),
+           this function call does not go through the progress engine.
+           Therefore, it is possible that this process never process
+           events coming from other processes. This may cause deadlock in
+           applications where the program execution on this process depends
+           on the happening of events from other processes. Here we poke
+           the progress engine once to avoid such issue.  */
+        mpi_errno = poke_progress_engine();
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL);
     return mpi_errno;
@@ -1370,6 +1464,7 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
     int local_completed = 0,remote_completed = 0;
     int rank = win_ptr->comm_ptr->rank;
     MPIDI_RMA_Target_t *curr_target = NULL;
+    int progress_engine_triggered = 0;
     enum MPIDI_RMA_sync_types sync_flag;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_UNLOCK_ALL);
@@ -1477,6 +1572,10 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     } while (!remote_completed);
 
@@ -1498,6 +1597,19 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
 
     MPIU_Assert(win_ptr->active_req_cnt == 0);
 
+    if (!progress_engine_triggered) {
+        /* In some cases (e.g. target is myself, or process on SHM),
+           this function call does not go through the progress engine.
+           Therefore, it is possible that this process never process
+           events coming from other processes. This may cause deadlock in
+           applications where the program execution on this process depends
+           on the happening of events from other processes. Here we poke
+           the progress engine once to avoid such issue.  */
+        mpi_errno = poke_progress_engine();
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_UNLOCK_ALL);
     return mpi_errno;
@@ -1517,6 +1629,7 @@ int MPIDI_Win_flush_all(MPID_Win * win_ptr)
     int i, made_progress = 0;
     int local_completed = 0, remote_completed = 0;
     MPIDI_RMA_Target_t *curr_target = NULL;
+    int progress_engine_triggered = 0;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);
 
@@ -1533,17 +1646,6 @@ int MPIDI_Win_flush_all(MPID_Win * win_ptr)
         OPA_read_write_barrier();
     }
 
-    /* When the process tries to acquire the lock on itself, it does not
-       go through the progress engine. Therefore, it is possible that
-       one process always grants the lock to itself but never process
-       events coming from other processes. This may cause deadlock in
-       applications where the program execution on target process depends
-       on the happening of events from other processes. Here we poke
-       the progress engine once to avoid such issue.  */
-    mpi_errno = poke_progress_engine();
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-
     /* Set sync_flag in sync struct. */
     for (i = 0; i < win_ptr->num_slots; i++) {
         curr_target = win_ptr->slots[i].target_list;
@@ -1575,6 +1677,10 @@ int MPIDI_Win_flush_all(MPID_Win * win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     } while (!remote_completed);
 
@@ -1584,6 +1690,19 @@ int MPIDI_Win_flush_all(MPID_Win * win_ptr)
 
     MPIU_Assert(win_ptr->active_req_cnt == 0);
 
+    if (!progress_engine_triggered) {
+        /* In some cases (e.g. target is myself, or process on SHM),
+           this function call does not go through the progress engine.
+           Therefore, it is possible that this process never process
+           events coming from other processes. This may cause deadlock in
+           applications where the program execution on this process depends
+           on the happening of events from other processes. Here we poke
+           the progress engine once to avoid such issue.  */
+        mpi_errno = poke_progress_engine();
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);
     return mpi_errno;
@@ -1605,6 +1724,7 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
     MPIDI_RMA_Target_t *curr_target = NULL;
     int enable_flush_local_cnt = 0, disable_flush_local_cnt = 0;
     int remote_completed_cnt = 0, local_completed_cnt = 0;
+    int progress_engine_triggered = 0;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
 
@@ -1616,17 +1736,6 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
 
-    /* When the process tries to acquire the lock on itself, it does not
-       go through the progress engine. Therefore, it is possible that
-       one process always grants the lock to itself but never process
-       events coming from other processes. This may cause deadlock in
-       applications where the program execution on target process depends
-       on the happening of events from other processes. Here we poke
-       the progress engine once to avoid such issue.  */
-    mpi_errno = poke_progress_engine();
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-
     /* Set sync_flag in sync struct. */
     for (i = 0; i < win_ptr->num_slots; i++) {
         curr_target = win_ptr->slots[i].target_list;
@@ -1688,6 +1797,10 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
+
+            /* Mark that we triggered the progress engine
+               in this function call. */
+            progress_engine_triggered = 1;
         }
     } while (remote_completed_cnt < disable_flush_local_cnt ||
              local_completed_cnt < enable_flush_local_cnt);
@@ -1705,6 +1818,19 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
     /* ENDING synchronization: correctly decrement the following counter. */
     win_ptr->accumulated_ops_cnt = 0;
 
+    if (!progress_engine_triggered) {
+        /* In some cases (e.g. target is myself, or process on SHM),
+           this function call does not go through the progress engine.
+           Therefore, it is possible that this process never process
+           events coming from other processes. This may cause deadlock in
+           applications where the program execution on this process depends
+           on the happening of events from other processes. Here we poke
+           the progress engine once to avoid such issue.  */
+        mpi_errno = poke_progress_engine();
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
     return mpi_errno;

http://git.mpich.org/mpich.git/commitdiff/04d151901aef08d003b6b9b432fcab91007874de

commit 04d151901aef08d003b6b9b432fcab91007874de
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Fri Nov 21 14:13:32 2014 -0600

    Bug-fix: modify free_ops_before_completion function
    
    Originally the free_ops_before_completion function only
    worked with active target. Here we modify it to accommodate
    passive target as well.
    
    Also, every time we trigger free_ops_before_completion,
    we lose the chance to do a real Win_flush_local operation
    and must do a Win_flush instead. Here we transfer
    Win_flush_local to Win_flush if the disable_flush_local flag
    is set, and unset that flag after the current flush
    is done.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_oplist.c b/src/mpid/ch3/src/ch3u_rma_oplist.c
index d728a60..77de769 100644
--- a/src/mpid/ch3/src/ch3u_rma_oplist.c
+++ b/src/mpid/ch3/src/ch3u_rma_oplist.c
@@ -466,30 +466,47 @@ int MPIDI_CH3I_RMA_Free_ops_before_completion(MPID_Win * win_ptr)
     int i, made_progress = 0;
     int mpi_errno = MPI_SUCCESS;
 
+    /* If we are in an free_ops_before_completion, the window must be holding
+     * up resources.  If it isn't, we are in the wrong window and
+     * incorrectly entered this function. */
     MPIU_ERR_CHKANDJUMP(win_ptr->non_empty_slots == 0, mpi_errno, MPI_ERR_OTHER,
                         "**rmanoop");
 
     /* make nonblocking progress once */
+    mpi_errno = MPIDI_CH3I_RMA_Make_progress_win(win_ptr, &made_progress);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
     if (win_ptr->states.access_state == MPIDI_RMA_FENCE_ISSUED ||
-        win_ptr->states.access_state == MPIDI_RMA_PSCW_ISSUED) {
-        mpi_errno = issue_ops_win(win_ptr, &made_progress);
-        if (mpi_errno != MPI_SUCCESS) {MPIU_ERR_POP(mpi_errno);}
-    }
-    if (win_ptr->states.access_state != MPIDI_RMA_FENCE_GRANTED)
+        win_ptr->states.access_state == MPIDI_RMA_PSCW_ISSUED ||
+        win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED)
         goto fn_exit;
 
     /* find targets that have operations */
     for (i = 0; i < win_ptr->num_slots; i++) {
         if (win_ptr->slots[i].target_list != NULL) {
             curr_target = win_ptr->slots[i].target_list;
-            while (curr_target != NULL && curr_target->read_op_list == NULL
-                   && curr_target->write_op_list == NULL)
+            while (curr_target != NULL) {
+                if (curr_target->read_op_list != NULL ||
+                    curr_target->write_op_list != NULL) {
+                    if (win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
+                        win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED) {
+                        if (curr_target->access_state == MPIDI_RMA_LOCK_GRANTED)
+                            break;
+                    }
+                    else {
+                        break;
+                    }
+                }
                 curr_target = curr_target->next;
+            }
             if (curr_target != NULL) break;
         }
     }
+
     if (curr_target == NULL) goto fn_exit;
 
+    /* After we do this, all following Win_flush_local
+       must do a Win_flush instead. */
     curr_target->disable_flush_local = 1;
 
     if (curr_target->read_op_list != NULL) {
@@ -507,6 +524,9 @@ int MPIDI_CH3I_RMA_Free_ops_before_completion(MPID_Win * win_ptr)
         MPID_Request_release(curr_op->request);
         MPL_LL_DELETE(*op_list, *op_list_tail, curr_op);
         MPIDI_CH3I_Win_op_free(win_ptr, curr_op);
+
+        win_ptr->active_req_cnt--;
+
         if (*op_list == NULL) {
             if (read_flag == 1) {
                 op_list = &curr_target->write_op_list;
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 123ae39..683c00f 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1221,8 +1221,17 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
     }
 
     /* Set sync_flag in sync struct. */
-    if (target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH_LOCAL)
-        target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH_LOCAL;
+    if (target->disable_flush_local) {
+        if (target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
+            target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
+            target->sync.have_remote_incomplete_ops = 0;
+            target->sync.outstanding_acks++;
+        }
+    }
+    else {
+        if (target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH_LOCAL)
+            target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH_LOCAL;
+    }
 
     /* Issue out all operations. */
     mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, dest,
@@ -1237,15 +1246,20 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
                                                       &remote_completed);
         if (mpi_errno != MPI_SUCCESS)
             MPIU_ERR_POP(mpi_errno);
-        if (!local_completed) {
+        if ((target->disable_flush_local && !remote_completed) ||
+            (!target->disable_flush_local && !local_completed)) {
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
         }
-    } while (!local_completed);
+    } while ((target->disable_flush_local && !remote_completed) ||
+             (!target->disable_flush_local && !local_completed));
 
  finish_flush_local:
     if (target != NULL) {
+        /* reset disable_flush_local flag in target to 0 */
+        target->disable_flush_local = 0;
+
         /* ENDING synchronization: correctly decrement the following counters. */
         win_ptr->accumulated_ops_cnt -= target->accumulated_ops_cnt;
         target->accumulated_ops_cnt = 0;
@@ -1589,6 +1603,8 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
     int i, made_progress = 0;
     int local_completed = 0, remote_completed = 0;
     MPIDI_RMA_Target_t *curr_target = NULL;
+    int enable_flush_local_cnt = 0, disable_flush_local_cnt = 0;
+    int remote_completed_cnt = 0, local_completed_cnt = 0;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
 
@@ -1615,8 +1631,19 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
     for (i = 0; i < win_ptr->num_slots; i++) {
         curr_target = win_ptr->slots[i].target_list;
         while (curr_target != NULL) {
-            if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH_LOCAL) {
-                curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH_LOCAL;
+            if (curr_target->disable_flush_local) {
+                if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
+                    curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
+                    curr_target->sync.have_remote_incomplete_ops = 0;
+                    curr_target->sync.outstanding_acks++;
+                }
+                disable_flush_local_cnt++;
+            }
+            else {
+                if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH_LOCAL) {
+                    curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH_LOCAL;
+                }
+                enable_flush_local_cnt++;
             }
 
             /* ENDING synchronization: correctly decrement the following counters. */
@@ -1630,20 +1657,51 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
     mpi_errno = MPIDI_CH3I_RMA_Make_progress_win(win_ptr, &made_progress);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-    /* Wait for local completion. */
+    /* wait for remote completion for those targets that disable flush_local,
+     * and wait for local completion for other targets */
     do {
-        mpi_errno = MPIDI_CH3I_RMA_Cleanup_ops_win(win_ptr, &local_completed,
-                                                   &remote_completed);
-        if (mpi_errno != MPI_SUCCESS)
-            MPIU_ERR_POP(mpi_errno);
-        if (!local_completed) {
+        for (i = 0; i < win_ptr->num_slots; i++) {
+            curr_target = win_ptr->slots[i].target_list;
+            while (curr_target != NULL) {
+                mpi_errno = MPIDI_CH3I_RMA_Cleanup_ops_target(win_ptr, curr_target,
+                                                              &local_completed,
+                                                              &remote_completed);
+                if (mpi_errno != MPI_SUCCESS)
+                    MPIU_ERR_POP(mpi_errno);
+
+                if (curr_target->disable_flush_local) {
+                    if (remote_completed) {
+                        remote_completed_cnt++;
+                    }
+                }
+                else {
+                    if (local_completed) {
+                        local_completed_cnt++;
+                    }
+                }
+                curr_target = curr_target->next;
+            }
+        }
+
+        if (remote_completed_cnt < disable_flush_local_cnt ||
+            local_completed_cnt < enable_flush_local_cnt) {
             mpi_errno = wait_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
         }
-    } while (!local_completed);
+    } while (remote_completed_cnt < disable_flush_local_cnt ||
+             local_completed_cnt < enable_flush_local_cnt);
 
   finish_flush_local_all:
+    /* reset disable_flush_local flag in target to 0 */
+    for (i = 0; i < win_ptr->num_slots; i++) {
+        curr_target = win_ptr->slots[i].target_list;
+        while (curr_target != NULL) {
+            curr_target->disable_flush_local = 0;
+            curr_target = curr_target->next;
+        }
+    }
+
     /* ENDING synchronization: correctly decrement the following counter. */
     win_ptr->accumulated_ops_cnt = 0;
 

http://git.mpich.org/mpich.git/commitdiff/097c96285a88f1276ac5f474fd354a1b68421591

commit 097c96285a88f1276ac5f474fd354a1b68421591
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 15:24:32 2014 -0600

    Code-refactor: move piggyback work from op routines to issue routines
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpid_rma_issue.h b/src/mpid/ch3/include/mpid_rma_issue.h
index 485cecd..c64a226 100644
--- a/src/mpid/ch3/include/mpid_rma_issue.h
+++ b/src/mpid/ch3/include/mpid_rma_issue.h
@@ -342,11 +342,23 @@ static int issue_put_op(MPIDI_RMA_Op_t * rma_op, MPID_Win *win_ptr,
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         put_pkt->lock_type = target_ptr->lock_type;
 
-    MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
-
     MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
     MPIU_Assign_trunc(len, rma_op->origin_count * origin_type_size, size_t);
 
+    if (!rma_op->is_dt) {
+        /* Fill origin data into packet header IMMED area as much as possible */
+        MPIU_Assign_trunc(put_pkt->immed_len,
+                          MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES/origin_type_size)*origin_type_size),
+                          int);
+        if (put_pkt->immed_len > 0) {
+            void *src = rma_op->origin_addr, *dest = put_pkt->data;
+            mpi_errno = immed_copy(src, dest, (size_t)put_pkt->immed_len);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        }
+    }
+
+    MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
+
     if (len == (size_t)put_pkt->immed_len) {
         /* All origin data is in packet header, issue the header. */
         MPIU_THREAD_CS_ENTER(CH3COMM, vc);
@@ -400,14 +412,27 @@ static int issue_acc_op(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     rma_op->request = NULL;
 
     accum_pkt->flags |= flags;
+
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         accum_pkt->lock_type = target_ptr->lock_type;
 
-    MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
-
     MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
     MPIU_Assign_trunc(len, rma_op->origin_count * origin_type_size, size_t);
 
+    if (!rma_op->is_dt) {
+        /* Fill origin data into packet header IMMED area as much as possible */
+        MPIU_Assign_trunc(accum_pkt->immed_len,
+                          MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES/origin_type_size)*origin_type_size),
+                          int);
+        if (accum_pkt->immed_len > 0) {
+            void *src = rma_op->origin_addr, *dest = accum_pkt->data;
+            mpi_errno = immed_copy(src, dest, (size_t)accum_pkt->immed_len);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        }
+    }
+
+    MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
+
     if (len == (size_t)accum_pkt->immed_len) {
         /* All origin data is in packet header, issue the header. */
         MPIU_THREAD_CS_ENTER(CH3COMM, vc);
@@ -485,14 +510,32 @@ static int issue_get_acc_op(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     get_accum_pkt->request_handle = resp_req->handle;
 
     get_accum_pkt->flags |= flags;
+    if (!rma_op->is_dt) {
+        /* Only fill IMMED data in response packet when both origin and target
+           buffers are basic datatype. */
+        get_accum_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
+    }
+
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         get_accum_pkt->lock_type = target_ptr->lock_type;
 
-    MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
-
     MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
     MPIU_Assign_trunc(len, rma_op->origin_count * origin_type_size, size_t);
 
+    if (!rma_op->is_dt) {
+        /* Fill origin data into packet header IMMED area as much as possible */
+        MPIU_Assign_trunc(get_accum_pkt->immed_len,
+                          MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES/origin_type_size)*origin_type_size),
+                          int);
+        if (get_accum_pkt->immed_len > 0) {
+            void *src = rma_op->origin_addr, *dest = get_accum_pkt->data;
+            mpi_errno = immed_copy(src, dest, (size_t)get_accum_pkt->immed_len);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        }
+    }
+
+    MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
+
     if (len == (size_t)get_accum_pkt->immed_len) {
         /* All origin data is in packet header, issue the header. */
         MPIU_THREAD_CS_ENTER(CH3COMM, vc);
@@ -599,7 +642,14 @@ static int issue_get_op(MPIDI_RMA_Op_t * rma_op, MPID_Win * win_ptr,
     }
 
     get_pkt->request_handle = rma_op->request->handle;
+
     get_pkt->flags |= flags;
+    if (!rma_op->is_dt) {
+        /* Only fill IMMED data in response packet when both origin and target
+           buffers are basic datatype. */
+        get_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
+    }
+
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         get_pkt->lock_type = target_ptr->lock_type;
 
@@ -745,6 +795,8 @@ static int issue_fop_op(MPIDI_RMA_Op_t * rma_op,
     MPID_Comm *comm_ptr = win_ptr->comm_ptr;
     MPIDI_CH3_Pkt_fop_t *fop_pkt = &rma_op->pkt.fop;
     MPID_Request *resp_req = NULL;
+    size_t len;
+    MPI_Aint origin_type_size;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_ISSUE_FOP_OP);
 
@@ -772,6 +824,19 @@ static int issue_fop_op(MPIDI_RMA_Op_t * rma_op,
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         fop_pkt->lock_type = target_ptr->lock_type;
 
+    MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
+    MPIU_Assign_trunc(len, rma_op->origin_count * origin_type_size, size_t);
+
+    /* Fill origin data into packet header IMMED area as much as possible */
+    MPIU_Assign_trunc(fop_pkt->immed_len,
+                      MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES/origin_type_size)*origin_type_size),
+                      int);
+    if (fop_pkt->immed_len > 0) {
+        void *src = rma_op->origin_addr, *dest = fop_pkt->data;
+        mpi_errno = immed_copy(src, dest, (size_t)fop_pkt->immed_len);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    }
+
     MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
 
     /* All origin data is in packet header, issue the header. */
diff --git a/src/mpid/ch3/src/ch3u_rma_ops.c b/src/mpid/ch3/src/ch3u_rma_ops.c
index 8230b3e..f05d110 100644
--- a/src/mpid/ch3/src/ch3u_rma_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_ops.c
@@ -174,27 +174,12 @@ int MPIDI_CH3I_Put(const void *origin_addr, int origin_count, MPI_Datatype
             new_ptr->is_dt = 1;
         }
 
-        /* If both origin and target are basic datatype, try to
-           copy origin data to packet header as much as possible. */
+        /* Judge if this operation is an piggyback candidate */
         if (!new_ptr->is_dt) {
             size_t len;
             MPI_Aint origin_type_size;
-
             MPID_Datatype_get_size_macro(new_ptr->origin_datatype, origin_type_size);
-            /* length of origin data */
             MPIU_Assign_trunc(len, new_ptr->origin_count * origin_type_size, size_t);
-            /* length of origin data that can fit into immed area in pkt header */
-            MPIU_Assign_trunc(put_pkt->immed_len,
-                              MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / origin_type_size) * origin_type_size),
-                              int);
-
-            if (put_pkt->immed_len > 0) {
-                void *src = new_ptr->origin_addr, *dest = put_pkt->data;
-                /* copy data from origin buffer to immed area in packet header */
-                mpi_errno = immed_copy(src, dest, (size_t)put_pkt->immed_len);
-                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-            }
-
             if (len <= MPIR_MAX(MPIDI_RMA_IMMED_BYTES,
                                 MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE))
                 new_ptr->piggyback_lock_candidate = 1;
@@ -356,12 +341,9 @@ int MPIDI_CH3I_Get(void *origin_addr, int origin_count, MPI_Datatype
             new_ptr->is_dt = 1;
         }
 
+        /* Judge if this operation is an piggyback candidate. */
         if (!new_ptr->is_dt) {
             new_ptr->piggyback_lock_candidate = 1;
-
-            /* Only fill IMMED data in response packet when both origin and target
-               buffers are basic datatype. */
-            get_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
         }
 
         MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
@@ -524,27 +506,12 @@ int MPIDI_CH3I_Accumulate(const void *origin_addr, int origin_count, MPI_Datatyp
             new_ptr->is_dt = 1;
         }
 
-        /* If both origin and target are basic datatype, try to
-           copy origin data to packet header as much as possible. */
+        /* Judge if this operation is an piggyback candidate. */
         if (!new_ptr->is_dt) {
             size_t len;
             MPI_Aint origin_type_size;
-
             MPID_Datatype_get_size_macro(new_ptr->origin_datatype, origin_type_size);
-            /* length of origin data */
             MPIU_Assign_trunc(len, new_ptr->origin_count * origin_type_size, size_t);
-            /* length of origin data that can fit into immed areas in packet header */
-            MPIU_Assign_trunc(accum_pkt->immed_len,
-                              MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / origin_type_size) * origin_type_size),
-                              int);
-
-            if (accum_pkt->immed_len > 0) {
-                void *src = new_ptr->origin_addr, *dest = accum_pkt->data;
-                /* copy data from origin buffer to immed area in packet header */
-                mpi_errno = immed_copy(src, dest, (size_t)accum_pkt->immed_len);
-                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-            }
-
             if (len <= MPIR_MAX(MPIDI_RMA_IMMED_BYTES,
                                 MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE))
                 new_ptr->piggyback_lock_candidate = 1;
@@ -697,12 +664,9 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
                 new_ptr->is_dt = 1;
             }
 
+            /* Judge if this operation is a piggyback candidate */
             if (!new_ptr->is_dt) {
                 new_ptr->piggyback_lock_candidate = 1;
-
-                /* Only fill IMMED data in response packet when both origin and target
-                   buffers are basic datatype. */
-                get_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
             }
         }
 
@@ -747,34 +711,15 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
                 new_ptr->is_dt = 1;
             }
 
-            /* If all buffers are basic datatype, try to copy origin data to
-               packet header as much as possible. */
+            /* Judge if this operation is a piggyback candidate */
             if (!new_ptr->is_dt) {
                 size_t len;
                 MPI_Aint origin_type_size;
-
                 MPID_Datatype_get_size_macro(new_ptr->origin_datatype, origin_type_size);
-                /* length of origin data */
                 MPIU_Assign_trunc(len, new_ptr->origin_count * origin_type_size, size_t);
-                /* length of origin data that can fit into immed area in packet header */
-                MPIU_Assign_trunc(get_accum_pkt->immed_len,
-                                  MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / origin_type_size) * origin_type_size),
-                                  int);
-
-                if (get_accum_pkt->immed_len > 0) {
-                    void *src = new_ptr->origin_addr, *dest = get_accum_pkt->data;
-                    /* copy data from origin buffer to immed area in packet header */
-                    mpi_errno = immed_copy(src, dest, (size_t)get_accum_pkt->immed_len);
-                    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-                }
-
                 if (len <= MPIR_MAX(MPIDI_RMA_IMMED_BYTES,
                                     MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE))
                     new_ptr->piggyback_lock_candidate = 1;
-
-                /* Only fill IMMED data in response packet when both origin and target
-                   buffers are basic datatype. */
-                get_accum_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
             }
         }
 
@@ -1140,7 +1085,6 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
             get_pkt->source_win_handle = win_ptr->handle;
             get_pkt->origin_rank = rank;
             get_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
-            get_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
 
             new_ptr->origin_addr = result_addr;
             new_ptr->origin_count = 1;
@@ -1150,8 +1094,6 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
         }
         else {
             MPIDI_CH3_Pkt_fop_t *fop_pkt = &(new_ptr->pkt.fop);
-            size_t len;
-            MPI_Aint origin_type_size;
 
             MPIDI_Pkt_init(fop_pkt, MPIDI_CH3_PKT_FOP);
             fop_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
@@ -1171,21 +1113,6 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
             new_ptr->result_datatype = datatype;
             new_ptr->target_rank = target_rank;
             new_ptr->piggyback_lock_candidate = 1;
-
-            MPID_Datatype_get_size_macro(new_ptr->origin_datatype, origin_type_size);
-            /* length of origin data */
-            MPIU_Assign_trunc(len, new_ptr->origin_count * origin_type_size, size_t);
-            /* length of origin data that can fit into immed area in pkt header */
-            MPIU_Assign_trunc(fop_pkt->immed_len,
-                              MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / origin_type_size) * origin_type_size),
-                              int);
-
-            if (fop_pkt->immed_len > 0) {
-                void *src = new_ptr->origin_addr, *dest = fop_pkt->data;
-                /* copy data from origin buffer to immed area in packet header */
-                mpi_errno = immed_copy(src, dest, (size_t)fop_pkt->immed_len);
-                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-            }
         }
 
         new_ptr->ureq = NULL; /* reset user request */

http://git.mpich.org/mpich.git/commitdiff/3a05784f1cb7b9e3ccb1d7d65c73671d886ad41d

commit 3a05784f1cb7b9e3ccb1d7d65c73671d886ad41d
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Nov 17 15:24:19 2014 -0600

    Use int instead of size_t in RMA pkt header.
    
    Store the immed_len field as int instead of size_t in the
    RMA packet headers to reduce packet size.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpid_rma_issue.h b/src/mpid/ch3/include/mpid_rma_issue.h
index 08bad83..485cecd 100644
--- a/src/mpid/ch3/include/mpid_rma_issue.h
+++ b/src/mpid/ch3/include/mpid_rma_issue.h
@@ -347,7 +347,7 @@ static int issue_put_op(MPIDI_RMA_Op_t * rma_op, MPID_Win *win_ptr,
     MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
     MPIU_Assign_trunc(len, rma_op->origin_count * origin_type_size, size_t);
 
-    if (len == put_pkt->immed_len) {
+    if (len == (size_t)put_pkt->immed_len) {
         /* All origin data is in packet header, issue the header. */
         MPIU_THREAD_CS_ENTER(CH3COMM, vc);
         mpi_errno = MPIDI_CH3_iStartMsg(vc, put_pkt, sizeof(*put_pkt), &(rma_op->request));
@@ -408,7 +408,7 @@ static int issue_acc_op(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
     MPIU_Assign_trunc(len, rma_op->origin_count * origin_type_size, size_t);
 
-    if (len == accum_pkt->immed_len) {
+    if (len == (size_t)accum_pkt->immed_len) {
         /* All origin data is in packet header, issue the header. */
         MPIU_THREAD_CS_ENTER(CH3COMM, vc);
         mpi_errno = MPIDI_CH3_iStartMsg(vc, accum_pkt, sizeof(*accum_pkt), &(rma_op->request));
@@ -493,7 +493,7 @@ static int issue_get_acc_op(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
     MPIU_Assign_trunc(len, rma_op->origin_count * origin_type_size, size_t);
 
-    if (len == get_accum_pkt->immed_len) {
+    if (len == (size_t)get_accum_pkt->immed_len) {
         /* All origin data is in packet header, issue the header. */
         MPIU_THREAD_CS_ENTER(CH3COMM, vc);
         mpi_errno = MPIDI_CH3_iStartMsg(vc, get_accum_pkt, sizeof(*get_accum_pkt), &(rma_op->request));
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 301fc9b..2dc8114 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -444,7 +444,7 @@ typedef struct MPIDI_CH3_Pkt_put {
     int lock_type;
     int origin_rank;
     /* Followings are to piggyback IMMED data */
-    size_t immed_len;
+    int immed_len;
     char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_put_t;
 
@@ -481,7 +481,7 @@ typedef struct MPIDI_CH3_Pkt_accum {
     int lock_type;
     int origin_rank;
     /* Followings are to piggyback IMMED data */
-    size_t immed_len;
+    int immed_len;
     char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_accum_t;
 
@@ -503,7 +503,7 @@ typedef struct MPIDI_CH3_Pkt_get_accum {
     int lock_type;
     int origin_rank;
     /* Followings are to piggback IMMED data */
-    size_t immed_len;
+    int immed_len;
     char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_get_accum_t;
 
@@ -561,7 +561,7 @@ typedef struct MPIDI_CH3_Pkt_get_resp {
     MPI_Win source_win_handle;
     int target_rank;
     /* Followings are to piggyback IMMED data */
-    size_t immed_len;
+    int immed_len;
     char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_get_resp_t;
 
@@ -575,7 +575,7 @@ typedef struct MPIDI_CH3_Pkt_get_accum_resp {
     MPI_Win source_win_handle;
     int target_rank;
     /* Followings are to piggyback IMMED data */
-    size_t immed_len;
+    int immed_len;
     char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_get_accum_resp_t;
 
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index f132d96..cf4dfcd 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -255,7 +255,7 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
         MPID_Request *req = NULL;
         MPI_Datatype target_dtp;
         int target_count;
-        size_t immed_len = 0;
+        int immed_len = 0;
         void *immed_data = NULL;
         int complete = 0;
         MPIDI_msg_sz_t data_len;
@@ -302,7 +302,7 @@ static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
 
         if (immed_len > 0) {
             /* see if we can receive some data from packet header */
-            MPIU_Memcpy(req->dev.user_buf, immed_data, immed_len);
+            MPIU_Memcpy(req->dev.user_buf, immed_data, (size_t)immed_len);
             req->dev.user_buf = (void*)((char*)req->dev.user_buf + immed_len);
             req->dev.recv_data_sz -= immed_len;
         }
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index fe77f0b..e4a03de 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -287,18 +287,18 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *vc,
         /* Try to copy target data into packet header. */
         MPIU_Assign_trunc(get_accum_resp_pkt->immed_len,
                           MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / type_size) * type_size),
-                          size_t);
+                          int);
 
         if (get_accum_resp_pkt->immed_len > 0) {
             void *src = resp_req->dev.user_buf;
             void *dest = (void*) get_accum_resp_pkt->data;
             /* copy data from origin buffer to immed area in packet header */
-            mpi_errno = immed_copy(src, dest, get_accum_resp_pkt->immed_len);
+            mpi_errno = immed_copy(src, dest, (size_t)get_accum_resp_pkt->immed_len);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
         }
     }
 
-    if (len == get_accum_resp_pkt->immed_len) {
+    if (len == (size_t)get_accum_resp_pkt->immed_len) {
         /* All origin data is in packet header, issue the header. */
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
@@ -914,18 +914,18 @@ static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_qu
         /* Try to copy target data into packet header. */
         MPIU_Assign_trunc(get_resp_pkt->immed_len,
                           MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / type_size) * type_size),
-                          size_t);
+                          int);
 
         if (get_resp_pkt->immed_len > 0) {
             void *src = get_pkt->addr;
             void *dest = (void*) get_resp_pkt->data;
             /* copy data from origin buffer to immed area in packet header */
-            mpi_errno = immed_copy(src, dest, get_resp_pkt->immed_len);
+            mpi_errno = immed_copy(src, dest, (size_t)get_resp_pkt->immed_len);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
         }
     }
 
-    if (len == get_resp_pkt->immed_len) {
+    if (len == (size_t)get_resp_pkt->immed_len) {
         /* All origin data is in packet header, issue the header. */
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
@@ -1066,18 +1066,18 @@ static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_loc
         /* Try to copy target data into packet header. */
         MPIU_Assign_trunc(get_accum_resp_pkt->immed_len,
                           MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / type_size) * type_size),
-                          size_t);
+                          int);
 
         if (get_accum_resp_pkt->immed_len > 0) {
             void *src = sreq->dev.user_buf;
             void *dest = (void*) get_accum_resp_pkt->data;
             /* copy data from origin buffer to immed area in packet header */
-            mpi_errno = immed_copy(src, dest, get_accum_resp_pkt->immed_len);
+            mpi_errno = immed_copy(src, dest, (size_t)get_accum_resp_pkt->immed_len);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
         }
     }
 
-    if (len == get_accum_resp_pkt->immed_len) {
+    if (len == (size_t)get_accum_resp_pkt->immed_len) {
         /* All origin data is in packet header, issue the header. */
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
@@ -1156,7 +1156,7 @@ static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_qu
 
     /* copy data to resp pkt header */
     void *src = fop_pkt->addr, *dest = fop_resp_pkt->data;
-    mpi_errno = immed_copy(src, dest, fop_resp_pkt->immed_len);
+    mpi_errno = immed_copy(src, dest, (size_t)fop_resp_pkt->immed_len);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     /* Apply the op */
diff --git a/src/mpid/ch3/src/ch3u_rma_ops.c b/src/mpid/ch3/src/ch3u_rma_ops.c
index 35c6c12..8230b3e 100644
--- a/src/mpid/ch3/src/ch3u_rma_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_ops.c
@@ -186,12 +186,12 @@ int MPIDI_CH3I_Put(const void *origin_addr, int origin_count, MPI_Datatype
             /* length of origin data that can fit into immed area in pkt header */
             MPIU_Assign_trunc(put_pkt->immed_len,
                               MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / origin_type_size) * origin_type_size),
-                              size_t);
+                              int);
 
             if (put_pkt->immed_len > 0) {
                 void *src = new_ptr->origin_addr, *dest = put_pkt->data;
                 /* copy data from origin buffer to immed area in packet header */
-                mpi_errno = immed_copy(src, dest, put_pkt->immed_len);
+                mpi_errno = immed_copy(src, dest, (size_t)put_pkt->immed_len);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
             }
 
@@ -536,12 +536,12 @@ int MPIDI_CH3I_Accumulate(const void *origin_addr, int origin_count, MPI_Datatyp
             /* length of origin data that can fit into immed areas in packet header */
             MPIU_Assign_trunc(accum_pkt->immed_len,
                               MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / origin_type_size) * origin_type_size),
-                              size_t);
+                              int);
 
             if (accum_pkt->immed_len > 0) {
                 void *src = new_ptr->origin_addr, *dest = accum_pkt->data;
                 /* copy data from origin buffer to immed area in packet header */
-                mpi_errno = immed_copy(src, dest, accum_pkt->immed_len);
+                mpi_errno = immed_copy(src, dest, (size_t)accum_pkt->immed_len);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
             }
 
@@ -759,12 +759,12 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
                 /* length of origin data that can fit into immed area in packet header */
                 MPIU_Assign_trunc(get_accum_pkt->immed_len,
                                   MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / origin_type_size) * origin_type_size),
-                                  size_t);
+                                  int);
 
                 if (get_accum_pkt->immed_len > 0) {
                     void *src = new_ptr->origin_addr, *dest = get_accum_pkt->data;
                     /* copy data from origin buffer to immed area in packet header */
-                    mpi_errno = immed_copy(src, dest, get_accum_pkt->immed_len);
+                    mpi_errno = immed_copy(src, dest, (size_t)get_accum_pkt->immed_len);
                     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
                 }
 
@@ -1178,12 +1178,12 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
             /* length of origin data that can fit into immed area in pkt header */
             MPIU_Assign_trunc(fop_pkt->immed_len,
                               MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / origin_type_size) * origin_type_size),
-                              size_t);
+                              int);
 
             if (fop_pkt->immed_len > 0) {
                 void *src = new_ptr->origin_addr, *dest = fop_pkt->data;
                 /* copy data from origin buffer to immed area in packet header */
-                mpi_errno = immed_copy(src, dest, fop_pkt->immed_len);
+                mpi_errno = immed_copy(src, dest, (size_t)fop_pkt->immed_len);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
             }
         }
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index 1430557..df9ea36 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -237,7 +237,7 @@ int MPIDI_CH3_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
         if (put_pkt->immed_len > 0) {
             /* See if we can receive some data from packet header. */
-            MPIU_Memcpy(req->dev.user_buf, put_pkt->data, put_pkt->immed_len);
+            MPIU_Memcpy(req->dev.user_buf, put_pkt->data, (size_t)put_pkt->immed_len);
             req->dev.user_buf = (void*)((char*)req->dev.user_buf + put_pkt->immed_len);
             req->dev.recv_data_sz -= put_pkt->immed_len;
         }
@@ -426,18 +426,18 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
             /* Try to copy target data into packet header. */
             MPIU_Assign_trunc(get_resp_pkt->immed_len,
                               MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / type_size) * type_size),
-                              size_t);
+                              int);
 
             if (get_resp_pkt->immed_len > 0) {
                 void *src = get_pkt->addr;
                 void *dest = (void*) get_resp_pkt->data;
                 /* copy data from origin buffer to immed area in packet header */
-                mpi_errno = immed_copy(src, dest, get_resp_pkt->immed_len);
+                mpi_errno = immed_copy(src, dest, (size_t)get_resp_pkt->immed_len);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
             }
         }
 
-        if (len == get_resp_pkt->immed_len) {
+        if (len == (size_t)get_resp_pkt->immed_len) {
             /* All origin data is in packet header, issue the header. */
             iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
             iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
@@ -608,7 +608,7 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
         if (accum_pkt->immed_len > 0) {
             /* See if we can receive some data from packet header. */
-            MPIU_Memcpy(req->dev.user_buf, accum_pkt->data, accum_pkt->immed_len);
+            MPIU_Memcpy(req->dev.user_buf, accum_pkt->data, (size_t)accum_pkt->immed_len);
             req->dev.user_buf = (void*)((char*)req->dev.user_buf + accum_pkt->immed_len);
             req->dev.recv_data_sz -= accum_pkt->immed_len;
         }
@@ -783,7 +783,7 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
         if (get_accum_pkt->immed_len > 0) {
             /* See if we can receive some data from packet header. */
-            MPIU_Memcpy(req->dev.user_buf, get_accum_pkt->data, get_accum_pkt->immed_len);
+            MPIU_Memcpy(req->dev.user_buf, get_accum_pkt->data, (size_t)get_accum_pkt->immed_len);
             req->dev.user_buf = (void*)((char*)req->dev.user_buf + get_accum_pkt->immed_len);
             req->dev.recv_data_sz -= get_accum_pkt->immed_len;
         }
@@ -1102,7 +1102,7 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     /* copy data to resp pkt header */
     void *src = fop_pkt->addr, *dest = fop_resp_pkt->data;
-    mpi_errno = immed_copy(src, dest, fop_resp_pkt->immed_len);
+    mpi_errno = immed_copy(src, dest, (size_t)fop_resp_pkt->immed_len);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     /* Apply the op */
@@ -1182,7 +1182,7 @@ int MPIDI_CH3_PktHandler_FOPResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
     /* Copy data to result buffer on orgin */
     MPID_Request_get_ptr(fop_resp_pkt->request_handle, req);
-    MPIU_Memcpy(req->dev.user_buf, fop_resp_pkt->data, fop_resp_pkt->immed_len);
+    MPIU_Memcpy(req->dev.user_buf, fop_resp_pkt->data, (size_t)fop_resp_pkt->immed_len);
 
     /* decrement ack_counter */
     if (fop_resp_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
@@ -1263,7 +1263,7 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     if (get_accum_resp_pkt->immed_len > 0) {
         /* first copy IMMED data from pkt header to origin buffer */
-        MPIU_Memcpy(req->dev.user_buf, get_accum_resp_pkt->data, get_accum_resp_pkt->immed_len);
+        MPIU_Memcpy(req->dev.user_buf, get_accum_resp_pkt->data, (size_t)get_accum_resp_pkt->immed_len);
         req->dev.user_buf = (void*)((char*)req->dev.user_buf + get_accum_resp_pkt->immed_len);
         req->dev.recv_data_sz -= get_accum_resp_pkt->immed_len;
         if (req->dev.recv_data_sz == 0)
@@ -1398,7 +1398,7 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
     if (get_resp_pkt->immed_len > 0) {
         /* first copy IMMED data from pkt header to origin buffer */
-        MPIU_Memcpy(req->dev.user_buf, get_resp_pkt->data, get_resp_pkt->immed_len);
+        MPIU_Memcpy(req->dev.user_buf, get_resp_pkt->data, (size_t)get_resp_pkt->immed_len);
         req->dev.user_buf = (void*)((char*)req->dev.user_buf + get_resp_pkt->immed_len);
         req->dev.recv_data_sz -= get_resp_pkt->immed_len;
         if (req->dev.recv_data_sz == 0)

http://git.mpich.org/mpich.git/commitdiff/cc158ff2bd0b80bea572493e41565b9a657c2726

commit cc158ff2bd0b80bea572493e41565b9a657c2726
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Dec 3 12:31:42 2014 -0800

    Bug-fix: set put_acc_issued flag correctly
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_oplist.c b/src/mpid/ch3/src/ch3u_rma_oplist.c
index 4f5c575..d728a60 100644
--- a/src/mpid/ch3/src/ch3u_rma_oplist.c
+++ b/src/mpid/ch3/src/ch3u_rma_oplist.c
@@ -323,6 +323,12 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
         if (mpi_errno != MPI_SUCCESS)
             MPIU_ERR_POP(mpi_errno);
 
+        if (curr_op->pkt.type == MPIDI_CH3_PKT_PUT ||
+            curr_op->pkt.type == MPIDI_CH3_PKT_ACCUMULATE) {
+            target->put_acc_issued = 1; /* set PUT_ACC_FLAG when sending
+                                           PUT/ACC operation. */
+        }
+
         if (!curr_op->request) {
             if (curr_op->ureq) {
                 /* Complete user request and release the ch3 ref */
@@ -346,8 +352,6 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
                      curr_op->pkt.type == MPIDI_CH3_PKT_ACCUMULATE) {
                 MPIDI_CH3I_RMA_Ops_append(&(target->write_op_list),
                                           &(target->write_op_list_tail), curr_op);
-                target->put_acc_issued = 1; /* set PUT_ACC_FLAG when sending
-                                               PUT/ACC operation. */
             }
             else {
                 MPIDI_CH3I_RMA_Ops_append(&(target->read_op_list),

http://git.mpich.org/mpich.git/commitdiff/2493e98b996d7a1ba9a5e5434666cbea1d89779d

commit 2493e98b996d7a1ba9a5e5434666cbea1d89779d
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Nov 20 09:57:30 2014 -0600

    Perf-optimize: avoid FLUSH/FLUSH_ACK messages if no PUT/ACC.
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_oplist.c b/src/mpid/ch3/src/ch3u_rma_oplist.c
index 69e98d5..4f5c575 100644
--- a/src/mpid/ch3/src/ch3u_rma_oplist.c
+++ b/src/mpid/ch3/src/ch3u_rma_oplist.c
@@ -233,13 +233,10 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
                 target->sync.outstanding_acks--;
                 MPIU_Assert(target->sync.outstanding_acks == 0);
             }
-            else if (target->read_op_list == NULL &&
-                     target->write_op_list == NULL &&
-                     target->dt_op_list == NULL &&
-                     target->put_acc_issued == 0) {
-                /* both pending list and all waiting lists for
-                   this target are empty, we do not need to send
-                   FLUSH message then. */
+            else if (target->put_acc_issued == 0) {
+                /* We did not issue PUT/ACC since the last
+                   synchronization call, therefore we do
+                   not need to issue FLUSH here. */
                 target->sync.outstanding_acks--;
                 MPIU_Assert(target->sync.outstanding_acks >= 0);
             }
@@ -258,11 +255,10 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t *targe
                 target->sync.outstanding_acks--;
                 MPIU_Assert(target->sync.outstanding_acks == 0);
             }
-            else if (target->read_op_list == NULL &&
-                     target->write_op_list == NULL &&
-                     target->dt_op_list == NULL &&
-                     target->put_acc_issued == 0) {
-                /* send message to unlock target, but don't need ACK */
+            else if (target->put_acc_issued == 0) {
+                /* We did not issue PUT/ACC since the last
+                   synchronization call, therefore here we
+                   don't need ACK back */
                 mpi_errno = send_unlock_msg(target->target_rank, win_ptr, MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 

http://git.mpich.org/mpich.git/commitdiff/87acbbbe5201f207d3d210c3340741748243e993

commit 87acbbbe5201f207d3d210c3340741748243e993
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Nov 19 16:40:42 2014 -0600

    Bug-fix: add IMMED area in GET/GACC response packets
    
    In this patch we allow GET/GACC response packets to
    piggyback some IMMED data, just as we already do
    for PUT/GACC/FOP/CAS packets.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index ac64d41..301fc9b 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -119,7 +119,8 @@ typedef enum {
     MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK = 256,
     MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK = 512,
     MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED = 1024,
-    MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK = 2048
+    MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK = 2048,
+    MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP = 4096
 } MPIDI_CH3_Pkt_flags_t;
 
 typedef struct MPIDI_CH3_Pkt_send {
@@ -559,6 +560,9 @@ typedef struct MPIDI_CH3_Pkt_get_resp {
     /* Followings are used to decrement ack_counter at origin */
     MPI_Win source_win_handle;
     int target_rank;
+    /* Followings are to piggyback IMMED data */
+    size_t immed_len;
+    char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_get_resp_t;
 
 typedef struct MPIDI_CH3_Pkt_get_accum_resp {
@@ -570,6 +574,9 @@ typedef struct MPIDI_CH3_Pkt_get_accum_resp {
     /* Followings are used to decrement ack_counter at origin */
     MPI_Win source_win_handle;
     int target_rank;
+    /* Followings are to piggyback IMMED data */
+    size_t immed_len;
+    char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_get_accum_resp_t;
 
 typedef struct MPIDI_CH3_Pkt_fop_resp {
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 150e1c7..fe77f0b 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -224,6 +224,8 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *vc,
     MPID_Request *resp_req;
     MPID_IOV iov[MPID_IOV_LIMIT];
     MPI_Aint true_lb, true_extent;
+    size_t len;
+    int iovcnt;
     MPIU_CHKPMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);
 
@@ -242,6 +244,7 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *vc,
         get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
     if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
         get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
+    get_accum_resp_pkt->immed_len = 0;
 
     MPID_Datatype_get_size_macro(rreq->dev.datatype, type_size);
 
@@ -275,14 +278,42 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *vc,
        operation are completed when counter reaches zero. */
     win_ptr->at_completion_counter++;
 
-    iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
-    iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
+    /* length of target data */
+    MPIU_Assign_trunc(len, rreq->dev.user_count * type_size, size_t);
+
+    /* both origin buffer and target buffer are basic datatype,
+       fill IMMED data area in response packet header. */
+    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
+        /* Try to copy target data into packet header. */
+        MPIU_Assign_trunc(get_accum_resp_pkt->immed_len,
+                          MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / type_size) * type_size),
+                          size_t);
+
+        if (get_accum_resp_pkt->immed_len > 0) {
+            void *src = resp_req->dev.user_buf;
+            void *dest = (void*) get_accum_resp_pkt->data;
+            /* copy data from origin buffer to immed area in packet header */
+            mpi_errno = immed_copy(src, dest, get_accum_resp_pkt->immed_len);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        }
+    }
 
-    iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)resp_req->dev.user_buf;
-    iov[1].MPID_IOV_LEN = type_size*rreq->dev.user_count;
+    if (len == get_accum_resp_pkt->immed_len) {
+        /* All origin data is in packet header, issue the header. */
+        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
+        iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
+        iovcnt = 1;
+    }
+    else {
+        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
+        iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
+        iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *)resp_req->dev.user_buf + get_accum_resp_pkt->immed_len);
+        iov[1].MPID_IOV_LEN = rreq->dev.user_count * type_size - get_accum_resp_pkt->immed_len;
+        iovcnt = 2;
+    }
 
     MPIU_THREAD_CS_ENTER(CH3COMM,vc);
-    mpi_errno = MPIDI_CH3_iSendv(vc, resp_req, iov, 2);
+    mpi_errno = MPIDI_CH3_iSendv(vc, resp_req, iov, iovcnt);
     MPIU_THREAD_CS_EXIT(CH3COMM,vc);
 
     MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
@@ -569,6 +600,7 @@ int MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete( MPIDI_VC_t *vc,
         get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
     if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
         get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
+    get_resp_pkt->immed_len = 0;
     
     sreq->dev.segment_ptr = MPID_Segment_alloc( );
     MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
@@ -836,6 +868,8 @@ static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_qu
     MPID_Request *sreq = NULL;
     MPIDI_VC_t *vc = NULL;
     MPI_Aint type_size;
+    size_t len;
+    int iovcnt;
     MPID_IOV iov[MPID_IOV_LIMIT];
     int mpi_errno = MPI_SUCCESS;
 
@@ -868,18 +902,47 @@ static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_qu
         get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
     get_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
     get_resp_pkt->source_win_handle = get_pkt->source_win_handle;
+    get_resp_pkt->immed_len = 0;
 
-    iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
-    iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
-
-    iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)get_pkt->addr;
+    /* length of target data */
     MPID_Datatype_get_size_macro(get_pkt->datatype, type_size);
-    iov[1].MPID_IOV_LEN = get_pkt->count * type_size;
+    MPIU_Assign_trunc(len, get_pkt->count * type_size, size_t);
+
+    /* both origin buffer and target buffer are basic datatype,
+       fill IMMED data area in response packet header. */
+    if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
+        /* Try to copy target data into packet header. */
+        MPIU_Assign_trunc(get_resp_pkt->immed_len,
+                          MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / type_size) * type_size),
+                          size_t);
+
+        if (get_resp_pkt->immed_len > 0) {
+            void *src = get_pkt->addr;
+            void *dest = (void*) get_resp_pkt->data;
+            /* copy data from origin buffer to immed area in packet header */
+            mpi_errno = immed_copy(src, dest, get_resp_pkt->immed_len);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        }
+    }
+
+    if (len == get_resp_pkt->immed_len) {
+        /* All origin data is in packet header, issue the header. */
+        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
+        iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
+        iovcnt = 1;
+    }
+    else {
+        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
+        iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
+        iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *)get_pkt->addr + get_resp_pkt->immed_len);
+        iov[1].MPID_IOV_LEN = get_pkt->count * type_size - get_resp_pkt->immed_len;
+        iovcnt = 2;
+    }
 
     /* get vc object */
     MPIDI_Comm_get_vc(win_ptr->comm_ptr, get_pkt->origin_rank, &vc);
 
-    mpi_errno = MPIDI_CH3_iSendv(vc, sreq, iov, 2);
+    mpi_errno = MPIDI_CH3_iSendv(vc, sreq, iov, iovcnt);
     if (mpi_errno != MPI_SUCCESS) {
         MPID_Request_release(sreq);
 	MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
@@ -940,6 +1003,8 @@ static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_loc
     MPID_Request *sreq = NULL;
     MPIDI_VC_t *vc = NULL;
     MPI_Aint type_size;
+    size_t len;
+    int iovcnt;
     MPID_IOV iov[MPID_IOV_LIMIT];
     int mpi_errno = MPI_SUCCESS;
 
@@ -990,17 +1055,46 @@ static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_loc
         get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
     get_accum_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
     get_accum_resp_pkt->source_win_handle = get_accum_pkt->source_win_handle;
+    get_accum_resp_pkt->immed_len = 0;
+
+    /* length of target data */
+    MPIU_Assign_trunc(len, get_accum_pkt->count * type_size, size_t);
+
+    /* both origin buffer and target buffer are basic datatype,
+       fill IMMED data area in response packet header. */
+    if (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
+        /* Try to copy target data into packet header. */
+        MPIU_Assign_trunc(get_accum_resp_pkt->immed_len,
+                          MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / type_size) * type_size),
+                          size_t);
+
+        if (get_accum_resp_pkt->immed_len > 0) {
+            void *src = sreq->dev.user_buf;
+            void *dest = (void*) get_accum_resp_pkt->data;
+            /* copy data from origin buffer to immed area in packet header */
+            mpi_errno = immed_copy(src, dest, get_accum_resp_pkt->immed_len);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        }
+    }
 
-    iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
-    iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
-
-    iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) sreq->dev.user_buf;
-    iov[1].MPID_IOV_LEN = get_accum_pkt->count * type_size;
+    if (len == get_accum_resp_pkt->immed_len) {
+        /* All origin data is in packet header, issue the header. */
+        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
+        iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
+        iovcnt = 1;
+    }
+    else {
+        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
+        iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
+        iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *)sreq->dev.user_buf + get_accum_resp_pkt->immed_len);
+        iov[1].MPID_IOV_LEN = get_accum_pkt->count * type_size - get_accum_resp_pkt->immed_len;
+        iovcnt = 2;
+    }
 
     /* get vc object */
     MPIDI_Comm_get_vc(win_ptr->comm_ptr, get_accum_pkt->origin_rank, &vc);
 
-    mpi_errno = MPIDI_CH3_iSendv(vc, sreq, iov, 2);
+    mpi_errno = MPIDI_CH3_iSendv(vc, sreq, iov, iovcnt);
     if (mpi_errno != MPI_SUCCESS) {
         MPID_Request_release(sreq);
 	MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
diff --git a/src/mpid/ch3/src/ch3u_rma_ops.c b/src/mpid/ch3/src/ch3u_rma_ops.c
index 4347edf..35c6c12 100644
--- a/src/mpid/ch3/src/ch3u_rma_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_ops.c
@@ -358,6 +358,10 @@ int MPIDI_CH3I_Get(void *origin_addr, int origin_count, MPI_Datatype
 
         if (!new_ptr->is_dt) {
             new_ptr->piggyback_lock_candidate = 1;
+
+            /* Only fill IMMED data in response packet when both origin and target
+               buffers are basic datatype. */
+            get_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
         }
 
         MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
@@ -695,6 +699,10 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
 
             if (!new_ptr->is_dt) {
                 new_ptr->piggyback_lock_candidate = 1;
+
+                /* Only fill IMMED data in response packet when both origin and target
+                   buffers are basic datatype. */
+                get_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
             }
         }
 
@@ -763,6 +771,10 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
                 if (len <= MPIR_MAX(MPIDI_RMA_IMMED_BYTES,
                                     MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE))
                     new_ptr->piggyback_lock_candidate = 1;
+
+                /* Only fill IMMED data in response packet when both origin and target
+                   buffers are basic datatype. */
+                get_accum_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
             }
         }
 
@@ -1128,6 +1140,7 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
             get_pkt->source_win_handle = win_ptr->handle;
             get_pkt->origin_rank = rank;
             get_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+            get_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP;
 
             new_ptr->origin_addr = result_addr;
             new_ptr->origin_count = 1;
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index 63cb77c..1430557 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -395,6 +395,8 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         /* basic datatype. send the data. */
         MPIDI_CH3_Pkt_t upkt;
         MPIDI_CH3_Pkt_get_resp_t *get_resp_pkt = &upkt.get_resp;
+        size_t len;
+        int iovcnt;
 
         MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_RESP);
         req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GetSendComplete;
@@ -412,16 +414,45 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
             get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
         get_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
         get_resp_pkt->source_win_handle = get_pkt->source_win_handle;
+        get_resp_pkt->immed_len = 0;
 
-        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
-        iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
-
-        iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_pkt->addr;
+        /* length of target data */
         MPID_Datatype_get_size_macro(get_pkt->datatype, type_size);
-        iov[1].MPID_IOV_LEN = get_pkt->count * type_size;
+        MPIU_Assign_trunc(len, get_pkt->count * type_size, size_t);
+
+        /* both origin buffer and target buffer are basic datatype,
+           fill IMMED data area in response packet header. */
+        if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
+            /* Try to copy target data into packet header. */
+            MPIU_Assign_trunc(get_resp_pkt->immed_len,
+                              MPIR_MIN(len, (MPIDI_RMA_IMMED_BYTES / type_size) * type_size),
+                              size_t);
+
+            if (get_resp_pkt->immed_len > 0) {
+                void *src = get_pkt->addr;
+                void *dest = (void*) get_resp_pkt->data;
+                /* copy data from origin buffer to immed area in packet header */
+                mpi_errno = immed_copy(src, dest, get_resp_pkt->immed_len);
+                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+            }
+        }
+
+        if (len == get_resp_pkt->immed_len) {
+            /* All origin data is in packet header, issue the header. */
+            iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
+            iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
+            iovcnt = 1;
+        }
+        else {
+            iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_resp_pkt;
+            iov[0].MPID_IOV_LEN = sizeof(*get_resp_pkt);
+            iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) ((char *)get_pkt->addr + get_resp_pkt->immed_len);
+            iov[1].MPID_IOV_LEN = get_pkt->count * type_size - get_resp_pkt->immed_len;
+            iovcnt = 2;
+        }
 
         MPIU_THREAD_CS_ENTER(CH3COMM, vc);
-        mpi_errno = MPIDI_CH3_iSendv(vc, req, iov, 2);
+        mpi_errno = MPIDI_CH3_iSendv(vc, req, iov, iovcnt);
         MPIU_THREAD_CS_EXIT(CH3COMM, vc);
         /* --BEGIN ERROR HANDLING-- */
         if (mpi_errno != MPI_SUCCESS) {
@@ -1230,10 +1261,27 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPID_Datatype_get_size_macro(req->dev.datatype, type_size);
     req->dev.recv_data_sz = type_size * req->dev.user_count;
 
+    if (get_accum_resp_pkt->immed_len > 0) {
+        /* first copy IMMED data from pkt header to origin buffer */
+        MPIU_Memcpy(req->dev.user_buf, get_accum_resp_pkt->data, get_accum_resp_pkt->immed_len);
+        req->dev.user_buf = (void*)((char*)req->dev.user_buf + get_accum_resp_pkt->immed_len);
+        req->dev.recv_data_sz -= get_accum_resp_pkt->immed_len;
+        if (req->dev.recv_data_sz == 0)
+            complete = 1;
+
+        /* return the number of bytes processed in this function */
+        *buflen = sizeof(MPIDI_CH3_Pkt_t);
+    }
+
+    if(req->dev.recv_data_sz > 0) {
     *rreqp = req;
     mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
     MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
                          "**ch3|postrecv %s", "MPIDI_CH3_PKT_GET_ACCUM_RESP");
+
+    /* return the number of bytes processed in this function */
+    *buflen = data_len + sizeof(MPIDI_CH3_Pkt_t);
+    }
     if (complete) {
         /* Request-based RMA defines final actions for completing user request. */
         int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
@@ -1246,8 +1294,6 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         }
         *rreqp = NULL;
     }
-    /* return the number of bytes processed in this function */
-    *buflen = data_len + sizeof(MPIDI_CH3_Pkt_t);
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_get_accum_resp);
@@ -1350,10 +1396,28 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     MPID_Datatype_get_size_macro(req->dev.datatype, type_size);
     req->dev.recv_data_sz = type_size * req->dev.user_count;
 
+    if (get_resp_pkt->immed_len > 0) {
+        /* first copy IMMED data from pkt header to origin buffer */
+        MPIU_Memcpy(req->dev.user_buf, get_resp_pkt->data, get_resp_pkt->immed_len);
+        req->dev.user_buf = (void*)((char*)req->dev.user_buf + get_resp_pkt->immed_len);
+        req->dev.recv_data_sz -= get_resp_pkt->immed_len;
+        if (req->dev.recv_data_sz == 0)
+            complete = 1;
+
+        /* return the number of bytes processed in this function */
+        *buflen = sizeof(MPIDI_CH3_Pkt_t);
+    }
+
+    if (req->dev.recv_data_sz > 0) {
     *rreqp = req;
     mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
     MPIU_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv", "**ch3|postrecv %s",
                          "MPIDI_CH3_PKT_GET_RESP");
+
+    /* return the number of bytes processed in this function */
+    *buflen = data_len + sizeof(MPIDI_CH3_Pkt_t);
+    }
+
     if (complete) {
         /* Request-based RMA defines final actions for completing user request. */
         int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
@@ -1367,8 +1431,6 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
         *rreqp = NULL;
     }
-    /* return the number of bytes processed in this function */
-    *buflen = data_len + sizeof(MPIDI_CH3_Pkt_t);
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_get_resp);

http://git.mpich.org/mpich.git/commitdiff/4739df598e83186eaa25db33606cc00193385e58

commit 4739df598e83186eaa25db33606cc00193385e58
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Nov 20 20:47:01 2014 -0600

    Perf-optimize: support piggybacking LOCK on large RMA operations.
    
    Originally we only allowed a LOCK request to be piggybacked
    on small RMA operations (where all data fits in the packet
    header). This adds communication overhead for larger
    operations, since the origin side needs to wait for the LOCK
    ACK before it can transmit data to the target.
    
    In this patch we add support for piggybacking LOCK on
    RMA operations of arbitrary size. Note that (1) this
    only works with basic datatypes; and (2) if the LOCK cannot
    be satisfied, we temporarily buffer the operation on
    the target side.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpid_rma_lockqueue.h b/src/mpid/ch3/include/mpid_rma_lockqueue.h
new file mode 100644
index 0000000..9fe6fb2
--- /dev/null
+++ b/src/mpid/ch3/include/mpid_rma_lockqueue.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2014 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#if !defined(MPID_RMA_LOCKQUEUE_H_INCLUDED)
+#define MPID_RMA_LOCKQUEUE_H_INCLUDED
+
+#include "mpl_utlist.h"
+#include "mpid_rma_types.h"
+
+MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_lockqueue_alloc);
+MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winlock_getlocallock);
+
+/* MPIDI_CH3I_Win_lock_entry_alloc(): return a new lock queue entry and
+ * initialize it. If we cannot get one, return NULL. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Win_lock_entry_alloc
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+static inline int MPIDI_CH3I_Win_lock_entry_alloc(MPID_Win * win_ptr,
+                                                  MPIDI_CH3_Pkt_t *pkt,
+                                                  MPIDI_Win_lock_queue **lock_entry)
+{
+    MPIDI_Win_lock_queue *new_ptr = NULL;
+    int mpi_errno = MPI_SUCCESS;
+
+    /* FIXME: we should use a lock entry queue to manage all this. */
+
+    /* allocate lock queue entry */
+    MPIR_T_PVAR_TIMER_START(RMA, rma_lockqueue_alloc);
+    new_ptr = (MPIDI_Win_lock_queue *) MPIU_Malloc(sizeof(MPIDI_Win_lock_queue));
+    MPIR_T_PVAR_TIMER_END(RMA, rma_lockqueue_alloc);
+    if (!new_ptr) {
+        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
+                             "MPIDI_Win_lock_queue");
+    }
+
+    new_ptr->next = NULL;
+    new_ptr->pkt = (*pkt);
+    new_ptr->data = NULL;
+    new_ptr->all_data_recved = 0;
+
+    (*lock_entry) = new_ptr;
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#endif  /* MPID_RMA_ISSUE_H_INCLUDED */
diff --git a/src/mpid/ch3/include/mpid_rma_types.h b/src/mpid/ch3/include/mpid_rma_types.h
index 8037cc4..81ff07f 100644
--- a/src/mpid/ch3/include/mpid_rma_types.h
+++ b/src/mpid/ch3/include/mpid_rma_types.h
@@ -132,6 +132,8 @@ extern MPIDI_RMA_Win_list_t *MPIDI_RMA_Win_list, *MPIDI_RMA_Win_list_tail;
 typedef struct MPIDI_Win_lock_queue {
     struct MPIDI_Win_lock_queue *next;
     MPIDI_CH3_Pkt_t pkt;    /* all information for this request packet */
+    void *data;             /* for queued PUTs / ACCs / GACCs, data is copied here */
+    int all_data_recved;    /* indicate if all data has been received */
 } MPIDI_Win_lock_queue;
 
 typedef MPIDI_RMA_Op_t *MPIDI_RMA_Ops_list_t;
diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index a64d32f..76c52f2 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -1918,6 +1918,8 @@ int MPIDI_CH3_ReqHandler_GaccumDerivedDTRecvComplete( MPIDI_VC_t *,
                                                       int * );
 int MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete( MPIDI_VC_t *,
 						   MPID_Request *, int * );
+int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *,
+                                                      MPID_Request *, int * );
 /* Send Handlers */
 int MPIDI_CH3_ReqHandler_SendReloadIOV( MPIDI_VC_t *vc, MPID_Request *sreq, 
 					int *complete );
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 9d0840f..ac64d41 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -215,6 +215,79 @@ MPIDI_CH3_PKT_DEFS
         }                                                               \
     }
 
+#define MPIDI_CH3_PKT_RMA_GET_TARGET_COUNT(pkt_, count_, err_)          \
+    {                                                                   \
+        err_ = MPI_SUCCESS;                                             \
+        switch((pkt_).type) {                                           \
+        case (MPIDI_CH3_PKT_PUT):                                       \
+            count_ = (pkt_).put.count;                                  \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_GET):                                       \
+            count_ = (pkt_).get.count;                                  \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_ACCUMULATE):                                \
+            count_ = (pkt_).accum.count;                                \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_GET_ACCUM):                                 \
+            count_ = (pkt_).get_accum.count;                            \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_CAS):                                       \
+        case (MPIDI_CH3_PKT_FOP):                                       \
+            count_ = 1;                                                 \
+            break;                                                      \
+        default:                                                        \
+            MPIU_ERR_SETANDJUMP1(err_, MPI_ERR_OTHER, "**invalidpkt", "**invalidpkt %d", (pkt_).type); \
+        }                                                               \
+    }
+
+#define MPIDI_CH3_PKT_RMA_GET_IMMED_LEN(pkt_, immed_len_, err_)         \
+    {                                                                   \
+        err_ = MPI_SUCCESS;                                             \
+        switch((pkt_).type) {                                           \
+        case (MPIDI_CH3_PKT_PUT):                                       \
+            immed_len_ = (pkt_).put.immed_len;                          \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_ACCUMULATE):                                \
+            immed_len_ = (pkt_).accum.immed_len;                        \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_GET_ACCUM):                                 \
+            immed_len_ = (pkt_).get_accum.immed_len;                    \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_FOP):                                       \
+            immed_len_ = (pkt_).fop.immed_len;                          \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_CAS):                                       \
+            /* FIXME: we should deal with CAS here */                   \
+            break;                                                      \
+        default:                                                        \
+            MPIU_ERR_SETANDJUMP1(err_, MPI_ERR_OTHER, "**invalidpkt", "**invalidpkt %d", (pkt_).type); \
+        }                                                               \
+    }
+
+#define MPIDI_CH3_PKT_RMA_GET_IMMED_DATA_PTR(pkt_, immed_data_, err_)   \
+    {                                                                   \
+        err_ = MPI_SUCCESS;                                             \
+        switch((pkt_).type) {                                           \
+        case (MPIDI_CH3_PKT_PUT):                                       \
+            immed_data_ = (pkt_).put.data;                              \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_ACCUMULATE):                                \
+            immed_data_ = (pkt_).accum.data;                            \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_GET_ACCUM):                                 \
+            immed_data_ = (pkt_).get_accum.data;                        \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_FOP):                                       \
+            immed_data_ = (pkt_).fop.data;                              \
+            break;                                                      \
+        case (MPIDI_CH3_PKT_CAS):                                       \
+            /* FIXME: we should deal with CAS here */                   \
+            break;                                                      \
+        default:                                                        \
+            MPIU_ERR_SETANDJUMP1(err_, MPI_ERR_OTHER, "**invalidpkt", "**invalidpkt %d", (pkt_).type); \
+        }                                                               \
+    }
+
 #define MPIDI_CH3_PKT_RMA_GET_LOCK_TYPE(pkt_, lock_type_, err_)         \
     {                                                                   \
         err_ = MPI_SUCCESS;                                             \
@@ -302,54 +375,27 @@ MPIDI_CH3_PKT_DEFS
         }                                                               \
     }
 
-#define MPIDI_CH3_PKT_RMA_UNSET_FLAG(pkt_, flag_, err_)                 \
-    {                                                                   \
-        err_ = MPI_SUCCESS;                                             \
-        switch((pkt_).type) {                                           \
-        case (MPIDI_CH3_PKT_PUT):                                       \
-            (pkt_).put.flags &= ~(flag_);                               \
-            break;                                                      \
-        case (MPIDI_CH3_PKT_GET):                                       \
-            (pkt_).get.flags &= ~(flag_);                               \
-            break;                                                      \
-        case (MPIDI_CH3_PKT_ACCUMULATE):                                \
-            (pkt_).accum.flags &= ~(flag_);                             \
-            break;                                                      \
-        case (MPIDI_CH3_PKT_GET_ACCUM):                                 \
-            (pkt_).get_accum.flags &= ~(flag_);                         \
-            break;                                                      \
-        case (MPIDI_CH3_PKT_CAS):                                       \
-            (pkt_).cas.flags &= ~(flag_);                               \
-            break;                                                      \
-        case (MPIDI_CH3_PKT_FOP):                                       \
-            (pkt_).fop.flags &= ~(flag_);                               \
-            break;                                                      \
-        default:                                                        \
-            MPIU_ERR_SETANDJUMP1(err_, MPI_ERR_OTHER, "**invalidpkt", "**invalidpkt %d", (pkt_).type); \
-        }                                                               \
-    }
-
-#define MPIDI_CH3_PKT_RMA_SET_FLAG(pkt_, flag_, err_)                   \
+#define MPIDI_CH3_PKT_RMA_GET_TARGET_WIN_HANDLE(pkt_, win_hdl_, err_)   \
     {                                                                   \
         err_ = MPI_SUCCESS;                                             \
         switch((pkt_).type) {                                           \
         case (MPIDI_CH3_PKT_PUT):                                       \
-            (pkt_).put.flags |= (flag_);                                \
+            win_hdl_ = (pkt_).put.target_win_handle;                    \
             break;                                                      \
         case (MPIDI_CH3_PKT_GET):                                       \
-            (pkt_).get.flags |= (flag_);                                \
+            win_hdl_ = (pkt_).get.target_win_handle;                    \
             break;                                                      \
         case (MPIDI_CH3_PKT_ACCUMULATE):                                \
-            (pkt_).accum.flags |= (flag_);                              \
+            win_hdl_ = (pkt_).accum.target_win_handle;                  \
             break;                                                      \
         case (MPIDI_CH3_PKT_GET_ACCUM):                                 \
-            (pkt_).get_accum.flags |= (flag_);                          \
+            win_hdl_ = (pkt_).get_accum.target_win_handle;              \
             break;                                                      \
         case (MPIDI_CH3_PKT_CAS):                                       \
-            (pkt_).cas.flags |= (flag_);                                \
+            win_hdl_ = (pkt_).cas.target_win_handle;                    \
             break;                                                      \
         case (MPIDI_CH3_PKT_FOP):                                       \
-            (pkt_).fop.flags |= (flag_);                                \
+            win_hdl_ = (pkt_).fop.target_win_handle;                    \
             break;                                                      \
         default:                                                        \
             MPIU_ERR_SETANDJUMP1(err_, MPI_ERR_OTHER, "**invalidpkt", "**invalidpkt %d", (pkt_).type); \
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index 3d93b61..c87e4d5 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -440,6 +440,7 @@ typedef struct MPIDI_Request {
     MPI_Win     target_win_handle;
     MPI_Win     source_win_handle;
     MPIDI_CH3_Pkt_flags_t flags; /* flags that were included in the original RMA packet header */
+    struct MPIDI_Win_lock_queue *lock_queue_entry;
     MPI_Request resp_request_handle; /* Handle for get_accumulate response */
 
     MPIDI_REQUEST_SEQNUM
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index de73290..f132d96 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -11,9 +11,7 @@
 #include "mpid_rma_oplist.h"
 #include "mpid_rma_shm.h"
 #include "mpid_rma_issue.h"
-
-MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_lockqueue_alloc);
-MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winlock_getlocallock);
+#include "mpid_rma_lockqueue.h"
 
 #undef FUNCNAME
 #define FUNCNAME send_lock_msg
@@ -161,7 +159,9 @@ static inline int MPIDI_CH3I_Send_flush_ack_pkt(MPIDI_VC_t *vc, MPID_Win *win_pt
     MPIDI_Pkt_init(flush_ack_pkt, MPIDI_CH3_PKT_FLUSH_ACK);
     flush_ack_pkt->source_win_handle = source_win_handle;
     flush_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
-    flush_ack_pkt->flags = flags;
+    flush_ack_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
+        flush_ack_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
 
     /* Because this is in a packet handler, it is already within a critical section */	
     /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
@@ -225,23 +225,109 @@ static inline int send_decr_at_cnt_msg(int dst, MPID_Win * win_ptr)
 
 
 /* enqueue an unsatisfied origin in passive target at target side. */
-static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_CH3_Pkt_t *pkt)
+static inline int enqueue_lock_origin(MPID_Win *win_ptr, MPIDI_VC_t *vc,
+                                      MPIDI_CH3_Pkt_t *pkt,
+                                      MPIDI_msg_sz_t *buflen,
+                                      MPID_Request **reqp)
 {
     MPIDI_Win_lock_queue *new_ptr = NULL;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIR_T_PVAR_TIMER_START(RMA, rma_lockqueue_alloc);
-    new_ptr = (MPIDI_Win_lock_queue *) MPIU_Malloc(sizeof(MPIDI_Win_lock_queue));
-    MPIR_T_PVAR_TIMER_END(RMA, rma_lockqueue_alloc);
-    if (!new_ptr) {
-        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
-                             "MPIDI_Win_lock_queue");
-    }
+    (*reqp) = NULL;
 
-    new_ptr->next = NULL;
-    new_ptr->pkt = (*pkt);
+    mpi_errno = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, pkt, &new_ptr);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    MPIU_Assert(new_ptr != NULL);
     MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
 
+    if (pkt->type == MPIDI_CH3_PKT_LOCK ||
+        pkt->type == MPIDI_CH3_PKT_GET ||
+        pkt->type == MPIDI_CH3_PKT_FOP ||
+        pkt->type == MPIDI_CH3_PKT_CAS) {
+        new_ptr->all_data_recved = 1;
+        /* return bytes of data processed in this pkt handler */
+        (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
+        goto fn_exit;
+    }
+    else {
+        MPI_Aint type_size = 0;
+        MPIDI_msg_sz_t recv_data_sz = 0;
+        MPID_Request *req = NULL;
+        MPI_Datatype target_dtp;
+        int target_count;
+        size_t immed_len = 0;
+        void *immed_data = NULL;
+        int complete = 0;
+        MPIDI_msg_sz_t data_len;
+        char *data_buf = NULL;
+
+        /* This is PUT, ACC, GACC */
+
+        MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE((*pkt), target_dtp, mpi_errno);
+        MPIDI_CH3_PKT_RMA_GET_TARGET_COUNT((*pkt), target_count, mpi_errno);
+
+        MPID_Datatype_get_size_macro(target_dtp, type_size);
+        recv_data_sz = type_size * target_count;
+
+        if (recv_data_sz <= MPIDI_RMA_IMMED_BYTES) {
+            /* all data fits in packet header */
+            new_ptr->all_data_recved = 1;
+            /* return bytes of data processed in this pkt handler */
+            (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
+            goto fn_exit;
+        }
+
+        /* allocate tmp buffer to recieve data. */
+        new_ptr->data = MPIU_Malloc(recv_data_sz);
+        if (new_ptr->data == NULL) {
+            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d",
+                                 recv_data_sz);
+        }
+
+        /* create request to receive upcoming requests */
+        req = MPID_Request_create();
+        MPIU_Object_set_ref(req, 1);
+
+        /* fill in area in req that will be used in Receive_data_found() */
+        req->dev.user_buf = new_ptr->data;
+        req->dev.user_count = target_count;
+        req->dev.datatype = target_dtp;
+        req->dev.recv_data_sz = recv_data_sz;
+        req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
+        req->dev.OnFinal = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete;
+        req->dev.lock_queue_entry = new_ptr;
+
+        MPIDI_CH3_PKT_RMA_GET_IMMED_LEN((*pkt), immed_len, mpi_errno);
+        MPIDI_CH3_PKT_RMA_GET_IMMED_DATA_PTR((*pkt), immed_data, mpi_errno);
+
+        if (immed_len > 0) {
+            /* see if we can receive some data from packet header */
+            MPIU_Memcpy(req->dev.user_buf, immed_data, immed_len);
+            req->dev.user_buf = (void*)((char*)req->dev.user_buf + immed_len);
+            req->dev.recv_data_sz -= immed_len;
+        }
+
+        data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
+        data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
+        MPIU_Assert(req->dev.recv_data_sz > 0);
+
+        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+        /* return bytes of data processed in this pkt handler */
+        (*buflen) = sizeof(MPIDI_CH3_Pkt_t) + data_len;
+
+        if (complete) {
+            mpi_errno = MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(vc, req, &complete);
+            if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+            if (complete) {
+                goto fn_exit;
+            }
+        }
+
+        (*reqp) = req;
+    }
+
  fn_exit:
     return mpi_errno;
  fn_fail:
@@ -294,13 +380,18 @@ static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
         /* Queue the lock information. */
         MPIDI_CH3_Pkt_t pkt;
         MPIDI_CH3_Pkt_lock_t *lock_pkt = &pkt.lock;
+        MPIDI_Win_lock_queue *new_ptr = NULL;
 
         MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
         lock_pkt->lock_type = lock_type;
         lock_pkt->origin_rank = win_ptr->comm_ptr->rank;
 
-        mpi_errno = enqueue_lock_origin(win_ptr, &pkt);
+        mpi_errno = MPIDI_CH3I_Win_lock_entry_alloc(win_ptr, &pkt, &new_ptr);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        MPIU_Assert(new_ptr != NULL);
+        MPL_LL_APPEND(win_ptr->lock_queue, win_ptr->lock_queue_tail, new_ptr);
+
+        new_ptr->all_data_recved = 1;
     }
 
   fn_exit:
@@ -443,31 +534,29 @@ static inline int do_accumulate_op(void *source_buf, void *target_buf,
 }
 
 
-static inline int check_piggyback_lock(MPID_Win *win_ptr, MPIDI_CH3_Pkt_t *pkt, int *acquire_lock_fail) {
+static inline int check_piggyback_lock(MPID_Win *win_ptr, MPIDI_VC_t *vc,
+                                       MPIDI_CH3_Pkt_t *pkt,
+                                       MPIDI_msg_sz_t *buflen,
+                                       int *acquire_lock_fail,
+                                       MPID_Request **reqp) {
     int lock_type;
     MPIDI_CH3_Pkt_flags_t flags;
     int mpi_errno = MPI_SUCCESS;
 
     (*acquire_lock_fail) = 0;
+    (*reqp) = NULL;
 
     MPIDI_CH3_PKT_RMA_GET_FLAGS((*pkt), flags, mpi_errno);
     MPIDI_CH3_PKT_RMA_GET_LOCK_TYPE((*pkt), lock_type, mpi_errno);
 
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
         if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) {
-
             /* cannot acquire the lock, queue up this operation. */
-            mpi_errno = enqueue_lock_origin(win_ptr, pkt);
+            mpi_errno = enqueue_lock_origin(win_ptr, vc, pkt, buflen, reqp);
             if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
             (*acquire_lock_fail) = 1;
         }
-        else {
-            /* unset LOCK flag */
-            MPIDI_CH3_PKT_RMA_UNSET_FLAG((*pkt), MPIDI_CH3_PKT_FLAG_RMA_LOCK, mpi_errno);
-            /* set LOCK_GRANTED flag */
-            MPIDI_CH3_PKT_RMA_SET_FLAG((*pkt), MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED, mpi_errno);
-        }
     }
 
  fn_exit:
@@ -484,7 +573,7 @@ static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
 
     if (type == MPIDI_CH3_PKT_PUT || type == MPIDI_CH3_PKT_ACCUMULATE) {
         /* This is PUT or ACC */
-        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
+        if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK) {
             if (!(flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) &&
                 !(flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)) {
                 mpi_errno = MPIDI_CH3I_Send_lock_granted_pkt(vc, win_ptr, source_win_handle);
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index dbf2b36..150e1c7 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -236,7 +236,7 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *vc,
     get_accum_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
     get_accum_resp_pkt->source_win_handle = rreq->dev.source_win_handle;
     get_accum_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
-    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED)
+    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
     if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
         get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
@@ -563,7 +563,7 @@ int MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete( MPIDI_VC_t *vc,
     get_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
     get_resp_pkt->source_win_handle = rreq->dev.source_win_handle;
     get_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
-    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED)
+    if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
     if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
         get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
@@ -796,15 +796,407 @@ static int create_derived_datatype(MPID_Request *req, MPID_Datatype **dtp)
 }
 
 
-static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+/* Execute a queued LOCK+PUT entry: copy the buffered origin data to the target
+ * address recorded in the packet, then run the PUT completion step. */
+static inline int perform_put_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+{
+    MPIDI_CH3_Pkt_put_t *pkt = &((lock_entry->pkt).put);
+    MPIDI_VC_t *origin_vc = NULL;
+    void *payload;
+    int mpi_errno = MPI_SUCCESS;
+
+    /* A NULL side buffer means all data fits in the packet header; otherwise
+     * the payload is in the entry's own data buffer. */
+    if (lock_entry->data != NULL)
+        payload = lock_entry->data;
+    else
+        payload = pkt->data;
+
+    mpi_errno = MPIR_Localcopy(payload, pkt->count, pkt->datatype,
+                               pkt->addr, pkt->count, pkt->datatype);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    /* look up the origin's vc object, then do the final action */
+    MPIDI_Comm_get_vc(win_ptr->comm_ptr, pkt->origin_rank, &origin_vc);
+    mpi_errno = finish_op_on_target(win_ptr, origin_vc, MPIDI_CH3_PKT_PUT,
+                                    pkt->flags, pkt->source_win_handle);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* Execute a queued LOCK+GET entry: build a GET_RESP packet and start sending
+ * it, followed by the requested target data, to the origin process. */
+static inline int perform_get_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+{
+    MPIDI_CH3_Pkt_get_t *req_pkt = &((lock_entry->pkt).get);
+    MPIDI_CH3_Pkt_t resp_storage;
+    MPIDI_CH3_Pkt_get_resp_t *resp_pkt = &resp_storage.get_resp;
+    MPID_IOV resp_iov[MPID_IOV_LIMIT];
+    MPID_Request *resp_req = NULL;
+    MPIDI_VC_t *origin_vc = NULL;
+    MPI_Aint elem_size;
+    int mpi_errno = MPI_SUCCESS;
+
+    resp_req = MPID_Request_create();
+    if (resp_req == NULL) {
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomemreq");
+    }
+    MPIU_Object_set_ref(resp_req, 1);
+
+    resp_req->kind = MPID_REQUEST_SEND;
+    MPIDI_Request_set_type(resp_req, MPIDI_REQUEST_TYPE_GET_RESP);
+    resp_req->dev.OnFinal = MPIDI_CH3_ReqHandler_GetSendComplete;
+    resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GetSendComplete;
+    resp_req->dev.flags = req_pkt->flags;
+    resp_req->dev.target_win_handle = win_ptr->handle;
+
+    /* raise the Active Target counter: zero must imply GET-like ops are done */
+    win_ptr->at_completion_counter += 1;
+
+    /* response header: echo the request handle and translate the request's
+     * LOCK/FLUSH/UNLOCK flags into the matching acknowledgement flags */
+    MPIDI_Pkt_init(resp_pkt, MPIDI_CH3_PKT_GET_RESP);
+    resp_pkt->request_handle = req_pkt->request_handle;
+    resp_pkt->source_win_handle = req_pkt->source_win_handle;
+    resp_pkt->target_rank = win_ptr->comm_ptr->rank;
+    resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+    if (req_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
+        resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
+    if (req_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+        resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
+    if (req_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
+        resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
+
+    /* iov[0] is the header, iov[1] is count * type-size bytes at the target */
+    MPID_Datatype_get_size_macro(req_pkt->datatype, elem_size);
+    resp_iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) resp_pkt;
+    resp_iov[0].MPID_IOV_LEN = sizeof(*resp_pkt);
+    resp_iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)req_pkt->addr;
+    resp_iov[1].MPID_IOV_LEN = req_pkt->count * elem_size;
+
+    MPIDI_Comm_get_vc(win_ptr->comm_ptr, req_pkt->origin_rank, &origin_vc);
+    mpi_errno = MPIDI_CH3_iSendv(origin_vc, resp_req, resp_iov, 2);
+    if (mpi_errno != MPI_SUCCESS) {
+        MPID_Request_release(resp_req);
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
+    }
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+
+/* Execute a queued LOCK+ACCUMULATE entry: apply the accumulate op to the
+ * target address (under the window's SHM mutex when the window is
+ * shm_allocated) and then run the ACCUMULATE completion step.  The mutex is
+ * released before any error is propagated so a failed op cannot leave it held. */
+static inline int perform_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+{
+    MPIDI_CH3_Pkt_accum_t *acc_pkt = &((lock_entry->pkt).accum);
+    MPIDI_VC_t *vc = NULL;
+    void *src_data;
+    int mpi_errno = MPI_SUCCESS;
+
+    MPIU_Assert(lock_entry->all_data_recved == 1);
+
+    /* NULL side buffer: all data fits in the packet header */
+    src_data = (lock_entry->data != NULL) ? lock_entry->data : (void *) acc_pkt->data;
+
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
+
+    mpi_errno = do_accumulate_op(src_data, acc_pkt->addr,
+                                 acc_pkt->count, acc_pkt->datatype, acc_pkt->op);
+
+    /* unlock first, check the result second: jumping to fn_fail while the
+     * mutex is held would leak it */
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    MPIDI_Comm_get_vc(win_ptr->comm_ptr, acc_pkt->origin_rank, &vc);
+    mpi_errno = finish_op_on_target(win_ptr, vc, MPIDI_CH3_PKT_ACCUMULATE,
+                                    acc_pkt->flags, acc_pkt->source_win_handle);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+
+/* Execute a queued LOCK+GET_ACCUMULATE entry: snapshot the target data into a
+ * temporary buffer, start sending it to the origin as GET_ACCUM_RESP, then
+ * apply the accumulate op.  Error paths release what they still hold. */
+static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+{
+    MPIDI_CH3_Pkt_t upkt;
+    MPIDI_CH3_Pkt_get_accum_resp_t *get_accum_resp_pkt = &upkt.get_accum_resp;
+    MPIDI_CH3_Pkt_get_accum_t *get_accum_pkt = &((lock_entry->pkt).get_accum);
+    MPID_Request *sreq = NULL;
+    MPIDI_VC_t *vc = NULL;
+    MPI_Aint type_size, data_sz;
+    void *acc_src;
+    MPID_IOV iov[MPID_IOV_LIMIT];
+    int mpi_errno = MPI_SUCCESS;
+
+    sreq = MPID_Request_create();
+    if (sreq == NULL) {
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomemreq");
+    }
+    MPIU_Object_set_ref(sreq, 1);
+
+    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_GET_ACCUM_RESP);
+    sreq->kind = MPID_REQUEST_SEND;
+    sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GaccumSendComplete;
+    sreq->dev.OnFinal = MPIDI_CH3_ReqHandler_GaccumSendComplete;
+    sreq->dev.target_win_handle = win_ptr->handle;
+    sreq->dev.flags = get_accum_pkt->flags;
+
+    /* Copy data into a temporary buffer */
+    MPID_Datatype_get_size_macro(get_accum_pkt->datatype, type_size);
+    data_sz = get_accum_pkt->count * type_size;
+    sreq->dev.user_buf = (void *)MPIU_Malloc(data_sz);
+    if (sreq->dev.user_buf == NULL && data_sz > 0) {
+        /* never copy into a failed allocation: drop the request and report */
+        MPID_Request_release(sreq);
+        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %d", (int) data_sz);
+    }
+
+    if (MPIR_DATATYPE_IS_PREDEFINED(get_accum_pkt->datatype)) {
+        MPIU_Memcpy(sreq->dev.user_buf, get_accum_pkt->addr, data_sz);
+    } else {
+        MPID_Segment *seg = MPID_Segment_alloc();
+        MPI_Aint last = data_sz;
+
+        if (seg == NULL) {
+            /* give back the snapshot buffer and the request before failing */
+            if (sreq->dev.user_buf != NULL) MPIU_Free(sreq->dev.user_buf);
+            MPID_Request_release(sreq);
+            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment");
+        }
+        MPID_Segment_init(get_accum_pkt->addr, get_accum_pkt->count,
+                          get_accum_pkt->datatype, seg, 0);
+        MPID_Segment_pack(seg, 0, &last, sreq->dev.user_buf);
+        MPID_Segment_free(seg);
+    }
+
+    /* raise the Active Target counter: zero must imply GET-like ops are done */
+    win_ptr->at_completion_counter++;
+
+    MPIDI_Pkt_init(get_accum_resp_pkt, MPIDI_CH3_PKT_GET_ACCUM_RESP);
+    get_accum_resp_pkt->request_handle = get_accum_pkt->request_handle;
+    get_accum_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+    if (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
+        get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
+    if (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+        get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
+    if (get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
+        get_accum_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
+    get_accum_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
+    get_accum_resp_pkt->source_win_handle = get_accum_pkt->source_win_handle;
+    iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) get_accum_resp_pkt;
+    iov[0].MPID_IOV_LEN = sizeof(*get_accum_resp_pkt);
+    iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) sreq->dev.user_buf;
+    iov[1].MPID_IOV_LEN = data_sz;
+
+    MPIDI_Comm_get_vc(win_ptr->comm_ptr, get_accum_pkt->origin_rank, &vc);
+    mpi_errno = MPIDI_CH3_iSendv(vc, sreq, iov, 2);
+    if (mpi_errno != MPI_SUCCESS) {
+        MPID_Request_release(sreq);
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
+    }
+
+    /* Perform ACCUMULATE OP; NULL side buffer: all data fits in packet header */
+    acc_src = (lock_entry->data != NULL) ? lock_entry->data : (void *) get_accum_pkt->data;
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
+    mpi_errno = do_accumulate_op(acc_src, get_accum_pkt->addr,
+                                 get_accum_pkt->count, get_accum_pkt->datatype, get_accum_pkt->op);
+    /* release the mutex before propagating any error so it is never left held */
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* Execute a queued LOCK+FOP entry: reply with the old target value and apply the op. */
+static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
 {
+    MPIDI_CH3_Pkt_t upkt;
+    MPIDI_CH3_Pkt_fop_resp_t *fop_resp_pkt = &upkt.fop_resp;
+    MPIDI_CH3_Pkt_fop_t *fop_pkt = &((lock_entry->pkt).fop);
+    MPID_Request *resp_req = NULL;
+    MPIDI_VC_t *vc = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPID_Request *req = NULL;
-    MPIDI_msg_sz_t len = sizeof(MPIDI_CH3_Pkt_t);
+
+    /* FIXME: this function duplicates the logic of PktHandler_FOP(); the two
+       should be refactored to share one implementation. */
+
+    /* get vc object */
+    MPIDI_Comm_get_vc(win_ptr->comm_ptr, fop_pkt->origin_rank, &vc);
+
+    MPIDI_Pkt_init(fop_resp_pkt, MPIDI_CH3_PKT_FOP_RESP);
+    fop_resp_pkt->request_handle = fop_pkt->request_handle;
+    fop_resp_pkt->source_win_handle = fop_pkt->source_win_handle;
+    fop_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
+    fop_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
+        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
+    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
+    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
+        fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
+    fop_resp_pkt->immed_len = fop_pkt->immed_len;
+
+    /* copy the current target data (at addr) into the resp pkt header */
+    void *src = fop_pkt->addr, *dest = fop_resp_pkt->data;
+    mpi_errno = immed_copy(src, dest, fop_resp_pkt->immed_len);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    /* NOTE(review): the read above is outside the SHM mutex taken below - confirm intended */
+    /* Apply the op (skipped entirely for MPI_NO_OP) */
+    if (fop_pkt->op != MPI_NO_OP) {
+        MPI_User_function *uop = MPIR_OP_HDL_TO_FN(fop_pkt->op);
+        int one = 1;
+        if (win_ptr->shm_allocated == TRUE)
+            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
+        (*uop)(fop_pkt->data, fop_pkt->addr, &one, &(fop_pkt->datatype));
+        if (win_ptr->shm_allocated == TRUE)
+            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
+    }
+
+    /* send back the original data */
+    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
+    mpi_errno = MPIDI_CH3_iStartMsg(vc, fop_resp_pkt, sizeof(*fop_resp_pkt), &resp_req);
+    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
+    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
+
+    if (resp_req != NULL) {
+        if (!MPID_Request_is_complete(resp_req)) {
+            /* sending process is not completed, set proper OnDataAvail
+               (it is initialized to NULL by lower layer) */
+            resp_req->dev.target_win_handle = fop_pkt->target_win_handle;
+            resp_req->dev.flags = fop_pkt->flags;
+            resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_FOPSendComplete;
+
+            /* increment the Active Target counter so that GET-like operations
+               are guaranteed to be complete when the counter reaches zero. */
+            win_ptr->at_completion_counter++;
+
+            MPID_Request_release(resp_req);
+            goto fn_exit;
+        }
+        else {
+            MPID_Request_release(resp_req);
+        }
+    }
+
+    /* do final action (reached only when no send is left pending above) */
+    mpi_errno = finish_op_on_target(win_ptr, vc, MPIDI_CH3_PKT_FOP,
+                                    fop_pkt->flags, fop_pkt->source_win_handle);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* Execute a queued LOCK+CAS entry: compare-and-swap at the target, reply with the old value. */
+static inline int perform_cas_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+{
+    MPIDI_CH3_Pkt_t upkt;
+    MPIDI_CH3_Pkt_cas_resp_t *cas_resp_pkt = &upkt.cas_resp;
+    MPIDI_CH3_Pkt_cas_t *cas_pkt = &((lock_entry->pkt).cas);
+    MPID_Request *send_req = NULL;
     MPIDI_VC_t *vc = NULL;
-    int origin_rank;
-    static MPIDI_CH3_PktHandler_Fcn *pktArray[MPIDI_CH3_PKT_END_ALL+1];
-    static int needsInit = 1;
+    MPI_Aint len;
+    int mpi_errno = MPI_SUCCESS;
+
+    /* get vc object */
+    MPIDI_Comm_get_vc(win_ptr->comm_ptr, cas_pkt->origin_rank, &vc);
+
+    MPIDI_Pkt_init(cas_resp_pkt, MPIDI_CH3_PKT_CAS_RESP);
+    cas_resp_pkt->request_handle = cas_pkt->request_handle;
+    cas_resp_pkt->source_win_handle = cas_pkt->source_win_handle;
+    cas_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
+    cas_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
+        cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
+    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+        cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
+    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)
+        cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_ACK;
+
+    /* Copy old value into the response packet */
+    MPID_Datatype_get_size_macro(cas_pkt->datatype, len);
+    MPIU_Assert(len <= sizeof(MPIDI_CH3_CAS_Immed_u));
+    /* for shm_allocated windows the read and the conditional write share one mutex hold */
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
+
+    MPIU_Memcpy((void *) &cas_resp_pkt->data, cas_pkt->addr, len);
+
+    /* Compare and replace if equal */
+    if (MPIR_Compare_equal(&cas_pkt->compare_data, cas_pkt->addr, cas_pkt->datatype)) {
+        MPIU_Memcpy(cas_pkt->addr, &cas_pkt->origin_data, len);
+    }
+
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
+
+    /* Send the response packet */
+    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
+    mpi_errno = MPIDI_CH3_iStartMsg(vc, cas_resp_pkt, sizeof(*cas_resp_pkt), &send_req);
+    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
+
+    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
+
+    if (send_req != NULL) {
+        if (!MPID_Request_is_complete(send_req)) {
+            /* sending process is not completed, set proper OnDataAvail
+               (it is initialized to NULL by lower layer) */
+            send_req->dev.target_win_handle = cas_pkt->target_win_handle;
+            send_req->dev.flags = cas_pkt->flags;
+            send_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_CASSendComplete;
+
+            /* increment the Active Target counter so that GET-like operations
+               are guaranteed to be complete when the counter reaches zero. */
+            win_ptr->at_completion_counter++;
+
+            MPID_Request_release(send_req);
+            goto fn_exit;
+        }
+        else
+            MPID_Request_release(send_req);
+    }
+
+    /* do final action (reached only when no send is left pending above) */
+    mpi_errno = finish_op_on_target(win_ptr, vc, MPIDI_CH3_PKT_CAS,
+                                    cas_pkt->flags, cas_pkt->source_win_handle);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+
+static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_entry)
+{
+    int mpi_errno = MPI_SUCCESS;
 
     if (lock_entry->pkt.type == MPIDI_CH3_PKT_LOCK) {
 
@@ -812,21 +1204,11 @@ static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_que
 
         MPIDI_CH3_Pkt_lock_t *lock_pkt = &(lock_entry->pkt.lock);
         if (lock_pkt->origin_rank == win_ptr->comm_ptr->rank) {
-            if (win_ptr->outstanding_locks > 0) {
-                win_ptr->outstanding_locks--;
-                MPIU_Assert(win_ptr->outstanding_locks >= 0);
-            }
-            else {
-                MPIDI_RMA_Target_t *t = NULL;
-                mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr,
-                                                       win_ptr->comm_ptr->rank, &t);
-                if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-                MPIU_Assert(t != NULL);
-                t->outstanding_lock--;
-                MPIU_Assert(t->outstanding_lock == 0);
-            }
+            mpi_errno = set_lock_sync_counter(win_ptr, lock_pkt->origin_rank);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
         else {
+            MPIDI_VC_t *vc = NULL;
             MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr,
                                          lock_pkt->origin_rank, &vc);
             mpi_errno = MPIDI_CH3I_Send_lock_granted_pkt(vc, win_ptr,
@@ -836,32 +1218,35 @@ static inline int perform_op_in_lock_queue(MPID_Win *win_ptr, MPIDI_Win_lock_que
     }
     else {
         /* LOCK+OP packet */
-
-        /* get VC */
-        MPIDI_CH3_PKT_RMA_GET_ORIGIN_RANK(lock_entry->pkt, origin_rank, mpi_errno);
-        MPIDI_Comm_get_vc(win_ptr->comm_ptr, origin_rank, &vc);
-
-        /* unset LOCK flag */
-        MPIDI_CH3_PKT_RMA_UNSET_FLAG(lock_entry->pkt, MPIDI_CH3_PKT_FLAG_RMA_LOCK, mpi_errno);
-
-        /* set LOCK_GRANTED flag */
-        MPIDI_CH3_PKT_RMA_SET_FLAG(lock_entry->pkt, MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED, mpi_errno);
-
-        if (needsInit) {
-            mpi_errno = MPIDI_CH3_PktHandler_Init(pktArray, MPIDI_CH3_PKT_END_CH3);
+        switch(lock_entry->pkt.type) {
+        case (MPIDI_CH3_PKT_PUT):
+            mpi_errno = perform_put_in_lock_queue(win_ptr, lock_entry);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            break;
+        case (MPIDI_CH3_PKT_GET):
+            mpi_errno = perform_get_in_lock_queue(win_ptr, lock_entry);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            break;
+        case (MPIDI_CH3_PKT_ACCUMULATE):
+            mpi_errno = perform_acc_in_lock_queue(win_ptr, lock_entry);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            break;
+        case (MPIDI_CH3_PKT_GET_ACCUM):
+            mpi_errno = perform_get_acc_in_lock_queue(win_ptr, lock_entry);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-            needsInit = 0;
+            break;
+        case (MPIDI_CH3_PKT_FOP):
+            mpi_errno = perform_fop_in_lock_queue(win_ptr, lock_entry);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            break;
+        case (MPIDI_CH3_PKT_CAS):
+            mpi_errno = perform_cas_in_lock_queue(win_ptr, lock_entry);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            break;
+        default:
+            MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
+                                 "**invalidpkt", "**invalidpkt %d", lock_entry->pkt.type);
         }
-
-        /* invalid pkt data will result in unpredictable behavior */
-        MPIU_Assert((lock_entry->pkt).type >= MPIDI_CH3_PKT_PUT && (lock_entry->pkt).type <= MPIDI_CH3_PKT_CAS);
-
-        /* trigger packet handler to deal with this op. */
-        mpi_errno = pktArray[lock_entry->pkt.type](vc, &(lock_entry->pkt), &len, &req);
-        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-        MPIU_Assert(len == sizeof(MPIDI_CH3_Pkt_t));
-        MPIU_Assert(req == NULL);
     }
 
  fn_exit:
@@ -930,12 +1315,17 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
             while (lock_entry) {
                 lock_entry_next = lock_entry->next;
 
+                if (lock_entry->all_data_recved) {
                 MPIDI_CH3_PKT_RMA_GET_LOCK_TYPE(lock_entry->pkt, requested_lock, mpi_errno);
                 if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, requested_lock) == 1) {
                     /* perform this OP */
 
                     mpi_errno = perform_op_in_lock_queue(win_ptr, lock_entry);
                     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+                    /* free data buffer in lock queue entry */
+                    if (lock_entry->data != NULL)
+                        MPIU_Free(lock_entry->data);
 			    
                     /* dequeue entry from lock queue */
                     MPL_LL_DELETE(win_ptr->lock_queue, win_ptr->lock_queue_tail, lock_entry);
@@ -946,6 +1336,7 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
                     if (requested_lock == MPI_LOCK_EXCLUSIVE)
                         break;
                 }
+                }
                 lock_entry = lock_entry_next;
 	    }
 	} while (temp_entered_count != entered_count);
@@ -959,3 +1350,62 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
  fn_fail:
     goto fn_exit;
 }
+
+
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete( MPIDI_VC_t *vc,
+                                                      MPID_Request *rreq,
+                                                      int *complete )
+{
+    int requested_lock;
+    MPI_Win target_win_handle;
+    MPID_Win *win_ptr = NULL;
+    MPIDI_Win_lock_queue *lock_queue_entry = rreq->dev.lock_queue_entry;
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
+
+    /* This handler is triggered when we received all data of a lock queue
+       entry.  It marks the entry as fully received, tries once to acquire the
+       window lock for it and, on success, performs and discards the entry.
+       rreq is completed and *complete is set to TRUE unless the op fails. */
+    MPIU_Assert(lock_queue_entry != NULL);
+
+    /* Mark all data received in lock queue entry */
+    lock_queue_entry->all_data_recved = 1;
+
+    /* try to acquire the lock here */
+    MPIDI_CH3_PKT_RMA_GET_LOCK_TYPE(lock_queue_entry->pkt, requested_lock, mpi_errno);
+    MPIDI_CH3_PKT_RMA_GET_TARGET_WIN_HANDLE(lock_queue_entry->pkt, target_win_handle, mpi_errno);
+    MPID_Win_get_ptr(target_win_handle, win_ptr);
+
+    if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, requested_lock) == 1) {
+        /* Dequeue BEFORE performing: if performing the op leads to a walk of
+           win_ptr->lock_queue (NOTE(review): e.g. an op that also unlocks -
+           confirm), a still-linked entry could be performed and freed twice. */
+        MPL_LL_DELETE(win_ptr->lock_queue, win_ptr->lock_queue_tail, lock_queue_entry);
+        mpi_errno = perform_op_in_lock_queue(win_ptr, lock_queue_entry);
+        /* entry is unlinked now: free it and its data buffer even on failure */
+        if (lock_queue_entry->data != NULL)
+            MPIU_Free(lock_queue_entry->data);
+        MPIU_Free(lock_queue_entry);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    }
+    /* If try acquiring lock failed, just leave the lock queue entry in the queue with
+       all_data_recved marked as 1, release_lock() function will traverse the queue
+       and find entry with all_data_recved being 1 to grant the lock. */
+
+    /* mark receive data transfer as complete and decrement CC in receive
+       request */
+    MPIDI_CH3U_Request_complete(rreq);
+    *complete = TRUE;
+
+ fn_fail:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
+    return mpi_errno;
+}
diff --git a/src/mpid/ch3/src/ch3u_request.c b/src/mpid/ch3/src/ch3u_request.c
index 08a71da..7547ce5 100644
--- a/src/mpid/ch3/src/ch3u_request.c
+++ b/src/mpid/ch3/src/ch3u_request.c
@@ -82,6 +82,7 @@ MPID_Request * MPID_Request_create(void)
 	   request for RMA operations */
 	req->dev.target_win_handle = MPI_WIN_NULL;
 	req->dev.source_win_handle = MPI_WIN_NULL;
+        req->dev.lock_queue_entry  = NULL;
 	req->dev.dtype_info	   = NULL;
 	req->dev.dataloop	   = NULL;
 	req->dev.iov_offset        = 0;
diff --git a/src/mpid/ch3/src/ch3u_rma_ops.c b/src/mpid/ch3/src/ch3u_rma_ops.c
index 4b8145b..4347edf 100644
--- a/src/mpid/ch3/src/ch3u_rma_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_ops.c
@@ -32,6 +32,25 @@ cvars:
           starts to poke progress engine when number of posted
           operations reaches that value.
 
+    - name        : MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE
+      category    : CH3
+      type        : int
+      default     : 65536
+      class       : none
+      verbosity   : MPI_T_VERBOSITY_USER_BASIC
+      scope       : MPI_T_SCOPE_ALL_EQ
+      description : >-
+          Specify the threshold of data size of a RMA operation
+          which can be piggybacked with a LOCK message. It is
+          always a positive value and should not be smaller
+          than MPIDI_RMA_IMMED_BYTES.
+          If user sets it as a small value, for middle and large
+          data size, we will lose performance because of always
+          waiting for round-trip of LOCK synchronization; if
+          user sets it as a large value, we need to consume
+          more memory on target side to buffer this lock request
+          when lock is not satisfied.
+
 === END_MPI_T_CVAR_INFO_BLOCK ===
 */
 
@@ -174,12 +193,11 @@ int MPIDI_CH3I_Put(const void *origin_addr, int origin_count, MPI_Datatype
                 /* copy data from origin buffer to immed area in packet header */
                 mpi_errno = immed_copy(src, dest, put_pkt->immed_len);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-                /* If all data is in pkt header, mark this op as a candidate
-                   for piggybacking LOCK. */
-                if (put_pkt->immed_len == len)
-                    new_ptr->piggyback_lock_candidate = 1;
             }
+
+            if (len <= MPIR_MAX(MPIDI_RMA_IMMED_BYTES,
+                                MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE))
+                new_ptr->piggyback_lock_candidate = 1;
         }
 
         MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
@@ -521,12 +539,11 @@ int MPIDI_CH3I_Accumulate(const void *origin_addr, int origin_count, MPI_Datatyp
                 /* copy data from origin buffer to immed area in packet header */
                 mpi_errno = immed_copy(src, dest, accum_pkt->immed_len);
                 if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-                /* If all data is in pkt header, mark this op as
-                   a candidate for piggybacking LOCK. */
-                if (accum_pkt->immed_len == len)
-                    new_ptr->piggyback_lock_candidate = 1;
             }
+
+            if (len <= MPIR_MAX(MPIDI_RMA_IMMED_BYTES,
+                                MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE))
+                new_ptr->piggyback_lock_candidate = 1;
         }
 
         MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
@@ -741,12 +758,11 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
                     /* copy data from origin buffer to immed area in packet header */
                     mpi_errno = immed_copy(src, dest, get_accum_pkt->immed_len);
                     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-                    /* If all data is in pkt header, mark this op as a candidate
-                       for piggybacking LOCK. */
-                    if (get_accum_pkt->immed_len == len)
-                        new_ptr->piggyback_lock_candidate = 1;
                 }
+
+                if (len <= MPIR_MAX(MPIDI_RMA_IMMED_BYTES,
+                                    MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE))
+                    new_ptr->piggyback_lock_candidate = 1;
             }
         }
 
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index b95eacd..63cb77c 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -205,18 +205,19 @@ int MPIDI_CH3_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIU_Assert(put_pkt->target_win_handle != MPI_WIN_NULL);
     MPID_Win_get_ptr(put_pkt->target_win_handle, win_ptr);
 
-    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
-    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
-
-    mpi_errno = check_piggyback_lock(win_ptr, pkt, &acquire_lock_fail);
+    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen,
+                                     &acquire_lock_fail, &req);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     if (acquire_lock_fail) {
-        (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
-        (*rreqp) = NULL;
+        (*rreqp) = req;
         goto fn_exit;
     }
 
+    /* get start location of data and length of data */
+    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
+    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
+
     req = MPID_Request_create();
     MPIU_Object_set_ref(req, 1);
 
@@ -368,23 +369,24 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIU_Assert(get_pkt->target_win_handle != MPI_WIN_NULL);
     MPID_Win_get_ptr(get_pkt->target_win_handle, win_ptr);
 
-    mpi_errno = check_piggyback_lock(win_ptr, pkt, &acquire_lock_fail);
+    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt,
+                                     buflen, &acquire_lock_fail, &req);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     if (acquire_lock_fail) {
-        (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
-        (*rreqp) = NULL;
+        (*rreqp) = req;
         goto fn_exit;
     }
 
-    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
-    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
-
     req = MPID_Request_create();
     req->dev.target_win_handle = get_pkt->target_win_handle;
     req->dev.source_win_handle = get_pkt->source_win_handle;
     req->dev.flags = get_pkt->flags;
 
+    /* get start location of data and length of data */
+    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
+    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
+
     /* here we increment the Active Target counter to guarantee the GET-like
        operation are completed when counter reaches zero. */
     win_ptr->at_completion_counter++;
@@ -402,7 +404,7 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         MPIDI_Pkt_init(get_resp_pkt, MPIDI_CH3_PKT_GET_RESP);
         get_resp_pkt->request_handle = get_pkt->request_handle;
         get_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
-        if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED)
+        if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
             get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
         if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
             get_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
@@ -524,18 +526,15 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIU_Assert(accum_pkt->target_win_handle != MPI_WIN_NULL);
     MPID_Win_get_ptr(accum_pkt->target_win_handle, win_ptr);
 
-    mpi_errno = check_piggyback_lock(win_ptr, pkt, &acquire_lock_fail);
+    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen,
+                                     &acquire_lock_fail, &req);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     if (acquire_lock_fail) {
-        (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
-        (*rreqp) = NULL;
+        (*rreqp) = req;
         goto fn_exit;
     }
 
-    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
-    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
-
     req = MPID_Request_create();
     MPIU_Object_set_ref(req, 1);
     *rreqp = req;
@@ -550,6 +549,10 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     req->dev.resp_request_handle = MPI_REQUEST_NULL;
     req->dev.OnFinal = MPIDI_CH3_ReqHandler_AccumRecvComplete;
 
+    /* get start location of data and length of data */
+    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
+    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
+
     if (MPIR_DATATYPE_IS_PREDEFINED(accum_pkt->datatype)) {
         MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_ACCUM_RESP);
         req->dev.datatype = accum_pkt->datatype;
@@ -697,18 +700,16 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIU_Assert(get_accum_pkt->target_win_handle != MPI_WIN_NULL);
     MPID_Win_get_ptr(get_accum_pkt->target_win_handle, win_ptr);
 
-    mpi_errno = check_piggyback_lock(win_ptr, pkt, &acquire_lock_fail);
+    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt,
+                                     buflen,
+                                     &acquire_lock_fail, &req);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     if (acquire_lock_fail) {
-        (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
-        (*rreqp) = NULL;
+        (*rreqp) = req;
         goto fn_exit;
     }
 
-    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
-    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
-
     req = MPID_Request_create();
     MPIU_Object_set_ref(req, 1);
     *rreqp = req;
@@ -723,6 +724,10 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     req->dev.resp_request_handle = get_accum_pkt->request_handle;
     req->dev.OnFinal = MPIDI_CH3_ReqHandler_GaccumRecvComplete;
 
+    /* get start location of data and length of data */
+    data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
+    data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
+
     if (MPIR_DATATYPE_IS_PREDEFINED(get_accum_pkt->datatype)) {
         MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_ACCUM_RESP);
         req->dev.datatype = get_accum_pkt->datatype;
@@ -857,6 +862,7 @@ int MPIDI_CH3_PktHandler_CAS(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIDI_CH3_Pkt_cas_t *cas_pkt = &pkt->cas;
     MPID_Win *win_ptr;
     MPID_Request *req;
+    MPID_Request *rreq = NULL;
     MPI_Aint len;
     int acquire_lock_fail = 0;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);
@@ -870,23 +876,28 @@ int MPIDI_CH3_PktHandler_CAS(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIU_Assert(cas_pkt->target_win_handle != MPI_WIN_NULL);
     MPID_Win_get_ptr(cas_pkt->target_win_handle, win_ptr);
 
+    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen,
+                                     &acquire_lock_fail, &rreq);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    MPIU_Assert(rreq == NULL); /* CAS should not have request because all data
+                                  can fit in packet header */
+
+    if (acquire_lock_fail) {
+        (*rreqp) = rreq;
+        goto fn_exit;
+    }
+
     /* return the number of bytes processed in this function */
     /* data_len == 0 (all within packet) */
     *buflen = sizeof(MPIDI_CH3_Pkt_t);
     *rreqp = NULL;
 
-    mpi_errno = check_piggyback_lock(win_ptr, pkt, &acquire_lock_fail);
-    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-    if (acquire_lock_fail)
-        goto fn_exit;
-
     MPIDI_Pkt_init(cas_resp_pkt, MPIDI_CH3_PKT_CAS_RESP);
     cas_resp_pkt->request_handle = cas_pkt->request_handle;
     cas_resp_pkt->source_win_handle = cas_pkt->source_win_handle;
     cas_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
     cas_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
-    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED)
+    if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
     if (cas_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
         cas_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
@@ -1018,6 +1029,7 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_fop_resp_t *fop_resp_pkt = &upkt.fop_resp;
     MPID_Request *resp_req = NULL;
+    MPID_Request *rreq = NULL;
     int acquire_lock_fail = 0;
     MPID_Win *win_ptr = NULL;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
@@ -1030,22 +1042,26 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     MPID_Win_get_ptr(fop_pkt->target_win_handle, win_ptr);
 
-    (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
-    (*rreqp) = NULL;
-
-    mpi_errno = check_piggyback_lock(win_ptr, pkt, &acquire_lock_fail);
+    mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen,
+                                     &acquire_lock_fail, &rreq);
     if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
+    MPIU_Assert(rreq == NULL); /* FOP should not have request because all data
+                                  can fit in packet header */
     if (acquire_lock_fail) {
+        (*rreqp) = rreq;
         goto fn_exit;
     }
 
+    (*buflen) = sizeof(MPIDI_CH3_Pkt_t);
+    (*rreqp) = NULL;
+
     MPIDI_Pkt_init(fop_resp_pkt, MPIDI_CH3_PKT_FOP_RESP);
     fop_resp_pkt->request_handle = fop_pkt->request_handle;
     fop_resp_pkt->source_win_handle = fop_pkt->source_win_handle;
     fop_resp_pkt->target_rank = win_ptr->comm_ptr->rank;
     fop_resp_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
-    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED)
+    if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK)
         fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
     if (fop_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
         fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
@@ -1271,8 +1287,10 @@ int MPIDI_CH3_PktHandler_Lock(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     }
 
     else {
-        mpi_errno = enqueue_lock_origin(win_ptr, pkt);
+        MPID_Request *req = NULL;
+        mpi_errno = enqueue_lock_origin(win_ptr, vc, pkt, buflen, &req);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        MPIU_Assert(req == NULL);
     }
 
     *rreqp = NULL;

http://git.mpich.org/mpich.git/commitdiff/c73451c01fc27d8a4d2198ef7042d1df713893a1

commit c73451c01fc27d8a4d2198ef7042d1df713893a1
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Nov 20 20:58:35 2014 -0600

    Bug-fix: in UNLOCK handler, send FLUSH_ACK first, then release lock.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 23b58f3..de73290 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -506,11 +506,11 @@ static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
                 MPIDI_CH3_Progress_signal_completion();
         }
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
-            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
-            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
             mpi_errno = MPIDI_CH3I_Send_flush_ack_pkt(vc, win_ptr, flags,
                                                       source_win_handle);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
             MPIDI_CH3_Progress_signal_completion();
         }
     }

http://git.mpich.org/mpich.git/commitdiff/e12376fd3763874907a177792b8fd81bb23aa9d1

commit e12376fd3763874907a177792b8fd81bb23aa9d1
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 14:36:16 2014 -0600

    Bug-fix: handle dest==MPI_PROC_NULL in Win_flush/flush_local
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index e8ad62c..123ae39 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1093,6 +1093,9 @@ int MPIDI_Win_flush(int dest, MPID_Win *win_ptr)
         OPA_read_write_barrier();
     }
 
+    if (dest == MPI_PROC_NULL)
+        goto finish_flush;
+
     /* When the process tries to acquire the lock on itself, it does not
        go through the progress engine. Therefore, it is possible that
        one process always grants the lock to itself but never process
@@ -1186,6 +1189,9 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
 
+    if (dest == MPI_PROC_NULL)
+        goto finish_flush_local;
+
     /* When the process tries to acquire the lock on itself, it does not
        go through the progress engine. Therefore, it is possible that
        one process always grants the lock to itself but never process

http://git.mpich.org/mpich.git/commitdiff/e92b774663d92158ddcd6514033fb930a43a9bf9

commit e92b774663d92158ddcd6514033fb930a43a9bf9
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 14:32:04 2014 -0600

    Bug-fix: check win_ptr->active_req_cnt in RMA sync calls
    
    No reviewer.

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 5f37408..e8ad62c 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -747,7 +747,6 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
     MPIU_Free(win_ptr->start_ranks_in_win_grp);
     win_ptr->start_ranks_in_win_grp = NULL;
 
-    MPIU_Assert(win_ptr->active_req_cnt == 0);
     MPIU_Assert(win_ptr->start_req == NULL);
 
     win_ptr->states.access_state = MPIDI_RMA_NONE;
@@ -756,6 +755,7 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
     /* ENDING synchronization: correctly decrement the following counter. */
     win_ptr->accumulated_ops_cnt = 0;
 
+    MPIU_Assert(win_ptr->active_req_cnt == 0);
 
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_COMPLETE);
@@ -1038,7 +1038,6 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
     } while (!remote_completed);
 
  finish_unlock:
-    MPIU_Assert(win_ptr->active_req_cnt == 0);
     if (target != NULL) {
         /* ENDING synchronization: correctly decrement the following counter. */
         win_ptr->accumulated_ops_cnt -= target->accumulated_ops_cnt;
@@ -1329,6 +1328,7 @@ int MPIDI_Win_lock_all(int assert, MPID_Win * win_ptr)
     /* BEGINNING synchronization: the following counter should be zero. */
     MPIU_Assert(win_ptr->accumulated_ops_cnt == 0);
 
+    MPIU_Assert(win_ptr->active_req_cnt == 0);
 
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_LOCK_ALL);
@@ -1467,7 +1467,6 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
     MPIU_Assert(win_ptr->non_empty_slots == 0);
 
     win_ptr->lock_all_assert = 0;
-    MPIU_Assert(win_ptr->active_req_cnt == 0);
 
     win_ptr->states.access_state = MPIDI_RMA_NONE;
     num_passive_win--;
@@ -1477,6 +1476,7 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
     /* ENDING synchronization: correctly decrement the following counter. */
     win_ptr->accumulated_ops_cnt = 0;
 
+    MPIU_Assert(win_ptr->active_req_cnt == 0);
 
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_UNLOCK_ALL);
@@ -1562,6 +1562,8 @@ int MPIDI_Win_flush_all(MPID_Win * win_ptr)
     /* ENDING synchronization: correctly decrement the following counter. */
     win_ptr->accumulated_ops_cnt = 0;
 
+    MPIU_Assert(win_ptr->active_req_cnt == 0);
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);
     return mpi_errno;

http://git.mpich.org/mpich.git/commitdiff/7b1a5e2dfd6985e21c0f79c329eeb3d92303c461

commit 7b1a5e2dfd6985e21c0f79c329eeb3d92303c461
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Dec 11 14:27:44 2014 -0600

    Bug-fix: correctly modify win_ptr->accumulated_ops_cnt
    
    accumulated_ops_cnt is used to track the number of accumulated
    posted RMA operations between two synchronization calls,
    so that we can decide when to poke the progress engine based
    on the current value of this counter.
    
    Here we initialize it to zero in the BEGINNING synchronization
    calls (Win_fence, Win_start, first Win_lock, Win_lock_all),
    and correctly decrement it in the ENDING synchronization calls
    (Win_fence, Win_complete, Win_unlock, Win_unlock_all,
    Win_flush, Win_flush_local, Win_flush_all, Win_flush_local_all).
    We also use a per-target counter to track the single-target case.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpid_rma_oplist.h b/src/mpid/ch3/include/mpid_rma_oplist.h
index e4d4a2e..b0f3ec2 100644
--- a/src/mpid/ch3/include/mpid_rma_oplist.h
+++ b/src/mpid/ch3/include/mpid_rma_oplist.h
@@ -116,6 +116,7 @@ static inline MPIDI_RMA_Target_t *MPIDI_CH3I_Win_target_alloc(MPID_Win * win_ptr
     e->lock_type = MPID_LOCK_NONE;
     e->lock_mode = 0;
     e->outstanding_lock = 0;
+    e->accumulated_ops_cnt = 0;
     e->disable_flush_local = 0;
     e->win_complete_flag = 0;
     e->put_acc_issued = 0;
@@ -275,6 +276,10 @@ static inline int MPIDI_CH3I_Win_enqueue_op(MPID_Win * win_ptr,
     if (target->next_op_to_issue == NULL)
         target->next_op_to_issue = op;
 
+    /* Increment the counter for accumulated posted operations */
+    target->accumulated_ops_cnt++;
+    win_ptr->accumulated_ops_cnt++;
+
  fn_exit:
     return mpi_errno;
  fn_fail:
diff --git a/src/mpid/ch3/include/mpid_rma_types.h b/src/mpid/ch3/include/mpid_rma_types.h
index d182799..8037cc4 100644
--- a/src/mpid/ch3/include/mpid_rma_types.h
+++ b/src/mpid/ch3/include/mpid_rma_types.h
@@ -86,6 +86,7 @@ typedef struct MPIDI_RMA_Target {
     int lock_type; /* NONE, SHARED, EXCLUSIVE */
     int lock_mode;              /* e.g., MODE_NO_CHECK */
     int outstanding_lock;
+    int accumulated_ops_cnt;
     int disable_flush_local;
     int win_complete_flag;
     int put_acc_issued; /* indicate if PUT/ACC is issued in this epoch
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index 9e8b9f1..3d93b61 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -335,10 +335,9 @@ extern MPIDI_RMA_Pkt_orderings_t *MPIDI_RMA_Pkt_orderings;
         enum MPIDI_RMA_states exposure_state;                            \
     } states;                                                            \
     int non_empty_slots;                                                 \
-    int posted_ops_cnt; /* keep track of number of posted RMA operations \
-                           in current epoch (accumulated value, not      \
-                           current value) to control when to poke        \
-                           progress engine in RMA operation routines. */ \
+    int accumulated_ops_cnt; /* keep track of number of accumulated posted RMA operations \
+                            in current epoch to control when to poke     \
+                            progress engine in RMA operation routines. */\
     int active_req_cnt; /* keep track of number of active requests in    \
                            current epoch, i.e., number of issued but     \
                            incomplete RMA operations. */                 \
diff --git a/src/mpid/ch3/src/ch3u_rma_ops.c b/src/mpid/ch3/src/ch3u_rma_ops.c
index 8ec172d..4b8145b 100644
--- a/src/mpid/ch3/src/ch3u_rma_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_ops.c
@@ -187,9 +187,8 @@ int MPIDI_CH3I_Put(const void *origin_addr, int origin_count, MPI_Datatype
         mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        win_ptr->posted_ops_cnt++;
         if (MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS >= 0 &&
-            win_ptr->posted_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
+            win_ptr->accumulated_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
             mpi_errno = poke_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
@@ -348,9 +347,8 @@ int MPIDI_CH3I_Get(void *origin_addr, int origin_count, MPI_Datatype
         mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        win_ptr->posted_ops_cnt++;
         if (MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS >= 0 &&
-            win_ptr->posted_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
+            win_ptr->accumulated_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
             mpi_errno = poke_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
@@ -536,9 +534,8 @@ int MPIDI_CH3I_Accumulate(const void *origin_addr, int origin_count, MPI_Datatyp
         mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        win_ptr->posted_ops_cnt++;
         if (MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS >= 0 &&
-            win_ptr->posted_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
+            win_ptr->accumulated_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
             mpi_errno = poke_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
@@ -769,9 +766,8 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
         mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        win_ptr->posted_ops_cnt++;
         if (MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS >= 0 &&
-            win_ptr->posted_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
+            win_ptr->accumulated_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
             mpi_errno = poke_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
@@ -1009,9 +1005,8 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
         mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        win_ptr->posted_ops_cnt++;
         if (MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS >= 0 &&
-            win_ptr->posted_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
+            win_ptr->accumulated_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
             mpi_errno = poke_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
@@ -1175,9 +1170,8 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
         mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
         if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        win_ptr->posted_ops_cnt++;
         if (MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS >= 0 &&
-            win_ptr->posted_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
+            win_ptr->accumulated_ops_cnt >= MPIR_CVAR_CH3_RMA_OP_POKING_PROGRESS) {
             mpi_errno = poke_progress_engine();
             if (mpi_errno != MPI_SUCCESS)
                 MPIU_ERR_POP(mpi_errno);
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index cf2e5fa..5f37408 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -307,11 +307,9 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
                         win_ptr->states.exposure_state != MPIDI_RMA_NONE,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
 
-    win_ptr->posted_ops_cnt = 0;
-
     if (assert & MPI_MODE_NOPRECEDE) {
         if (assert & MPI_MODE_NOSUCCEED) {
-            goto fn_exit;
+            goto finish_fence;
         }
         else {
             /* It is possible that there is a IBARRIER in MPI_WIN_FENCE with
@@ -345,7 +343,7 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
             win_ptr->states.access_state = MPIDI_RMA_FENCE_ISSUED;
             num_active_issued_win++;
 
-            goto fn_exit;
+            goto finish_fence;
         }
     }
 
@@ -416,7 +414,16 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
         win_ptr->states.access_state = MPIDI_RMA_FENCE_GRANTED;
     }
 
-    /* There should be no active requests. */
+ finish_fence:
+    if (assert & MPI_MODE_NOPRECEDE) {
+        /* BEGINNING synchronization: the following counter should be zero. */
+        MPIU_Assert(win_ptr->accumulated_ops_cnt == 0);
+    }
+    else {
+        /* ENDING synchronization: correctly decrement the following counter. */
+        win_ptr->accumulated_ops_cnt = 0;
+    }
+
     MPIU_Assert(win_ptr->active_req_cnt == 0);
 
   fn_exit:
@@ -631,7 +638,10 @@ int MPIDI_Win_start(MPID_Group * group_ptr, int assert, MPID_Win * win_ptr)
     win_ptr->states.access_state = MPIDI_RMA_PSCW_ISSUED;
     num_active_issued_win++;
 
-    MPIU_Assert(win_ptr->posted_ops_cnt == 0);
+ finish_start:
+    /* BEGINNING synchronization: the following counter should be zero. */
+    MPIU_Assert(win_ptr->accumulated_ops_cnt == 0);
+
     MPIU_Assert(win_ptr->active_req_cnt == 0);
 
  fn_exit:
@@ -737,12 +747,16 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
     MPIU_Free(win_ptr->start_ranks_in_win_grp);
     win_ptr->start_ranks_in_win_grp = NULL;
 
-    win_ptr->posted_ops_cnt = 0;
     MPIU_Assert(win_ptr->active_req_cnt == 0);
     MPIU_Assert(win_ptr->start_req == NULL);
 
     win_ptr->states.access_state = MPIDI_RMA_NONE;
 
+ finish_complete:
+    /* ENDING synchronization: correctly decrement the following counter. */
+    win_ptr->accumulated_ops_cnt = 0;
+
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_COMPLETE);
     return mpi_errno;
@@ -888,7 +902,7 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win * win_ptr)
     win_ptr->lock_epoch_count++;
 
     if (dest == MPI_PROC_NULL)
-        goto fn_exit;
+        goto finish_lock;
 
     if (win_ptr->shm_allocated == TRUE) {
         MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
@@ -924,6 +938,12 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win * win_ptr)
         }
     }
 
+ finish_lock:
+    if (win_ptr->lock_epoch_count == 1) {
+        /* BEGINNING synchronization: the following counter should be zero. */
+        MPIU_Assert(win_ptr->accumulated_ops_cnt == 0);
+    }
+
     /* Ensure ordering of load/store operations. */
     if (win_ptr->shm_allocated == TRUE) {
         OPA_read_write_barrier();
@@ -1017,13 +1037,19 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
         }
     } while (!remote_completed);
 
-    /* Cleanup the target. */
-    mpi_errno = MPIDI_CH3I_RMA_Cleanup_single_target(win_ptr, target);
-    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
  finish_unlock:
-    win_ptr->posted_ops_cnt = 0;
     MPIU_Assert(win_ptr->active_req_cnt == 0);
+    if (target != NULL) {
+        /* ENDING synchronization: correctly decrement the following counter. */
+        win_ptr->accumulated_ops_cnt -= target->accumulated_ops_cnt;
+        if (win_ptr->lock_epoch_count == 0) {
+            MPIU_Assert(win_ptr->accumulated_ops_cnt == 0);
+        }
+
+        /* Cleanup the target. */
+        mpi_errno = MPIDI_CH3I_RMA_Cleanup_single_target(win_ptr, target);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    }
 
     win_ptr->lock_epoch_count--;
     if (win_ptr->lock_epoch_count == 0) {
@@ -1079,23 +1105,23 @@ int MPIDI_Win_flush(int dest, MPID_Win *win_ptr)
     if (mpi_errno != MPI_SUCCESS)
         MPIU_ERR_POP(mpi_errno);
 
+    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, dest, &target);
+    if (mpi_errno != MPI_SUCCESS)
+        MPIU_ERR_POP(mpi_errno);
+    if (target == NULL)
+        goto finish_flush;
+
     if (rank == dest)
-        goto fn_exit;
+        goto finish_flush;
 
     if (win_ptr->shm_allocated) {
         MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
         MPIDI_Comm_get_vc(win_ptr->comm_ptr, dest, &target_vc);
         MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
         if (orig_vc->node_id == target_vc->node_id)
-            goto fn_exit;
+            goto finish_flush;
     }
 
-    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, dest, &target);
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-    if (target == NULL)
-        goto fn_exit;
-
     /* Set sync_flag in sync struct. */
     if (target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
         target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
@@ -1123,6 +1149,13 @@ int MPIDI_Win_flush(int dest, MPID_Win *win_ptr)
         }
     } while (!remote_completed);
 
+ finish_flush:
+    if (target != NULL) {
+        /* ENDING synchronization: correctly decrement the following counters. */
+        win_ptr->accumulated_ops_cnt -= target->accumulated_ops_cnt;
+        target->accumulated_ops_cnt = 0;
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FLUSH);
     return mpi_errno;
@@ -1165,23 +1198,23 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
     if (mpi_errno != MPI_SUCCESS)
         MPIU_ERR_POP(mpi_errno);
 
+    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, dest, &target);
+    if (mpi_errno != MPI_SUCCESS)
+        MPIU_ERR_POP(mpi_errno);
+    if (target == NULL)
+        goto finish_flush_local;
+
     if (rank == dest)
-        goto fn_exit;
+        goto finish_flush_local;
 
     if (win_ptr->shm_allocated) {
         MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
         MPIDI_Comm_get_vc(win_ptr->comm_ptr, dest, &target_vc);
         MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
         if (orig_vc->node_id == target_vc->node_id)
-            goto fn_exit;
+            goto finish_flush_local;
     }
 
-    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, dest, &target);
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-    if (target == NULL)
-        goto fn_exit;
-
     /* Set sync_flag in sync struct. */
     if (target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH_LOCAL)
         target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH_LOCAL;
@@ -1206,6 +1239,13 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
         }
     } while (!local_completed);
 
+ finish_flush_local:
+    if (target != NULL) {
+        /* ENDING synchronization: correctly decrement the following counters. */
+        win_ptr->accumulated_ops_cnt -= target->accumulated_ops_cnt;
+        target->accumulated_ops_cnt = 0;
+    }
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL);
     return mpi_errno;
@@ -1285,6 +1325,11 @@ int MPIDI_Win_lock_all(int assert, MPID_Win * win_ptr)
         OPA_read_write_barrier();
     }
 
+ finish_lock_all:
+    /* BEGINNING synchronization: the following counter should be zero. */
+    MPIU_Assert(win_ptr->accumulated_ops_cnt == 0);
+
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_LOCK_ALL);
     return mpi_errno;
@@ -1422,13 +1467,17 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
     MPIU_Assert(win_ptr->non_empty_slots == 0);
 
     win_ptr->lock_all_assert = 0;
-    win_ptr->posted_ops_cnt = 0;
     MPIU_Assert(win_ptr->active_req_cnt == 0);
 
     win_ptr->states.access_state = MPIDI_RMA_NONE;
     num_passive_win--;
     MPIU_Assert(num_passive_win >= 0);
 
+ finish_unlock_all:
+    /* ENDING synchronization: correctly decrement the following counter. */
+    win_ptr->accumulated_ops_cnt = 0;
+
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_UNLOCK_ALL);
     return mpi_errno;
@@ -1484,6 +1533,10 @@ int MPIDI_Win_flush_all(MPID_Win * win_ptr)
                 curr_target->sync.have_remote_incomplete_ops = 0;
                 curr_target->sync.outstanding_acks++;
             }
+
+            /* ENDING synchronization: correctly decrement the following counters. */
+            curr_target->accumulated_ops_cnt = 0;
+
             curr_target = curr_target->next;
         }
     }
@@ -1505,6 +1558,10 @@ int MPIDI_Win_flush_all(MPID_Win * win_ptr)
         }
     } while (!remote_completed);
 
+ finish_flush_all:
+    /* ENDING synchronization: correctly decrement the following counter. */
+    win_ptr->accumulated_ops_cnt = 0;
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);
     return mpi_errno;
@@ -1553,6 +1610,10 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
             if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH_LOCAL) {
                 curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH_LOCAL;
             }
+
+            /* ENDING synchronization: correctly decrement the following counters. */
+            curr_target->accumulated_ops_cnt = 0;
+
             curr_target = curr_target->next;
         }
     }
@@ -1574,6 +1635,10 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
         }
     } while (!local_completed);
 
+  finish_flush_local_all:
+    /* ENDING synchronization: correctly decrement the following counter. */
+    win_ptr->accumulated_ops_cnt = 0;
+
   fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
     return mpi_errno;
diff --git a/src/mpid/ch3/src/mpid_rma.c b/src/mpid/ch3/src/mpid_rma.c
index f0a3ae4..0eb2621 100644
--- a/src/mpid/ch3/src/mpid_rma.c
+++ b/src/mpid/ch3/src/mpid_rma.c
@@ -337,7 +337,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     (*win_ptr)->states.access_state = MPIDI_RMA_NONE;
     (*win_ptr)->states.exposure_state = MPIDI_RMA_NONE;
     (*win_ptr)->non_empty_slots = 0;
-    (*win_ptr)->posted_ops_cnt = 0;
+    (*win_ptr)->accumulated_ops_cnt = 0;
     (*win_ptr)->active_req_cnt = 0;
     (*win_ptr)->fence_sync_req = MPI_REQUEST_NULL;
     (*win_ptr)->start_req = NULL;

http://git.mpich.org/mpich.git/commitdiff/b155e7e0f8bc8f65420fda64146f03b949230d96

commit b155e7e0f8bc8f65420fda64146f03b949230d96
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Nov 19 15:36:01 2014 -0600

    Clean up unused attributes in RMA packet structs.
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index a0a668a..9d0840f 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -560,7 +560,6 @@ typedef struct MPIDI_CH3_Pkt_lock {
     MPI_Win target_win_handle;
     MPI_Win source_win_handle;
     int lock_type;
-    int target_rank;
     int origin_rank;
 } MPIDI_CH3_Pkt_lock_t;
 
@@ -569,18 +568,12 @@ typedef struct MPIDI_CH3_Pkt_unlock {
     MPIDI_CH3_Pkt_flags_t flags;
     MPI_Win target_win_handle;
     MPI_Win source_win_handle;
-    int lock_type;
-    int target_rank;
-    int origin_rank;
 } MPIDI_CH3_Pkt_unlock_t;
 
 typedef struct MPIDI_CH3_Pkt_flush {
     MPIDI_CH3_Pkt_type_t type;
     MPI_Win target_win_handle;
     MPI_Win source_win_handle;
-    int lock_type;
-    int target_rank;
-    int origin_rank;
 } MPIDI_CH3_Pkt_flush_t;
 
 typedef struct MPIDI_CH3_Pkt_decr_at_counter {

http://git.mpich.org/mpich.git/commitdiff/389aab1673b9e54b9172985f69931bf1d731f3d9

commit 389aab1673b9e54b9172985f69931bf1d731f3d9
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Nov 17 14:56:51 2014 -0600

    Code-refactor: arrange RMA pkt structure.
    
    Arrange RMA packet definition and structures in
    src/mpid/ch3/include/mpidpkt.h in the following
    order:
    
    1. RMA operation packets: PUT, GET, ACC, GACC, CAS, FOP
    2. RMA operation response packets: GET_RESP, GACC_RESP, CAS_RESP, FOP_RESP
    3. RMA control packets: LOCK, UNLOCK, FLUSH, DECR_AT_COUNTER
    4. RMA control response packets: LOCK_ACK, FLUSH_ACK
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 243c296..a0a668a 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -86,11 +86,11 @@ typedef enum {
     MPIDI_CH3_PKT_FOP_RESP,
     MPIDI_CH3_PKT_CAS_RESP,
     MPIDI_CH3_PKT_LOCK,
-    MPIDI_CH3_PKT_LOCK_GRANTED,
     MPIDI_CH3_PKT_UNLOCK,
     MPIDI_CH3_PKT_FLUSH,
-    MPIDI_CH3_PKT_FLUSH_ACK,
     MPIDI_CH3_PKT_DECR_AT_COUNTER,
+    MPIDI_CH3_PKT_LOCK_GRANTED,
+    MPIDI_CH3_PKT_FLUSH_ACK,
     /* RMA Packets end here */
     MPIDI_CH3_PKT_FLOW_CNTL_UPDATE,     /* FIXME: Unused */
     MPIDI_CH3_PKT_CLOSE,
@@ -377,213 +377,236 @@ MPIDI_CH3_PKT_DEFS
         }                                                               \
     }
 
+/* RMA packets start here */
+
+/********************************************************************************/
+/* RMA packet (from origin to target, including PUT, GET, ACC, GACC, CAS, FOP)  */
+/********************************************************************************/
 
 typedef struct MPIDI_CH3_Pkt_put {
     MPIDI_CH3_Pkt_type_t type;
     MPIDI_CH3_Pkt_flags_t flags;
+    MPI_Win target_win_handle;
+    MPI_Win source_win_handle;
+    /* Followings are to describe target data */
     void *addr;
     int count;
     MPI_Datatype datatype;
-    int dataloop_size;          /* for derived datatypes */
-    MPI_Win target_win_handle;  /* Used in the last RMA operation in each
-                                 * epoch for decrementing rma op counter in
-                                 * active target rma and for unlocking window
-                                 * in passive target rma. Otherwise set to NULL*/
-    MPI_Win source_win_handle;  /* Used in the last RMA operation in an
-                                 * epoch in the case of passive target rma
-                                 * with shared locks. Otherwise set to NULL*/
-    char data[MPIDI_RMA_IMMED_BYTES];
+    int dataloop_size;
+    /* Followings are to piggyback LOCK */
+    int lock_type;
+    int origin_rank;
+    /* Followings are to piggyback IMMED data */
     size_t immed_len;
-    int lock_type;      /* used when piggybacking LOCK message. */
-    int origin_rank;    /* used when piggybacking LOCK message. */
+    char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_put_t;
 
 typedef struct MPIDI_CH3_Pkt_get {
     MPIDI_CH3_Pkt_type_t type;
     MPIDI_CH3_Pkt_flags_t flags;
+    MPI_Win target_win_handle;
+    MPI_Win source_win_handle;
+    /* Followings are to describe target data */
     void *addr;
     int count;
     MPI_Datatype datatype;
-    int dataloop_size;          /* for derived datatypes */
+    int dataloop_size;
+    /* Following is to complete request at origin */
     MPI_Request request_handle;
-    MPI_Win target_win_handle;  /* Used in the last RMA operation in each
-                                 * epoch for decrementing rma op counter in
-                                 * active target rma and for unlocking window
-                                 * in passive target rma. Otherwise set to NULL*/
-    MPI_Win source_win_handle;  /* Used in the last RMA operation in an
-                                 * epoch in the case of passive target rma
-                                 * with shared locks. Otherwise set to NULL*/
-    int lock_type;   /* used when piggybacking LOCK message. */
-    int origin_rank; /* used when piggybacking LOCK message. */
+    /* Followings are to piggyback LOCK */
+    int lock_type;
+    int origin_rank;
 } MPIDI_CH3_Pkt_get_t;
 
-typedef struct MPIDI_CH3_Pkt_get_resp {
-    MPIDI_CH3_Pkt_type_t type;
-    MPI_Request request_handle;
-    /* followings are used to decrement ack_counter at origin */
-    int target_rank;
-    MPI_Win source_win_handle;
-    MPIDI_CH3_Pkt_flags_t flags;
-} MPIDI_CH3_Pkt_get_resp_t;
-
 typedef struct MPIDI_CH3_Pkt_accum {
     MPIDI_CH3_Pkt_type_t type;
     MPIDI_CH3_Pkt_flags_t flags;
+    MPI_Win target_win_handle;
+    MPI_Win source_win_handle;
+    /* Followings are to describe target data */
     void *addr;
     int count;
     MPI_Datatype datatype;
-    int dataloop_size;          /* for derived datatypes */
+    int dataloop_size;
+    /* Following is to specify ACC op */
     MPI_Op op;
-    MPI_Win target_win_handle;  /* Used in the last RMA operation in each
-                                 * epoch for decrementing rma op counter in
-                                 * active target rma and for unlocking window
-                                 * in passive target rma. Otherwise set to NULL*/
-    MPI_Win source_win_handle;  /* Used in the last RMA operation in an
-                                 * epoch in the case of passive target rma
-                                 * with shared locks. Otherwise set to NULL*/
-    char data[MPIDI_RMA_IMMED_BYTES];
+    /* Followings are to piggyback LOCK */
+    int lock_type;
+    int origin_rank;
+    /* Followings are to piggyback IMMED data */
     size_t immed_len;
-    int lock_type;    /* used when piggybacking LOCK message. */
-    int origin_rank;  /* used when piggybacking LOCK message. */
+    char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_accum_t;
 
 typedef struct MPIDI_CH3_Pkt_get_accum {
     MPIDI_CH3_Pkt_type_t type;
     MPIDI_CH3_Pkt_flags_t flags;
-    MPI_Request request_handle; /* For get_accumulate response */
+    MPI_Win target_win_handle;
+    MPI_Win source_win_handle;
+    /* Followings are to describe target data */
     void *addr;
     int count;
     MPI_Datatype datatype;
-    int dataloop_size;          /* for derived datatypes */
+    int dataloop_size;
+    /* Following is to describe ACC op */
     MPI_Op op;
-    MPI_Win target_win_handle;  /* Used in the last RMA operation in each
-                                 * epoch for decrementing rma op counter in
-                                 * active target rma and for unlocking window
-                                 * in passive target rma. Otherwise set to NULL*/
-    MPI_Win source_win_handle;  /* Used in the last RMA operation in an
-                                 * epoch in the case of passive target rma
-                                 * with shared locks. Otherwise set to NULL*/
-    char data[MPIDI_RMA_IMMED_BYTES];
+    /* Following is to complete request on origin */
+    MPI_Request request_handle;
+    /* Followings are to piggyback LOCK */
+    int lock_type;
+    int origin_rank;
+    /* Followings are to piggyback IMMED data */
     size_t immed_len;
-    int lock_type;     /* used when piggybacking LOCK message. */
-    int origin_rank;   /* used when piggybacking LOCK message. */
+    char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_get_accum_t;
 
-typedef struct MPIDI_CH3_Pkt_get_accum_resp {
+typedef struct MPIDI_CH3_Pkt_fop {
     MPIDI_CH3_Pkt_type_t type;
-    MPI_Request request_handle;
-    /* followings are used to decrement ack_counter at origin */
-    int target_rank;
-    MPI_Win source_win_handle;
     MPIDI_CH3_Pkt_flags_t flags;
-} MPIDI_CH3_Pkt_get_accum_resp_t;
+    MPI_Win source_win_handle;
+    MPI_Win target_win_handle;
+    /* Followings are to describe target data */
+    void *addr;
+    MPI_Datatype datatype;
+    /* Following is to specify ACC op */
+    MPI_Op op;
+    /* Following is to complete request at origin */
+    MPI_Request request_handle;
+    /* Followings are to piggyback LOCK */
+    int lock_type;
+    int origin_rank;
+    /* Followings are to piggyback IMMED data */
+    int immed_len;
+    char data[MPIDI_RMA_IMMED_BYTES];
+} MPIDI_CH3_Pkt_fop_t;
 
 typedef struct MPIDI_CH3_Pkt_cas {
     MPIDI_CH3_Pkt_type_t type;
     MPIDI_CH3_Pkt_flags_t flags;
-    MPI_Datatype datatype;
+    MPI_Win source_win_handle;
+    MPI_Win target_win_handle;
+    /* Followings are to describe target data */
     void *addr;
+    MPI_Datatype datatype;
+    /* Following is to complete request on origin */
     MPI_Request request_handle;
-    MPI_Win source_win_handle;
-    MPI_Win target_win_handle;  /* Used in the last RMA operation in each
-                                 * epoch for decrementing rma op counter in
-                                 * active target rma and for unlocking window
-                                 * in passive target rma. Otherwise set to NULL*/
+    /* Followings are to piggyback LOCK */
+    int lock_type;
+    int origin_rank;
+    /* Followings are to piggyback IMMED data */
     MPIDI_CH3_CAS_Immed_u origin_data;
     MPIDI_CH3_CAS_Immed_u compare_data;
-    int lock_type;     /* used when piggybacking LOCK message. */
-    int origin_rank;   /* used when piggybacking LOCK message. */
 } MPIDI_CH3_Pkt_cas_t;
 
-typedef struct MPIDI_CH3_Pkt_cas_resp {
+
+/*********************************************************************************/
+/* RMA response packet (from target to origin, including GET_RESP, GET_ACC_RESP, */
+/* CAS_RESP, FOP_RESP)                                                           */
+/*********************************************************************************/
+
+typedef struct MPIDI_CH3_Pkt_get_resp {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
+    /* Following is to complete request at origin */
     MPI_Request request_handle;
-    MPIDI_CH3_CAS_Immed_u data;
-    /* followings are used to decrement ack_counter at orign */
-    int target_rank;
+    /* TODO: we should add IMMED data here */
+    /* Followings are used to decrement ack_counter at origin */
     MPI_Win source_win_handle;
-    MPIDI_CH3_Pkt_flags_t flags;
-} MPIDI_CH3_Pkt_cas_resp_t;
+    int target_rank;
+} MPIDI_CH3_Pkt_get_resp_t;
 
-typedef struct MPIDI_CH3_Pkt_fop {
+typedef struct MPIDI_CH3_Pkt_get_accum_resp {
     MPIDI_CH3_Pkt_type_t type;
     MPIDI_CH3_Pkt_flags_t flags;
-    MPI_Datatype datatype;
-    void *addr;
-    MPI_Op op;
+    /* Following is to complete request at origin */
     MPI_Request request_handle;
+    /* TODO: we should add IMMED data here */
+    /* Followings are used to decrement ack_counter at origin */
     MPI_Win source_win_handle;
-    MPI_Win target_win_handle;  /* Used in the last RMA operation in each
-                                 * epoch for decrementing rma op counter in
-                                 * active target rma and for unlocking window
-                                 * in passive target rma. Otherwise set to NULL*/
-    char data[MPIDI_RMA_IMMED_BYTES];
-    int immed_len;
-    int lock_type;     /* used when piggybacking LOCK message. */
-    int origin_rank;   /* used when piggybacking LOCK message. */
-} MPIDI_CH3_Pkt_fop_t;
+    int target_rank;
+} MPIDI_CH3_Pkt_get_accum_resp_t;
 
 typedef struct MPIDI_CH3_Pkt_fop_resp {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
+    /* Following is to complete request at origin */
     MPI_Request request_handle;
-    char data[MPIDI_RMA_IMMED_BYTES];
-    int immed_len;
-    /* followings are used to decrement ack_counter at orign */
-    int target_rank;
+    /* Followings are used to decrement ack_counter at origin */
     MPI_Win source_win_handle;
-    MPIDI_CH3_Pkt_flags_t flags;
+    int target_rank;
+    /* Followings are to piggyback IMMED data */
+    int immed_len;
+    char data[MPIDI_RMA_IMMED_BYTES];
 } MPIDI_CH3_Pkt_fop_resp_t;
 
+typedef struct MPIDI_CH3_Pkt_cas_resp {
+    MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
+    /* Following is to complete request at origin */
+    MPI_Request request_handle;
+    /* Followings are used to decrement ack_counter at origin */
+    MPI_Win source_win_handle;
+    int target_rank;
+    /* Following is to piggyback IMMED data */
+    MPIDI_CH3_CAS_Immed_u data;
+} MPIDI_CH3_Pkt_cas_resp_t;
+
+/*********************************************************************************/
+/* RMA control packet (from origin to target, including LOCK, UNLOCK, FLUSH)     */
+/*********************************************************************************/
+
 typedef struct MPIDI_CH3_Pkt_lock {
     MPIDI_CH3_Pkt_type_t type;
-    int lock_type;
     MPI_Win target_win_handle;
     MPI_Win source_win_handle;
-    int target_rank;            /* Used in unluck/flush response to look up the
-                                 * target state at the origin. */
+    int lock_type;
+    int target_rank;
     int origin_rank;
 } MPIDI_CH3_Pkt_lock_t;
 
 typedef struct MPIDI_CH3_Pkt_unlock {
     MPIDI_CH3_Pkt_type_t type;
-    int lock_type;
+    MPIDI_CH3_Pkt_flags_t flags;
     MPI_Win target_win_handle;
     MPI_Win source_win_handle;
-    int target_rank;            /* Used in unluck/flush response to look up the
-                                 * target state at the origin. */
+    int lock_type;
+    int target_rank;
     int origin_rank;
-    MPIDI_CH3_Pkt_flags_t flags;
 } MPIDI_CH3_Pkt_unlock_t;
 
 typedef struct MPIDI_CH3_Pkt_flush {
     MPIDI_CH3_Pkt_type_t type;
-    int lock_type;
     MPI_Win target_win_handle;
     MPI_Win source_win_handle;
-    int target_rank;            /* Used in unluck/flush response to look up the
-                                 * target state at the origin. */
+    int lock_type;
+    int target_rank;
     int origin_rank;
 } MPIDI_CH3_Pkt_flush_t;
 
+typedef struct MPIDI_CH3_Pkt_decr_at_counter {
+    MPIDI_CH3_Pkt_type_t type;
+    MPI_Win target_win_handle;
+} MPIDI_CH3_Pkt_decr_at_counter_t;
+
+/*********************************************************************************/
+/* RMA control response packet (from target to origin, including LOCK_GRANTED,   */
+/* FLUSH_ACK)                                                                    */
+/*********************************************************************************/
+
 typedef struct MPIDI_CH3_Pkt_lock_granted {
     MPIDI_CH3_Pkt_type_t type;
     MPI_Win source_win_handle;
-    int target_rank;            /* Used in flush_ack response to look up the
-                                 * target state at the origin. */
+    int target_rank;
 } MPIDI_CH3_Pkt_lock_granted_t;
 
 typedef struct MPIDI_CH3_Pkt_flush_ack {
     MPIDI_CH3_Pkt_type_t type;
-    MPI_Win source_win_handle;
-    int target_rank;            /* Used in flush_ack response to look up the
-                                 * target state at the origin. */
     MPIDI_CH3_Pkt_flags_t flags;
+    MPI_Win source_win_handle;
+    int target_rank;
 } MPIDI_CH3_Pkt_flush_ack_t;
 
-typedef struct MPIDI_CH3_Pkt_decr_at_counter {
-    MPIDI_CH3_Pkt_type_t type;
-    MPI_Win target_win_handle;
-} MPIDI_CH3_Pkt_decr_at_counter_t;
+/* RMA packets end here */
 
 typedef struct MPIDI_CH3_Pkt_close {
     MPIDI_CH3_Pkt_type_t type;
@@ -609,23 +632,25 @@ typedef union MPIDI_CH3_Pkt {
     MPIDI_CH3_Pkt_rndv_send_t rndv_send;
     MPIDI_CH3_Pkt_cancel_send_req_t cancel_send_req;
     MPIDI_CH3_Pkt_cancel_send_resp_t cancel_send_resp;
+    /* RMA packets start here */
     MPIDI_CH3_Pkt_put_t put;
     MPIDI_CH3_Pkt_get_t get;
-    MPIDI_CH3_Pkt_get_resp_t get_resp;
     MPIDI_CH3_Pkt_accum_t accum;
     MPIDI_CH3_Pkt_get_accum_t get_accum;
+    MPIDI_CH3_Pkt_fop_t fop;
+    MPIDI_CH3_Pkt_cas_t cas;
+    MPIDI_CH3_Pkt_get_resp_t get_resp;
+    MPIDI_CH3_Pkt_get_accum_resp_t get_accum_resp;
+    MPIDI_CH3_Pkt_fop_resp_t fop_resp;
+    MPIDI_CH3_Pkt_cas_resp_t cas_resp;
     MPIDI_CH3_Pkt_lock_t lock;
-    MPIDI_CH3_Pkt_lock_granted_t lock_granted;
     MPIDI_CH3_Pkt_unlock_t unlock;
     MPIDI_CH3_Pkt_flush_t flush;
-    MPIDI_CH3_Pkt_flush_ack_t flush_ack;
     MPIDI_CH3_Pkt_decr_at_counter_t decr_at_cnt;
+    MPIDI_CH3_Pkt_lock_granted_t lock_granted;
+    MPIDI_CH3_Pkt_flush_ack_t flush_ack;
+    /* RMA packets end here */
     MPIDI_CH3_Pkt_close_t close;
-    MPIDI_CH3_Pkt_cas_t cas;
-    MPIDI_CH3_Pkt_cas_resp_t cas_resp;
-    MPIDI_CH3_Pkt_fop_t fop;
-    MPIDI_CH3_Pkt_fop_resp_t fop_resp;
-    MPIDI_CH3_Pkt_get_accum_resp_t get_accum_resp;
     MPIDI_CH3_Pkt_revoke_t revoke;
 # if defined(MPIDI_CH3_PKT_DECL)
      MPIDI_CH3_PKT_DECL

http://git.mpich.org/mpich.git/commitdiff/a544067b0bf0396525200d22c323d7fb71a18175

commit a544067b0bf0396525200d22c323d7fb71a18175
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Nov 17 10:25:05 2014 -0600

    Code-refactor: arrange RMA sync functions.
    
    Arrange RMA sync functions in src/mpid/ch3/src/ch3u_rma_sync.c
    in the following order:
    
    Win_fence
    Win_post
    Win_start
    Win_complete
    Win_wait
    Win_test
    Win_lock
    Win_unlock
    Win_flush
    Win_flush_local
    Win_lock_all
    Win_unlock_all
    Win_flush_all
    Win_flush_local_all
    Win_sync
    
    No reviewer.

diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 4481c0e..23b58f3 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -538,6 +538,41 @@ static inline int finish_op_on_target(MPID_Win *win_ptr, MPIDI_VC_t *vc,
     goto fn_exit;
 }
 
+
+static inline int fill_ranks_in_win_grp(MPID_Win *win_ptr, MPID_Group *group_ptr,
+                                        int *ranks_in_win_grp)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int i, *ranks_in_grp;
+    MPID_Group *win_grp_ptr;
+    MPIU_CHKLMEM_DECL(1);
+    MPIDI_STATE_DECL(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
+
+    MPIDI_RMA_FUNC_ENTER(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
+
+    MPIU_CHKLMEM_MALLOC(ranks_in_grp, int *, group_ptr->size * sizeof(int),
+                        mpi_errno, "ranks_in_grp");
+    for (i = 0; i < group_ptr->size; i++) ranks_in_grp[i] = i;
+
+    mpi_errno = MPIR_Comm_group_impl(win_ptr->comm_ptr, &win_grp_ptr);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    mpi_errno = MPIR_Group_translate_ranks_impl(group_ptr, group_ptr->size,
+                                                ranks_in_grp, win_grp_ptr, ranks_in_win_grp);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+  fn_exit:
+    MPIU_CHKLMEM_FREEALL();
+    MPIDI_RMA_FUNC_EXIT(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+
 static inline int wait_progress_engine(void)
 {
     int mpi_errno = MPI_SUCCESS;
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 5ecf2cd..cf2e5fa 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -281,6 +281,11 @@ void MPIDI_CH3_RMA_Init_sync_pvars(void)
 #define SYNC_POST_TAG 100
 
 
+/********************************************************************************/
+/* Active Target synchronization (including WIN_FENCE, WIN_POST, WIN_START,     */
+/* WIN_COMPLETE, WIN_WAIT, WIN_TEST)                                            */
+/********************************************************************************/
+
 #undef FUNCNAME
 #define FUNCNAME MPIDI_Win_fence
 #undef FCNAME
@@ -424,39 +429,6 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
 }
 
 
-static int fill_ranks_in_win_grp(MPID_Win *win_ptr, MPID_Group *group_ptr, int *ranks_in_win_grp)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int i, *ranks_in_grp;
-    MPID_Group *win_grp_ptr;
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
-
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
-
-    MPIU_CHKLMEM_MALLOC(ranks_in_grp, int *, group_ptr->size * sizeof(int),
-                        mpi_errno, "ranks_in_grp");
-    for (i = 0; i < group_ptr->size; i++) ranks_in_grp[i] = i;
-
-    mpi_errno = MPIR_Comm_group_impl(win_ptr->comm_ptr, &win_grp_ptr);
-    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-    mpi_errno = MPIR_Group_translate_ranks_impl(group_ptr, group_ptr->size,
-                                                ranks_in_grp, win_grp_ptr, ranks_in_win_grp);
-    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-    mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
-    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-
 #undef FUNCNAME
 #define FUNCNAME MPIDI_Win_post
 #undef FCNAME
@@ -859,6 +831,12 @@ int MPIDI_Win_test(MPID_Win * win_ptr, int *flag)
 }
 
 
+/********************************************************************************/
+/* Passive Target synchronization (including WIN_LOCK, WIN_UNLOCK, WIN_FLUSH,   */
+/* WIN_FLUSH_LOCAL, WIN_LOCK_ALL, WIN_UNLOCK_ALL, WIN_FLUSH_ALL,                */
+/* WIN_FLUSH_LOCAL_ALL, WIN_SYNC)                                               */
+/********************************************************************************/
+
 #undef FUNCNAME
 #define FUNCNAME MPIDI_Win_lock
 #undef FCNAME
@@ -1065,82 +1043,6 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
 
 
 #undef FUNCNAME
-#define FUNCNAME MPIDI_Win_flush_all
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPIDI_Win_flush_all(MPID_Win * win_ptr)
-{
-    int i, made_progress = 0;
-    int local_completed = 0, remote_completed = 0;
-    MPIDI_RMA_Target_t *curr_target = NULL;
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);
-
-    MPIDI_RMA_FUNC_ENTER(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);
-
-    MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
-                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
-                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
-                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
-                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
-
-    /* Ensure ordering of load/store operations. */
-    if (win_ptr->shm_allocated == TRUE) {
-        OPA_read_write_barrier();
-    }
-
-    /* When the process tries to acquire the lock on itself, it does not
-       go through the progress engine. Therefore, it is possible that
-       one process always grants the lock to itself but never process
-       events coming from other processes. This may cause deadlock in
-       applications where the program execution on target process depends
-       on the happening of events from other processes. Here we poke
-       the progress engine once to avoid such issue.  */
-    mpi_errno = poke_progress_engine();
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-
-    /* Set sync_flag in sync struct. */
-    for (i = 0; i < win_ptr->num_slots; i++) {
-        curr_target = win_ptr->slots[i].target_list;
-        while (curr_target != NULL) {
-            if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
-                curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
-                curr_target->sync.have_remote_incomplete_ops = 0;
-                curr_target->sync.outstanding_acks++;
-            }
-            curr_target = curr_target->next;
-        }
-    }
-
-    /* Issue out all operations. */
-    mpi_errno = MPIDI_CH3I_RMA_Make_progress_win(win_ptr, &made_progress);
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-
-    /* Wait for remote completion. */
-    do {
-        mpi_errno = MPIDI_CH3I_RMA_Cleanup_ops_win(win_ptr, &local_completed,
-                                                   &remote_completed);
-        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-        if (!remote_completed) {
-            mpi_errno = wait_progress_engine();
-            if (mpi_errno != MPI_SUCCESS)
-                MPIU_ERR_POP(mpi_errno);
-        }
-    } while (!remote_completed);
-
-  fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);
-    return mpi_errno;
-    /* --BEGIN ERROR HANDLING-- */
-  fn_fail:
-    goto fn_exit;
-    /* --END ERROR HANDLING-- */
-}
-
-
-#undef FUNCNAME
 #define FUNCNAME MPIDI_Win_flush
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
@@ -1315,75 +1217,6 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
 
 
 #undef FUNCNAME
-#define FUNCNAME MPIDI_Win_flush_local_all
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
-{
-    int i, made_progress = 0;
-    int local_completed = 0, remote_completed = 0;
-    MPIDI_RMA_Target_t *curr_target = NULL;
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
-
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
-
-    MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
-                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
-                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
-                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
-                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
-
-    /* When the process tries to acquire the lock on itself, it does not
-       go through the progress engine. Therefore, it is possible that
-       one process always grants the lock to itself but never process
-       events coming from other processes. This may cause deadlock in
-       applications where the program execution on target process depends
-       on the happening of events from other processes. Here we poke
-       the progress engine once to avoid such issue.  */
-    mpi_errno = poke_progress_engine();
-    if (mpi_errno != MPI_SUCCESS)
-        MPIU_ERR_POP(mpi_errno);
-
-    /* Set sync_flag in sync struct. */
-    for (i = 0; i < win_ptr->num_slots; i++) {
-        curr_target = win_ptr->slots[i].target_list;
-        while (curr_target != NULL) {
-            if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH_LOCAL) {
-                curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH_LOCAL;
-            }
-            curr_target = curr_target->next;
-        }
-    }
-
-    /* issue out all operations. */
-    mpi_errno = MPIDI_CH3I_RMA_Make_progress_win(win_ptr, &made_progress);
-    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-    /* Wait for local completion. */
-    do {
-        mpi_errno = MPIDI_CH3I_RMA_Cleanup_ops_win(win_ptr, &local_completed,
-                                                   &remote_completed);
-        if (mpi_errno != MPI_SUCCESS)
-            MPIU_ERR_POP(mpi_errno);
-        if (!local_completed) {
-            mpi_errno = wait_progress_engine();
-            if (mpi_errno != MPI_SUCCESS)
-                MPIU_ERR_POP(mpi_errno);
-        }
-    } while (!local_completed);
-
-  fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
-    return mpi_errno;
-    /* --BEGIN ERROR HANDLING-- */
-  fn_fail:
-    goto fn_exit;
-    /* --END ERROR HANDLING-- */
-}
-
-
-#undef FUNCNAME
 #define FUNCNAME MPIDI_Win_lock_all
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
@@ -1607,6 +1440,151 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
 
 
 #undef FUNCNAME
+#define FUNCNAME MPIDI_Win_flush_all
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPIDI_Win_flush_all(MPID_Win * win_ptr)     /* flush all targets of win_ptr; returns mpi_errno */
+{
+    int i, made_progress = 0;
+    int local_completed = 0, remote_completed = 0;
+    MPIDI_RMA_Target_t *curr_target = NULL;
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);  /* NOTE(review): flush_local_all uses MPID_STATE_ -- confirm prefix */
+
+    MPIDI_RMA_FUNC_ENTER(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);
+
+    MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
+                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
+                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
+                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
+                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");      /* none of the four states => RMA sync error */
+
+    /* Ensure ordering of load/store operations (barrier only when shm_allocated is TRUE). */
+    if (win_ptr->shm_allocated == TRUE) {
+        OPA_read_write_barrier();
+    }
+
+    /* When a process tries to acquire the lock on itself, it does not
+       go through the progress engine. Therefore, it is possible that
+       a process always grants the lock to itself but never processes
+       events coming from other processes. This may cause deadlock in
+       applications where program execution on the target process depends
+       on events arriving from other processes. Here we poke the
+       progress engine once to avoid this issue.  */
+    mpi_errno = poke_progress_engine();
+    if (mpi_errno != MPI_SUCCESS)
+        MPIU_ERR_POP(mpi_errno);
+
+    /* Walk every slot's target list and raise each sync_flag to at least MPIDI_RMA_SYNC_FLUSH. */
+    for (i = 0; i < win_ptr->num_slots; i++) {
+        curr_target = win_ptr->slots[i].target_list;
+        while (curr_target != NULL) {
+            if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
+                curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
+                curr_target->sync.have_remote_incomplete_ops = 0;
+                curr_target->sync.outstanding_acks++;   /* presumably one ack expected per newly flagged target -- confirm */
+            }
+            curr_target = curr_target->next;
+        }
+    }
+
+    /* Issue all operations on this window. */
+    mpi_errno = MPIDI_CH3I_RMA_Make_progress_win(win_ptr, &made_progress);
+    if (mpi_errno != MPI_SUCCESS)
+        MPIU_ERR_POP(mpi_errno);
+
+    /* Wait for remote completion: clean up ops, then block in the progress engine until done. */
+    do {
+        mpi_errno = MPIDI_CH3I_RMA_Cleanup_ops_win(win_ptr, &local_completed,
+                                                   &remote_completed);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+        if (!remote_completed) {
+            mpi_errno = wait_progress_engine();
+            if (mpi_errno != MPI_SUCCESS)
+                MPIU_ERR_POP(mpi_errno);
+        }
+    } while (!remote_completed);
+
+  fn_exit:
+    MPIDI_RMA_FUNC_EXIT(MPIDI_STATE_MPIDI_WIN_FLUSH_ALL);
+    return mpi_errno;
+    /* --BEGIN ERROR HANDLING-- */
+  fn_fail:
+    goto fn_exit;
+    /* --END ERROR HANDLING-- */
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_Win_flush_local_all
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)       /* locally flush all targets of win_ptr; returns mpi_errno */
+{
+    int i, made_progress = 0;
+    int local_completed = 0, remote_completed = 0;
+    MPIDI_RMA_Target_t *curr_target = NULL;
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
+
+    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
+
+    MPIU_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
+                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
+                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
+                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
+                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");      /* none of the four states => RMA sync error */
+
+    /* When a process tries to acquire the lock on itself, it does not
+       go through the progress engine. Therefore, it is possible that
+       a process always grants the lock to itself but never processes
+       events coming from other processes. This may cause deadlock in
+       applications where program execution on the target process depends
+       on events arriving from other processes. Here we poke the
+       progress engine once to avoid this issue.  */
+    mpi_errno = poke_progress_engine();
+    if (mpi_errno != MPI_SUCCESS)
+        MPIU_ERR_POP(mpi_errno);
+
+    /* Walk every slot's target list and raise each sync_flag to at least MPIDI_RMA_SYNC_FLUSH_LOCAL. */
+    for (i = 0; i < win_ptr->num_slots; i++) {
+        curr_target = win_ptr->slots[i].target_list;
+        while (curr_target != NULL) {
+            if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH_LOCAL) {
+                curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH_LOCAL;
+            }
+            curr_target = curr_target->next;
+        }
+    }
+
+    /* Issue all operations on this window. */
+    mpi_errno = MPIDI_CH3I_RMA_Make_progress_win(win_ptr, &made_progress);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    /* Wait for local completion: clean up ops, then block in the progress engine until done. */
+    do {
+        mpi_errno = MPIDI_CH3I_RMA_Cleanup_ops_win(win_ptr, &local_completed,
+                                                   &remote_completed);
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
+        if (!local_completed) {
+            mpi_errno = wait_progress_engine();
+            if (mpi_errno != MPI_SUCCESS)
+                MPIU_ERR_POP(mpi_errno);
+        }
+    } while (!local_completed);
+
+  fn_exit:
+    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FLUSH_LOCAL_ALL);
+    return mpi_errno;
+    /* --BEGIN ERROR HANDLING-- */
+  fn_fail:
+    goto fn_exit;
+    /* --END ERROR HANDLING-- */
+}
+
+
+#undef FUNCNAME
 #define FUNCNAME MPIDI_Win_sync
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c |    7 +-
 src/mpid/ch3/channels/nemesis/src/ch3_progress.c  |    8 +-
 src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c   |   18 -
 src/mpid/ch3/include/mpid_rma_issue.h             |   83 ++-
 src/mpid/ch3/include/mpid_rma_lockqueue.h         |   66 ++
 src/mpid/ch3/include/mpid_rma_oplist.h            |   10 +-
 src/mpid/ch3/include/mpid_rma_types.h             |   11 +-
 src/mpid/ch3/include/mpidimpl.h                   |    8 +-
 src/mpid/ch3/include/mpidpkt.h                    |  422 +++++++----
 src/mpid/ch3/include/mpidpre.h                    |   17 +-
 src/mpid/ch3/include/mpidrma.h                    |  553 ++++++++++++--
 src/mpid/ch3/src/ch3u_handle_recv_pkt.c           |   10 +-
 src/mpid/ch3/src/ch3u_handle_recv_req.c           |  668 +++++++++++++++--
 src/mpid/ch3/src/ch3u_request.c                   |    2 +
 src/mpid/ch3/src/ch3u_rma_oplist.c                |  573 +++++++--------
 src/mpid/ch3/src/ch3u_rma_ops.c                   |  128 +---
 src/mpid/ch3/src/ch3u_rma_pkthandler.c            |  338 ++++++---
 src/mpid/ch3/src/ch3u_rma_sync.c                  |  813 ++++++++++++++-------
 src/mpid/ch3/src/ch3u_win_fns.c                   |   65 ++-
 src/mpid/ch3/src/mpid_rma.c                       |   87 ++--
 src/mpid/ch3/src/mpidi_printf.c                   |   10 +-
 src/mpid/ch3/src/mpidi_rma.c                      |   45 +-
 22 files changed, 2792 insertions(+), 1150 deletions(-)
 create mode 100644 src/mpid/ch3/include/mpid_rma_lockqueue.h


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list