[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.0.2-48-g9e68dcf

mysql vizuser noreply at mpich.org
Thu Feb 21 18:03:31 CST 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  9e68dcf86dbd5ef70ea937f277aed512f14ce36e (commit)
       via  3aeb375de4f5860a1eb6c1ca4437977e557324d2 (commit)
       via  402bc1614bac71a43d4220a6ae432f86ed5cd82d (commit)
       via  7dee7463716e21c7902b8e8212cd4df32a413972 (commit)
       via  64b60170bc43d052426295356f2d7cb0d70705f4 (commit)
       via  625bf4fd529b67b6d15ebf1c1993cf10d91e1170 (commit)
       via  223fce451bc9753ee79df426366987e3226c51ee (commit)
       via  9c3a32fd9881db4a287df33f029e2eb64c83f3dc (commit)
       via  255fb4a63c664f6752d6a4b69c3680d6a0218197 (commit)
       via  422006da8dcd8ab6ee49169e41e98a33a0abd159 (commit)
       via  1c737a373c237cfddeb684ee30d3936b9fd6ebe4 (commit)
       via  4e67607f342ce4f8591c1ac95166153df72d02c0 (commit)
       via  90be9ee1506481dcab84b89ccb8b0dc186b8b37b (commit)
       via  478a80f5f7311f087e8eb69dd1dd65f83c5a3d0a (commit)
       via  c3f87fe32a5ec9208906d874d2b8563d188a7e71 (commit)
       via  90b3b2f83c4659fdcdb6bf06a410c8fb63583374 (commit)
       via  bba35589949c92505de9f8f5fd7bc5d9e2693b23 (commit)
       via  c5312557541a513ed36d4a2d9090f70cf3a2e949 (commit)
      from  617927fb6680aa0b5209fd29ab453e25c7ab3750 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/9e68dcf86dbd5ef70ea937f277aed512f14ce36e

commit 9e68dcf86dbd5ef70ea937f277aed512f14ce36e
Merge: 617927f 3aeb375
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Thu Feb 21 18:01:49 2013 -0600

    Merge branch 'packet-header-flags'
    
    This merge adds a flags field to the packet header in RMA packets.  The flags
    are used to piggyback RMA synchronization operations.


http://git.mpich.org/mpich.git/commitdiff/3aeb375de4f5860a1eb6c1ca4437977e557324d2

commit 3aeb375de4f5860a1eb6c1ca4437977e557324d2
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Thu Feb 21 17:40:53 2013 -0600

    Added missing CPPFLAGS in rma tests makefile
    
    Reviewer: goodell

diff --git a/test/mpi/rma/Makefile.am b/test/mpi/rma/Makefile.am
index e92c677..eb5b8d6 100644
--- a/test/mpi/rma/Makefile.am
+++ b/test/mpi/rma/Makefile.am
@@ -156,8 +156,8 @@ get_accumulate_double_derived_SOURCES = get_accumulate.c
 get_accumulate_int_derived_SOURCES      = get_accumulate.c
 
 mutex_bench_SOURCES              = mutex_bench.c mcs-mutex.c mcs-mutex.h
-mutex_bench_shared_CPPFLAGS      = -DUSE_WIN_SHARED
+mutex_bench_shared_CPPFLAGS      = -DUSE_WIN_SHARED $(AM_CPPFLAGS)
 mutex_bench_shared_SOURCES       = mutex_bench.c mcs-mutex.c mcs-mutex.h
 
 linked_list_bench_lock_shr_nocheck_SOURCES  = linked_list_bench_lock_shr.c
-linked_list_bench_lock_shr_nocheck_CPPFLAGS = -DUSE_MODE_NOCHECK
+linked_list_bench_lock_shr_nocheck_CPPFLAGS = -DUSE_MODE_NOCHECK $(AM_CPPFLAGS)

http://git.mpich.org/mpich.git/commitdiff/402bc1614bac71a43d4220a6ae432f86ed5cd82d

commit 402bc1614bac71a43d4220a6ae432f86ed5cd82d
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Tue Feb 12 11:07:58 2013 -0600

    Remove unnecessary flush for empty epochs
    
    I initially added a conservative flush message for empty epochs (mostly
    for documentation purposes).  This is not needed in the current
    implementation, since ops are not issued eagerly.  If/when eager ops are
    implemented, this patch should be reverted and additional window state
    tracking for this case should be added.  In the meantime, I am removing
    this code to improve performance.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 12a4031..1f6cbf9 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -2682,14 +2682,8 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
         *wait_for_rma_done_pkt = 1;
     }
-    else if (sync_flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
-        /* No communication operations were left to process, but the RMA epoch
-           is open.  Send a flush message to ensure remote completion. */
-        /* FIXME: This should be unnecessary for exclusive lock epochs */
-        mpi_errno = MPIDI_CH3I_Send_flush_msg(target_rank, win_ptr);
-        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-        *wait_for_rma_done_pkt = 1;
-    }
+    /* NOTE: Flush -- If RMA ops are issued eagerly, Send_flush_msg should be
+       called here and wait_for_rma_done_pkt should be set. */
 
     MPIU_Assert(MPIDI_CH3I_RMA_Ops_isempty(&win_ptr->targets[target_rank].rma_ops_list));
 

http://git.mpich.org/mpich.git/commitdiff/7dee7463716e21c7902b8e8212cd4df32a413972

commit 7dee7463716e21c7902b8e8212cd4df32a413972
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Sat Feb 9 08:30:45 2013 -0600

    Finished adding memory fences for shr mem win
    
    This patch adds a few missing memory fences to the window
    synchronization operations for shared memory windows.  This closes
    ticket #1729.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 5a5e260..12a4031 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1324,6 +1324,11 @@ int MPIDI_Win_post(MPID_Group *post_grp_ptr, int assert, MPID_Win *win_ptr)
 	MPIU_INSTR_DURATION_END(winpost_clearlock);
     }
         
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+        OPA_read_write_barrier();
+    }
+
     post_grp_size = post_grp_ptr->size;
         
     /* initialize the completion counter */
@@ -1476,6 +1481,11 @@ int MPIDI_Win_start(MPID_Group *group_ptr, int assert, MPID_Win *win_ptr)
 	MPIU_INSTR_DURATION_END(winstart_clearlock);
     }
     
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+        OPA_read_write_barrier();
+    }
+
     win_ptr->start_group_ptr = group_ptr;
     MPIR_Group_add_ref( group_ptr );
     win_ptr->start_assert = assert;
@@ -1522,6 +1532,11 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
     comm_ptr = win_ptr->comm_ptr;
     comm_size = comm_ptr->local_size;
         
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+        OPA_read_write_barrier();
+    }
+
     /* Translate the ranks of the processes in
        start_group to ranks in win_ptr->comm_ptr */
     
@@ -1801,6 +1816,11 @@ int MPIDI_Win_wait(MPID_Win *win_ptr)
 	MPIU_INSTR_DURATION_END(winwait_wait);
     } 
 
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+        OPA_read_write_barrier();
+    }
+
  fn_exit:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_WAIT);
     return mpi_errno;
@@ -1833,12 +1853,17 @@ int MPIDI_Win_test(MPID_Win *win_ptr, int *flag)
 
     *flag = (win_ptr->my_counter) ? 0 : 1;
 
-    /* Track access epoch state */
     if (*flag) {
+        /* Track access epoch state */
         if (win_ptr->epoch_state == MPIDI_EPOCH_PSCW)
             win_ptr->epoch_state = MPIDI_EPOCH_START;
         else
             win_ptr->epoch_state = MPIDI_EPOCH_NONE;
+
+        /* Ensure ordering of load/store operations. */
+        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+            OPA_read_write_barrier();
+        }
     }
 
  fn_exit:
@@ -1906,7 +1931,6 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr)
         /* Lock must be taken immediately for shared memory windows because of
          * load/store access */
 
-        /* FIXME: We may be able to make this just a read or write barrier */
         OPA_read_write_barrier();
 
         mpi_errno = MPIDI_CH3I_Send_lock_msg(dest, lock_type, win_ptr);
@@ -1960,9 +1984,8 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
             win_ptr->epoch_state = MPIDI_EPOCH_NONE;
     }
 
-    /* Ensure that load/store operations are visible. */
+    /* Ensure ordering of load/store operations. */
     if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
-        /* FIXME: We may be able to make this just a read or write barrier */
         OPA_read_write_barrier();
     }
 
@@ -2188,10 +2211,9 @@ int MPIDI_Win_flush(int rank, MPID_Win *win_ptr)
        need to insert this read/write memory fence for shared memory windows. */
 
     /* For shared memory windows, all operations are done immediately, so there
-       is nothing to flush.  Ensure that load/store operations are visible and
+       is nothing to flush.  Ensure ordering of load/store operations and
        return. */
     if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
-        /* FIXME: We may be able to make this just a read or write barrier */
         OPA_read_write_barrier();
         goto fn_exit;
     }
@@ -2384,8 +2406,6 @@ int MPIDI_Win_lock_all(int assert, MPID_Win *win_ptr)
 
     if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
         /* Immediately lock all targets for load/store access */
-
-        /* FIXME: We may be able to make this just a read or write barrier */
         OPA_read_write_barrier();
 
         for (i = 0; i < MPIR_Comm_size(win_ptr->comm_ptr); i++) {

http://git.mpich.org/mpich.git/commitdiff/64b60170bc43d052426295356f2d7cb0d70705f4

commit 64b60170bc43d052426295356f2d7cb0d70705f4
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Fri Feb 8 11:28:36 2013 -0600

    Cleaned up error names for CH3 RMA implementation
    
    Moved RMA errors used in the CH3 RMA implementation into the ch3
    errnames.txt file.
    
    Reviewer: goodell

diff --git a/src/mpi/errhan/errnames.txt b/src/mpi/errhan/errnames.txt
index 2b9d49d..f44112e 100644
--- a/src/mpi/errhan/errnames.txt
+++ b/src/mpi/errhan/errnames.txt
@@ -706,8 +706,6 @@ is too big (> MPIU_SHMW_GHND_SZ)
 **windeserialize:deserializing win object
 **winnoprogress: Detected an error while in progress wait for RMA messages
 **winInvalidOp: Invalid RMA operation
-**winRMAmessage: RMA message operation failed
-**winRMArequest: RMA message operation failed; request returned error
 **open %s:open failed - %s
 **setenv:setenv failed
 **putenv:putenv failed
diff --git a/src/mpid/ch3/errnames.txt b/src/mpid/ch3/errnames.txt
index 9e55285..49fc6b7 100644
--- a/src/mpid/ch3/errnames.txt
+++ b/src/mpid/ch3/errnames.txt
@@ -40,3 +40,4 @@
 **ch3|sync_arg %d:Invalid RMA synchronization argument (%d)
 **ch3|rma_flags:Invalid combination of RMA packet flags
 **ch3|nocheck_invalid:MPI_MODE_NOCHECK was specified, but the lock was not available at the target
+**ch3|rma_msg:RMA message operation failed
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index d497288..5a5e260 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1919,7 +1919,7 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr)
         /* TODO: Make this mode of operation available through an assert
            argument or info key. */
         mpi_errno = MPIDI_CH3I_Send_lock_msg(dest, lock_type, win_ptr);
-        MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**winRMAmessage");
+        MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
     }
 
  fn_exit:
@@ -2711,7 +2711,7 @@ static int MPIDI_CH3I_Send_lock_msg(int dest, int lock_type, MPID_Win *win_ptr)
     MPIU_THREAD_CS_ENTER(CH3COMM,vc);
     mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_pkt, sizeof(*lock_pkt), &req);
     MPIU_THREAD_CS_EXIT(CH3COMM,vc);
-    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**winRMAmessage");
+    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
 
     /* release the request returned by iStartMsg */
     if (req != NULL) {
@@ -2846,7 +2846,7 @@ static int MPIDI_CH3I_Send_unlock_msg(int dest, MPID_Win *win_ptr) {
     MPIU_THREAD_CS_ENTER(CH3COMM,vc);
     mpi_errno = MPIDI_CH3_iStartMsg(vc, unlock_pkt, sizeof(*unlock_pkt), &req);
     MPIU_THREAD_CS_EXIT(CH3COMM,vc);
-    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**winRMAmessage");
+    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
 
     /* Release the request returned by iStartMsg */
     if (req != NULL) {
@@ -2887,7 +2887,7 @@ static int MPIDI_CH3I_Send_flush_msg(int dest, MPID_Win *win_ptr) {
     MPIU_THREAD_CS_ENTER(CH3COMM,vc);
     mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_pkt, sizeof(*flush_pkt), &req);
     MPIU_THREAD_CS_EXIT(CH3COMM,vc);
-    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**winRMAmessage");
+    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
 
     /* Release the request returned by iStartMsg */
     if (req != NULL) {
@@ -3086,7 +3086,7 @@ static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *win_ptr, int target_rank)
                 if (mpi_errno != MPI_SUCCESS)
                 {
 		    MPID_Progress_end(&progress_state);
-		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMAmessage");
+		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rma_msg");
                 }
                 /* --END ERROR HANDLING-- */
             }
@@ -3095,7 +3095,7 @@ static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *win_ptr, int target_rank)
         
         mpi_errno = request->status.MPI_ERROR;
         if (mpi_errno != MPI_SUCCESS) {
-	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMAmessage");
+	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rma_msg");
         }
                 
         MPID_Request_release(request);
@@ -3206,7 +3206,7 @@ static int MPIDI_CH3I_Send_lock_get(MPID_Win *win_ptr, int target_rank)
             if (mpi_errno != MPI_SUCCESS)
             {
 		MPID_Progress_end(&progress_state);
-		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMAmessage");
+		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rma_msg");
             }
             /* --END ERROR HANDLING-- */
         }
@@ -3215,7 +3215,7 @@ static int MPIDI_CH3I_Send_lock_get(MPID_Win *win_ptr, int target_rank)
     
     mpi_errno = rreq->status.MPI_ERROR;
     if (mpi_errno != MPI_SUCCESS) {
-	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMAmessage");
+	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rma_msg");
     }
             
     /* if origin datatype was a derived datatype, it will get freed when the 
@@ -4766,7 +4766,7 @@ int MPIDI_CH3_PktHandler_Unlock( MPIDI_VC_t *vc ATTRIBUTE((unused)),
 
     MPID_Win_get_ptr(unlock_pkt->target_win_handle, win_ptr);
     mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
-    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**winRMAmessage");
+    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
 
     MPIDI_CH3_Progress_signal_completion();
 
@@ -4810,7 +4810,7 @@ int MPIDI_CH3_PktHandler_Flush( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
         MPIU_THREAD_CS_ENTER(CH3COMM,vc);
         mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_pkt, sizeof(*flush_pkt), &req);
         MPIU_THREAD_CS_EXIT(CH3COMM,vc);
-        MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**winRMAmessage");
+        MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
 
         /* Release the request returned by iStartMsg */
         if (req != NULL) {
@@ -4862,7 +4862,7 @@ static int MPIDI_CH3I_RMAListComplete( MPID_Win *win_ptr,
 		    /* --BEGIN ERROR HANDLING-- */
 		    if (mpi_errno != MPI_SUCCESS) {
 			MPID_Progress_end(&progress_state);
-			MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMAmessage");
+			MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rma_msg");
 		    }
 		    /* --END ERROR HANDLING-- */
 		    MPID_Request_release(curr_ptr->request);
@@ -4945,7 +4945,7 @@ static int MPIDI_CH3I_RMAListPartialComplete( MPID_Win *win_ptr,
 		/* --BEGIN ERROR HANDLING-- */
 		if (mpi_errno != MPI_SUCCESS) {
 		    MPID_Progress_end(&progress_state);
-		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMAmessage");
+		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rma_msg");
 		}
 		/* --END ERROR HANDLING-- */
 		MPID_Request_release(curr_ptr->request);
@@ -5087,7 +5087,7 @@ int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma
             MPIU_THREAD_CS_ENTER(CH3COMM,vc);
             mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_pkt, sizeof(*flush_pkt), &req);
             MPIU_THREAD_CS_EXIT(CH3COMM,vc);
-            MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**winRMAmessage");
+            MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
 
             /* Release the request returned by iStartMsg */
             if (req != NULL) {

http://git.mpich.org/mpich.git/commitdiff/625bf4fd529b67b6d15ebf1c1993cf10d91e1170

commit 625bf4fd529b67b6d15ebf1c1993cf10d91e1170
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Thu Feb 7 16:48:24 2013 -0600

    Add a MODE_NOCHECK version of the llist bench test
    
    Additional testing for shared lock with MODE_NOCHECK.
    
    Reviewer: goodell

diff --git a/test/mpi/rma/Makefile.am b/test/mpi/rma/Makefile.am
index 579721a..e92c677 100644
--- a/test/mpi/rma/Makefile.am
+++ b/test/mpi/rma/Makefile.am
@@ -91,6 +91,7 @@ noinst_PROGRAMS =          \
     linked_list_bench_lock_all  \
     linked_list_bench_lock_excl \
     linked_list_bench_lock_shr  \
+    linked_list_bench_lock_shr_nocheck  \
     fetch_and_op_char      \
     fetch_and_op_short     \
     fetch_and_op_int       \
@@ -157,3 +158,6 @@ get_accumulate_int_derived_SOURCES      = get_accumulate.c
 mutex_bench_SOURCES              = mutex_bench.c mcs-mutex.c mcs-mutex.h
 mutex_bench_shared_CPPFLAGS      = -DUSE_WIN_SHARED
 mutex_bench_shared_SOURCES       = mutex_bench.c mcs-mutex.c mcs-mutex.h
+
+linked_list_bench_lock_shr_nocheck_SOURCES  = linked_list_bench_lock_shr.c
+linked_list_bench_lock_shr_nocheck_CPPFLAGS = -DUSE_MODE_NOCHECK
diff --git a/test/mpi/rma/linked_list_bench_lock_shr.c b/test/mpi/rma/linked_list_bench_lock_shr.c
index 49c3e04..62315ed 100644
--- a/test/mpi/rma/linked_list_bench_lock_shr.c
+++ b/test/mpi/rma/linked_list_bench_lock_shr.c
@@ -131,7 +131,11 @@ int main(int argc, char **argv) {
                 if (verbose)
                     printf("%d: Appending to <%d, %p>\n", procid, tail_ptr.rank, (void*) tail_ptr.disp);
 
+#ifdef USE_MODE_NOCHECK
+                MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);
+#else
                 MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, 0, llist_win);
+#endif
                 MPI_Accumulate(&new_elem_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                                (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next), sizeof(llist_ptr_t),
                                MPI_BYTE, MPI_REPLACE, llist_win);
@@ -146,7 +150,11 @@ int main(int argc, char **argv) {
             {
                 llist_ptr_t next_tail_ptr;
 
+#ifdef USE_MODE_NOCHECK
+                MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);
+#else
                 MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, 0, llist_win);
+#endif
                 MPI_Get_accumulate( NULL, 0, MPI_DATATYPE_NULL, &next_tail_ptr,
                                     sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                                     (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next),
diff --git a/test/mpi/rma/testlist b/test/mpi/rma/testlist
index 20c6c03..772be24 100644
--- a/test/mpi/rma/testlist
+++ b/test/mpi/rma/testlist
@@ -95,5 +95,6 @@ pscw_ordering 4 mpiversion=3.0
 linked_list_bench_lock_all 4 mpiversion=3.0
 linked_list_bench_lock_excl 4 mpiversion=3.0
 linked_list_bench_lock_shr 4 mpiversion=3.0
+linked_list_bench_lock_shr_nocheck 4 mpiversion=3.0
 mutex_bench 4 mpiversion=3.0
 mutex_bench_shared 4 mpiversion=3.0

http://git.mpich.org/mpich.git/commitdiff/223fce451bc9753ee79df426366987e3226c51ee

commit 223fce451bc9753ee79df426366987e3226c51ee
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Thu Feb 7 16:29:22 2013 -0600

    Implemented lock op piggybacking for MODE_NOCHECK
    
    When the MPI_MODE_NOCHECK assertion is given to a passive target lock
    operation, we defer acquisition of the lock and piggyback the request on
    the first RMA op to the target.  This eliminates a round-trip
    lock-request message.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/errnames.txt b/src/mpid/ch3/errnames.txt
index 3cde8bd..9e55285 100644
--- a/src/mpid/ch3/errnames.txt
+++ b/src/mpid/ch3/errnames.txt
@@ -39,3 +39,4 @@
 **ch3|sync_arg:Invalid RMA synchronization argument
 **ch3|sync_arg %d:Invalid RMA synchronization argument (%d)
 **ch3|rma_flags:Invalid combination of RMA packet flags
+**ch3|nocheck_invalid:MPI_MODE_NOCHECK was specified, but the lock was not available at the target
diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index 0185480..48b8663 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -1214,6 +1214,7 @@ int MPIDI_CH3I_Release_lock(MPID_Win * win_ptr);
 int MPIDI_CH3I_Try_acquire_win_lock(MPID_Win * win_ptr, int requested_lock);
 int MPIDI_CH3I_Send_lock_granted_pkt(MPIDI_VC_t * vc, MPID_Win *win_ptr, int source_win_hdl);
 int MPIDI_CH3I_Send_pt_rma_done_pkt(MPIDI_VC_t * vc, MPID_Win *win_ptr, int source_win_hdl);
+int MPIDI_CH3_Start_rma_op_target(MPID_Win *win_ptr, MPIDI_CH3_Pkt_flags_t flags);
 int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma_update,
                                    MPIDI_CH3_Pkt_flags_t flags, MPI_Win source_win_handle);
 
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 0bdae9c..935c03b 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -116,7 +116,10 @@ typedef uint16_t MPIDI_CH3_Pkt_flags_t;
 #define MPIDI_CH3_PKT_FLAG_RMA_UNLOCK           2  /* ..............X. */
 #define MPIDI_CH3_PKT_FLAG_RMA_FLUSH            4  /* .............X.. */
 #define MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK          8  /* ............X... */
-#define MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE      16 /* ...........X.... */
+#define MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE     16  /* ...........X.... */
+#define MPIDI_CH3_PKT_FLAG_RMA_NOCHECK         32  /* ..........X..... */
+#define MPIDI_CH3_PKT_FLAG_RMA_SHARED          64  /* .........X...... */
+#define MPIDI_CH3_PKT_FLAG_RMA_EXCLUSIVE      128  /* ........X....... */
 
 typedef struct MPIDI_CH3_Pkt_send
 {
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index f6dd454..d497288 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1915,7 +1915,7 @@ int MPIDI_Win_lock(int lock_type, int dest, int assert, MPID_Win *win_ptr)
         mpi_errno = MPIDI_CH3I_Wait_for_lock_granted(win_ptr, dest);
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
     }
-    else if (MPIR_PARAM_RMA_LOCK_IMMED) {
+    else if (MPIR_PARAM_RMA_LOCK_IMMED && ((assert & MPI_MODE_NOCHECK) == 0)) {
         /* TODO: Make this mode of operation available through an assert
            argument or info key. */
         mpi_errno = MPIDI_CH3I_Send_lock_msg(dest, lock_type, win_ptr);
@@ -2040,16 +2040,23 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
         
     if (single_op_opt == 0) {
 
-        /* Send a lock packet over to the target. wait for the lock_granted
-           reply. Then do all the RMA ops. */
+        /* Send a lock packet over to the target and wait for the lock_granted
+           reply. If the user gave MODE_NOCHECK, we will piggyback the lock
+           request on the first RMA op.  Then do all the RMA ops. */
 
-        if (win_ptr->targets[dest].remote_lock_state == MPIDI_CH3_WIN_LOCK_CALLED) {
-            mpi_errno = MPIDI_CH3I_Send_lock_msg(dest, win_ptr->targets[dest].remote_lock_mode, win_ptr);
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        if ((win_ptr->targets[dest].remote_lock_assert & MPI_MODE_NOCHECK) == 0)
+        {
+            if (win_ptr->targets[dest].remote_lock_state == MPIDI_CH3_WIN_LOCK_CALLED) {
+                mpi_errno = MPIDI_CH3I_Send_lock_msg(dest, win_ptr->targets[dest].remote_lock_mode,
+                                                     win_ptr);
+                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            }
         }
 
-        mpi_errno = MPIDI_CH3I_Wait_for_lock_granted(win_ptr, dest);
-        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        if (win_ptr->targets[dest].remote_lock_state == MPIDI_CH3_WIN_LOCK_REQUESTED) {
+            mpi_errno = MPIDI_CH3I_Wait_for_lock_granted(win_ptr, dest);
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        }
 
 	/* Now do all the RMA operations */
         mpi_errno = MPIDI_CH3I_Do_passive_target_rma(win_ptr, dest, &wait_for_rma_done_pkt,
@@ -2487,27 +2494,29 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_DO_PASSIVE_TARGET_RMA);
 
     MPIU_Assert(win_ptr->targets[target_rank].remote_lock_state == MPIDI_CH3_WIN_LOCK_GRANTED ||
-                win_ptr->targets[target_rank].remote_lock_state == MPIDI_CH3_WIN_LOCK_FLUSH);
-
-    if (win_ptr->targets[target_rank].remote_lock_mode == MPI_LOCK_EXCLUSIVE) {
-        /* Exclusive lock -- no need to wait for rma done pkt at the end.  This
-           is because the target won't grant another process access to the
-           window until all of our operations complete at that target.  Thus,
-           there is no third-party communication issue. */
+                win_ptr->targets[target_rank].remote_lock_state == MPIDI_CH3_WIN_LOCK_FLUSH ||
+                (win_ptr->targets[target_rank].remote_lock_state == MPIDI_CH3_WIN_LOCK_CALLED &&
+                 win_ptr->targets[target_rank].remote_lock_assert & MPI_MODE_NOCHECK));
+
+    if (win_ptr->targets[target_rank].remote_lock_mode == MPI_LOCK_EXCLUSIVE &&
+        win_ptr->targets[target_rank].remote_lock_state != MPIDI_CH3_WIN_LOCK_CALLED) {
+        /* Exclusive lock already held -- no need to wait for rma done pkt at
+           the end.  This is because the target won't grant another process
+           access to the window until all of our operations complete at that
+           target.  Thus, there is no third-party communication issue. */
         *wait_for_rma_done_pkt = 0;
     }
     else if (MPIDI_CH3I_RMA_Ops_isempty(&win_ptr->targets[target_rank].rma_ops_list)) {
-        /* Shared lock -- The ops list is empty.  Any issued ops are already
-           remote complete; done packet is not needed for safe third party
-           communication. */
+        /* The ops list is empty -- NOTE: we assume this is because the epoch
+           was flushed.  Any issued ops are already remote complete; done
+           packet is not needed for safe third party communication. */
         *wait_for_rma_done_pkt = 0;
     }
     else {
         MPIDI_RMA_Op_t *tail = MPIDI_CH3I_RMA_Ops_tail(&win_ptr->targets[target_rank].rma_ops_list);
 
-        /* shared lock. check if any of the rma ops is a get. If so, move it 
-           to the end of the list and do it last, in which case an rma done 
-           pkt is not needed. If there is no get, rma done pkt is needed */
+        /* Check if we can piggyback the RMA done acknowledgement on the last
+           operation in the epoch. */
 
         if (tail->type == MPIDI_RMA_GET ||
             tail->type == MPIDI_RMA_COMPARE_AND_SWAP ||
@@ -2519,9 +2528,10 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
             *wait_for_rma_done_pkt = 0;
         }
         else {
-            /* go through the list and move the first get operation 
-               (if there is one) to the end.  Note that the first
-	       operation may be a lock, so we can skip it */
+            /* Check if there is a get operation, which can be moved to the
+               end to piggyback the RMA done acknowledgement.  Go
+               through the list and move the first get operation (if there is
+               one) to the end. */
             
             *wait_for_rma_done_pkt = 1;
             curr_ptr = MPIDI_CH3I_RMA_Ops_head(&win_ptr->targets[target_rank].rma_ops_list);
@@ -2566,11 +2576,33 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
         MPIU_Assert(nops > 0);
         MPIU_Assert(curr_ptr->target_rank == target_rank);
 
+        /* Piggyback the lock operation on the first op */
+        if (win_ptr->targets[target_rank].remote_lock_state == MPIDI_CH3_WIN_LOCK_CALLED)
+        {
+            MPIU_Assert(win_ptr->targets[target_rank].remote_lock_assert & MPI_MODE_NOCHECK);
+            flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK | MPIDI_CH3_PKT_FLAG_RMA_NOCHECK;
+
+            switch (win_ptr->targets[target_rank].remote_lock_mode) {
+                case MPI_LOCK_SHARED:
+                    flags |= MPIDI_CH3_PKT_FLAG_RMA_SHARED;
+                    break;
+                case MPI_LOCK_EXCLUSIVE:
+                    flags |= MPIDI_CH3_PKT_FLAG_RMA_EXCLUSIVE;
+                    break;
+                default:
+                    MPIU_Assert(0);
+                    break;
+            }
+
+            win_ptr->targets[target_rank].remote_lock_state = MPIDI_CH3_WIN_LOCK_GRANTED;
+        }
+
+        /* Piggyback the unlock/flush operation on the last op */
         if (curr_ptr->next == NULL) {
             if (sync_flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
-                flags = MPIDI_CH3_PKT_FLAG_RMA_UNLOCK;
+                flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK;
             } else if (sync_flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
-                flags = MPIDI_CH3_PKT_FLAG_RMA_FLUSH;
+                flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH;
             } else {
                 MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_RMA_SYNC, "**ch3|sync_arg",
                                      "**ch3|sync_arg %d", sync_flags);
@@ -3267,6 +3299,7 @@ int MPIDI_CH3_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     int complete = 0;
     char *data_buf = NULL;
     MPIDI_msg_sz_t data_len;
+    MPID_Win *win_ptr;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);
     
@@ -3276,8 +3309,6 @@ int MPIDI_CH3_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 
     if (put_pkt->count == 0)
     {
-	MPID_Win *win_ptr;
-	
 	/* it's a 0-byte message sent just to decrement the
 	   completion counter. This happens only in
 	   post/start/complete/wait sync model; therefore, no need
@@ -3291,7 +3322,11 @@ int MPIDI_CH3_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	*rreqp = NULL;
         goto fn_exit;
     }
-        
+
+    MPIU_Assert(put_pkt->target_win_handle != MPI_WIN_NULL);
+    MPID_Win_get_ptr(put_pkt->target_win_handle, win_ptr);
+    mpi_errno = MPIDI_CH3_Start_rma_op_target(win_ptr, put_pkt->flags);
+
     data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
     data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);
 
@@ -3430,6 +3465,7 @@ int MPIDI_CH3_PktHandler_Get( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     int complete;
     char *data_buf = NULL;
     MPIDI_msg_sz_t data_len;
+    MPID_Win *win_ptr;
     int mpi_errno = MPI_SUCCESS;
     int type_size;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);
@@ -3437,7 +3473,11 @@ int MPIDI_CH3_PktHandler_Get( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);
     
     MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received get pkt");
-    
+
+    MPIU_Assert(get_pkt->target_win_handle != MPI_WIN_NULL);
+    MPID_Win_get_ptr(get_pkt->target_win_handle, win_ptr);
+    mpi_errno = MPIDI_CH3_Start_rma_op_target(win_ptr, get_pkt->flags);
+
     data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
     data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);
     
@@ -3560,6 +3600,7 @@ int MPIDI_CH3_PktHandler_Accumulate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     int complete = 0;
     char *data_buf = NULL;
     MPIDI_msg_sz_t data_len;
+    MPID_Win *win_ptr;
     int mpi_errno = MPI_SUCCESS;
     int type_size;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
@@ -3569,6 +3610,10 @@ int MPIDI_CH3_PktHandler_Accumulate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received accumulate pkt");
 
     MPIU_INSTR_DURATION_START(rmapkt_acc);
+    MPIU_Assert(accum_pkt->target_win_handle != MPI_WIN_NULL);
+    MPID_Win_get_ptr(accum_pkt->target_win_handle, win_ptr);
+    mpi_errno = MPIDI_CH3_Start_rma_op_target(win_ptr, accum_pkt->flags);
+
     data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
     data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);
     
@@ -3730,6 +3775,9 @@ int MPIDI_CH3_PktHandler_Accumulate_Immed( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received accumulate immedidate pkt");
 
     MPIU_INSTR_DURATION_START(rmapkt_acc_immed);
+    MPIU_Assert(accum_pkt->target_win_handle != MPI_WIN_NULL);
+    MPID_Win_get_ptr(accum_pkt->target_win_handle, win_ptr);
+    mpi_errno = MPIDI_CH3_Start_rma_op_target(win_ptr, accum_pkt->flags);
 
     /* return the number of bytes processed in this function */
     /* data_len == 0 (all within packet) */
@@ -3809,7 +3857,11 @@ int MPIDI_CH3_PktHandler_CAS( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);
 
     MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received CAS pkt");
+
     MPIU_INSTR_DURATION_START(rmapkt_cas);
+    MPIU_Assert(cas_pkt->target_win_handle != MPI_WIN_NULL);
+    MPID_Win_get_ptr(cas_pkt->target_win_handle, win_ptr);
+    mpi_errno = MPIDI_CH3_Start_rma_op_target(win_ptr, cas_pkt->flags);
 
     /* return the number of bytes processed in this function */
     /* data_len == 0 (all within packet) */
@@ -3905,6 +3957,7 @@ int MPIDI_CH3_PktHandler_FOP( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3_Pkt_fop_t *fop_pkt = &pkt->fop;
     MPID_Request *req;
+    MPID_Win *win_ptr;
     int len, data_complete = 0;
     MPIU_CHKPMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
@@ -3912,7 +3965,11 @@ int MPIDI_CH3_PktHandler_FOP( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
 
     MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received FOP pkt");
+
     MPIU_INSTR_DURATION_START(rmapkt_fop);
+    MPIU_Assert(fop_pkt->target_win_handle != MPI_WIN_NULL);
+    MPID_Win_get_ptr(fop_pkt->target_win_handle, win_ptr);
+    mpi_errno = MPIDI_CH3_Start_rma_op_target(win_ptr, fop_pkt->flags);
 
     req = MPID_Request_create();
     MPIU_ERR_CHKANDJUMP(req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
@@ -4920,6 +4977,47 @@ static int MPIDI_CH3I_RMAListPartialComplete( MPID_Win *win_ptr,
 
 
 #undef FUNCNAME
+#define FUNCNAME MPIDI_CH3_Start_rma_op_target
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPIDI_CH3_Start_rma_op_target(MPID_Win *win_ptr, MPIDI_CH3_Pkt_flags_t flags)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_START_RMA_OP_TARGET);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_START_RMA_OP_TARGET);
+
+    /* Lock with NOCHECK is piggybacked on this message.  We should be able to
+     * immediately grab the lock.  Otherwise, there is a synchronization error. */
+    if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK &&
+        flags & MPIDI_CH3_PKT_FLAG_RMA_NOCHECK)
+    {
+        int lock_acquired;
+        int lock_mode;
+
+        if (flags & MPIDI_CH3_PKT_FLAG_RMA_SHARED) {
+            lock_mode = MPI_LOCK_SHARED;
+        } else if (flags & MPIDI_CH3_PKT_FLAG_RMA_EXCLUSIVE) {
+            lock_mode = MPI_LOCK_EXCLUSIVE;
+        } else {
+            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_RMA_SYNC, "**ch3|rma_flags");
+        }
+
+        lock_acquired = MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_mode);
+        MPIU_ERR_CHKANDJUMP(!lock_acquired, mpi_errno, MPI_ERR_RMA_SYNC, "**ch3|nocheck_invalid");
+    }
+
+fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_START_RMA_OP_TARGET);
+    return mpi_errno;
+    /* --BEGIN ERROR HANDLING-- */
+fn_fail:
+    goto fn_exit;
+    /* --END ERROR HANDLING-- */
+}
+
+
+#undef FUNCNAME
 #define FUNCNAME MPIDI_CH3_Finish_rma_op_target
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)

http://git.mpich.org/mpich.git/commitdiff/9c3a32fd9881db4a287df33f029e2eb64c83f3dc

commit 9c3a32fd9881db4a287df33f029e2eb64c83f3dc
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Fri Feb 1 16:32:07 2013 -0600

    Flush piggybacking
    
    This patch adds piggybacking of flush synchronization on top of the last
    operation in an RMA epoch.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/errnames.txt b/src/mpid/ch3/errnames.txt
index f43d204..3cde8bd 100644
--- a/src/mpid/ch3/errnames.txt
+++ b/src/mpid/ch3/errnames.txt
@@ -32,3 +32,10 @@
 **ch3|close_progress:an error occurred while the device was waiting for all open connections to close
 **ch3|pmi_finalize:PMI_Finalize failed
 **ch3|pmi_finalize %d:PMI_Finalize failed, error %d
+
+#
+# RMA errors
+#
+**ch3|sync_arg:Invalid RMA synchronization argument
+**ch3|sync_arg %d:Invalid RMA synchronization argument (%d)
+**ch3|rma_flags:Invalid combination of RMA packet flags
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 4988002..f6dd454 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -121,7 +121,7 @@ static int MPIDI_CH3I_Send_immed_rmw_msg(MPIDI_RMA_Op_t *, MPID_Win *,
                                          MPI_Win, MPI_Win, MPID_Request ** );
 static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
                                             int *wait_for_rma_done_pkt,
-                                            int unlock_target);
+                                            MPIDI_CH3_Pkt_flags_t sync_flags);
 static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *, int);
 static int MPIDI_CH3I_Send_lock_get(MPID_Win *, int);
 static int MPIDI_CH3I_RMAListComplete(MPID_Win *win_ptr,
@@ -2053,7 +2053,7 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
 
 	/* Now do all the RMA operations */
         mpi_errno = MPIDI_CH3I_Do_passive_target_rma(win_ptr, dest, &wait_for_rma_done_pkt,
-                                                     1 /* unlock the target */);
+                                                     MPIDI_CH3_PKT_FLAG_RMA_UNLOCK);
 	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
     }
         
@@ -2233,7 +2233,7 @@ int MPIDI_Win_flush(int rank, MPID_Win *win_ptr)
 
     win_ptr->targets[rank].remote_lock_state = MPIDI_CH3_WIN_LOCK_FLUSH;
     mpi_errno = MPIDI_CH3I_Do_passive_target_rma(win_ptr, rank, &wait_for_rma_done_pkt,
-                                                 0 /* don't unlock the target */);
+                                                 MPIDI_CH3_PKT_FLAG_RMA_FLUSH);
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
     /* If the lock is a shared lock or we have done the single op optimization,
@@ -2244,9 +2244,6 @@ int MPIDI_Win_flush(int rank, MPID_Win *win_ptr)
         /* wait until the "pt rma done" packet is received from the target.
            This packet resets the remote_lock_state flag. */
 
-        MPIDI_CH3I_Send_flush_msg(rank, win_ptr);
-
-        /* poke the progress engine until remote_lock_state flag is reset */
         if (win_ptr->targets[rank].remote_lock_state != MPIDI_CH3_WIN_LOCK_GRANTED)
         {
             MPID_Progress_state progress_state;
@@ -2479,7 +2476,7 @@ int MPIDI_Win_sync(MPID_Win *win_ptr)
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
-                                            int *wait_for_rma_done_pkt, int unlock_target)
+                                            int *wait_for_rma_done_pkt, MPIDI_CH3_Pkt_flags_t sync_flags)
 {
     int mpi_errno = MPI_SUCCESS, nops;
     MPIDI_RMA_Op_t *curr_ptr;
@@ -2570,18 +2567,23 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
         MPIU_Assert(curr_ptr->target_rank == target_rank);
 
         if (curr_ptr->next == NULL) {
-            if (unlock_target) {
+            if (sync_flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
                 flags = MPIDI_CH3_PKT_FLAG_RMA_UNLOCK;
+            } else if (sync_flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
+                flags = MPIDI_CH3_PKT_FLAG_RMA_FLUSH;
+            } else {
+                MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_RMA_SYNC, "**ch3|sync_arg",
+                                     "**ch3|sync_arg %d", sync_flags);
+            }
 
-                /* Inform the target that we want an acknowledgement when the
-                 * unlock has completed. */
-                if (*wait_for_rma_done_pkt) {
-                    flags |= MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
-                }
+            /* Inform the target that we want an acknowledgement when the
+             * unlock has completed. */
+            if (*wait_for_rma_done_pkt) {
+                flags |= MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
             }
-        }
 
-        source_win_handle = win_ptr->handle;
+            source_win_handle = win_ptr->handle;
+        }
 
         /* Track passive target write operations.  This is used during Win_free
          * to ensure that all writes to a given target have completed at that
@@ -2621,11 +2623,20 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
 	MPIU_INSTR_STMT(list_block=MPIU_INSTR_GET_VAR(winunlock_block));
         mpi_errno = MPIDI_CH3I_RMAListComplete(win_ptr, &win_ptr->targets[target_rank].rma_ops_list);
     }
-    else if (unlock_target) {
+    else if (sync_flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
         /* No communication operations were left to process, but the RMA epoch
            is open.  Send an unlock message to release the lock at the target.  */
         mpi_errno = MPIDI_CH3I_Send_unlock_msg(target_rank, win_ptr);
-        MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**winRMAmessage");
+        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        *wait_for_rma_done_pkt = 1;
+    }
+    else if (sync_flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
+        /* No communication operations were left to process, but the RMA epoch
+           is open.  Send a flush message to ensure remote completion. */
+        /* FIXME: This should be unnecessary for exclusive lock epochs */
+        mpi_errno = MPIDI_CH3I_Send_flush_msg(target_rank, win_ptr);
+        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        *wait_for_rma_done_pkt = 1;
     }
 
     MPIU_Assert(MPIDI_CH3I_RMA_Ops_isempty(&win_ptr->targets[target_rank].rma_ops_list));
@@ -4934,8 +4945,8 @@ int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE) {
         MPIU_Assert(win_ptr->current_lock_type == MPID_LOCK_NONE);
 
-        /* FIXME: MT: Accesses to my_counter should be done atomically */
         win_ptr->my_counter -= 1;
+        MPIU_Assert(win_ptr->my_counter >= 0);
 
         /* Signal the local process when the op counter reaches 0. */
         if (win_ptr->my_counter == 0)
@@ -4949,7 +4960,7 @@ int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma
         MPIU_Assert(win_ptr->current_lock_type != MPID_LOCK_NONE);
 
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK) {
-            MPIU_Assert(source_win_handle != MPI_WIN_NULL);
+            MPIU_Assert(source_win_handle != MPI_WIN_NULL && vc != NULL);
             mpi_errno = MPIDI_CH3I_Send_pt_rma_done_pkt(vc, win_ptr, source_win_handle);
             if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
         }
@@ -4961,6 +4972,31 @@ int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma
            wake it up, so it can attempt to grab the lock. */
         MPIDI_CH3_Progress_signal_completion();
     }
+    else if (flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) {
+        if (flags & MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK) {
+            MPIDI_CH3_Pkt_t upkt;
+            MPIDI_CH3_Pkt_flush_t *flush_pkt = &upkt.flush;
+            MPID_Request *req = NULL;
+
+            MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received piggybacked flush request");
+            MPIU_Assert(source_win_handle != MPI_WIN_NULL && vc != NULL);
+
+            MPIDI_Pkt_init(flush_pkt, MPIDI_CH3_PKT_FLUSH);
+            flush_pkt->source_win_handle = source_win_handle;
+            flush_pkt->target_win_handle = MPI_WIN_NULL;
+            flush_pkt->target_rank = win_ptr->comm_ptr->rank;
+
+            MPIU_THREAD_CS_ENTER(CH3COMM,vc);
+            mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_pkt, sizeof(*flush_pkt), &req);
+            MPIU_THREAD_CS_EXIT(CH3COMM,vc);
+            MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**winRMAmessage");
+
+            /* Release the request returned by iStartMsg */
+            if (req != NULL) {
+                MPID_Request_release(req);
+            }
+        }
+    }
 
 fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_FINISH_RMA_OP_TARGET);

http://git.mpich.org/mpich.git/commitdiff/255fb4a63c664f6752d6a4b69c3680d6a0218197

commit 255fb4a63c664f6752d6a4b69c3680d6a0218197
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Fri Feb 1 12:46:20 2013 -0600

    Removed unused single_op_opt field from MPID_Request
    
    The single_op_opt flag in the request object was previously used to
    track whether an operation is a lock-op-unlock type, for the purposes of
    completion.  Tracking this state has been merged into the packet header
    flags, so the single_op_opt flag is no longer needed.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index df567ff..fd8e846 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -380,7 +380,6 @@ typedef struct MPIDI_Request {
     MPI_Request request_handle;
     MPI_Win     target_win_handle;
     MPI_Win     source_win_handle;
-    int single_op_opt;   /* to indicate a lock-put-unlock optimization case */
     MPIDI_CH3_Pkt_flags_t flags; /* flags that were included in the original RMA packet header */
     struct MPIDI_Win_lock_queue *lock_queue_entry; /* for single lock-put-unlock optimization */
     MPI_Request resp_request_handle; /* Handle for get_accumulate response */
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 82011a8..872cfb8 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -1167,7 +1167,6 @@ static int do_simple_get(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_queue)
     req->dev.target_win_handle = win_ptr->handle;
     req->dev.source_win_handle = lock_queue->source_win_handle;
     req->dev.flags = lock_queue->pt_single_op->flags;
-    req->dev.single_op_opt = 1;
     
     MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_RESP); 
     req->kind = MPID_REQUEST_SEND;
diff --git a/src/mpid/ch3/src/ch3u_request.c b/src/mpid/ch3/src/ch3u_request.c
index 2921fae..5842b7b 100644
--- a/src/mpid/ch3/src/ch3u_request.c
+++ b/src/mpid/ch3/src/ch3u_request.c
@@ -82,7 +82,6 @@ MPID_Request * MPID_Request_create(void)
 	   request for RMA operations */
 	req->dev.target_win_handle = MPI_WIN_NULL;
 	req->dev.source_win_handle = MPI_WIN_NULL;
-	req->dev.single_op_opt	   = 0;
 	req->dev.lock_queue_entry  = NULL;
 	req->dev.dtype_info	   = NULL;
 	req->dev.dataloop	   = NULL;
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index d5efb5d..4988002 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -4207,7 +4207,6 @@ int MPIDI_CH3_PktHandler_LockPutUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	req->dev.user_buf = lock_put_unlock_pkt->addr;
 	req->dev.source_win_handle = lock_put_unlock_pkt->source_win_handle;
         req->dev.flags = lock_put_unlock_pkt->flags;
-	req->dev.single_op_opt = 1;
     }
     
     else {
@@ -4345,7 +4344,6 @@ int MPIDI_CH3_PktHandler_LockGetUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	req = MPID_Request_create();
 	req->dev.target_win_handle = lock_get_unlock_pkt->target_win_handle;
 	req->dev.source_win_handle = lock_get_unlock_pkt->source_win_handle;
-	req->dev.single_op_opt = 1;
         req->dev.flags = lock_get_unlock_pkt->flags;
 	
 	MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_RESP); 

http://git.mpich.org/mpich.git/commitdiff/422006da8dcd8ab6ee49169e41e98a33a0abd159

commit 422006da8dcd8ab6ee49169e41e98a33a0abd159
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Wed Jan 30 16:50:56 2013 -0600

    Cleanup of FOP packet header
    
    Removed source_win_handle from the packet header, since it's no longer
    needed.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 7d47874..0bdae9c 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -322,9 +322,6 @@ typedef struct MPIDI_CH3_Pkt_fop
                                 * epoch for decrementing rma op counter in
                                 * active target rma and for unlocking window 
                                 * in passive target rma. Otherwise set to NULL*/
-    MPI_Win source_win_handle; /* Used in the last RMA operation in an
-                                * epoch in the case of passive target rma
-                                * with shared locks. Otherwise set to NULL*/
     int origin_data[MPIDI_RMA_FOP_IMMED_INTS];
 }
 MPIDI_CH3_Pkt_fop_t;
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 1f89383..d5efb5d 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1048,7 +1048,6 @@ static int MPIDI_CH3I_Send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
         fop_pkt->flags = flags;
         fop_pkt->datatype = rma_op->target_datatype;
         fop_pkt->target_win_handle = target_win_handle;
-        fop_pkt->source_win_handle = source_win_handle;
         fop_pkt->request_handle = resp_req->handle;
         fop_pkt->op = rma_op->op;
 
@@ -3915,7 +3914,6 @@ int MPIDI_CH3_PktHandler_FOP( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     req->dev.op = fop_pkt->op;
     req->dev.real_user_buf = fop_pkt->addr;
     req->dev.target_win_handle = fop_pkt->target_win_handle;
-    req->dev.source_win_handle = fop_pkt->source_win_handle;
     req->dev.request_handle = fop_pkt->request_handle;
     req->dev.flags = fop_pkt->flags;
 

http://git.mpich.org/mpich.git/commitdiff/1c737a373c237cfddeb684ee30d3936b9fd6ebe4

commit 1c737a373c237cfddeb684ee30d3936b9fd6ebe4
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Wed Jan 30 16:16:13 2013 -0600

    Simplify get completion
    
    We can simplify completion of get operations at the target, since
    Finish_rma_op should now be called unconditionally.  Flags are now used
    to determine whether an acknowledgement should be sent, so the
    source_win_handle check is removed.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/src/ch3u_handle_send_req.c b/src/mpid/ch3/src/ch3u_handle_send_req.c
index b749d1d..47026d5 100644
--- a/src/mpid/ch3/src/ch3u_handle_send_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_send_req.c
@@ -48,15 +48,12 @@ int MPIDI_CH3_ReqHandler_GetSendRespComplete( MPIDI_VC_t *vc ATTRIBUTE((unused))
 					      int *complete )
 {
     int mpi_errno = MPI_SUCCESS;
+    MPID_Win *win_ptr;
 
-    /* FIXME: Should this test be an MPIU_Assert? */
-    if (sreq->dev.source_win_handle != MPI_WIN_NULL) {
-	MPID_Win *win_ptr;
-	MPID_Win_get_ptr(sreq->dev.target_win_handle, win_ptr);
+    MPID_Win_get_ptr(sreq->dev.target_win_handle, win_ptr);
 
-        mpi_errno = MPIDI_CH3_Finish_rma_op_target(NULL, win_ptr, FALSE, sreq->dev.flags, MPI_WIN_NULL);
-        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-    }
+    mpi_errno = MPIDI_CH3_Finish_rma_op_target(NULL, win_ptr, FALSE, sreq->dev.flags, MPI_WIN_NULL);
+    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
     /* mark data transfer as complete and decrement CC */
     MPIDI_CH3U_Request_complete(sreq);

http://git.mpich.org/mpich.git/commitdiff/4e67607f342ce4f8591c1ac95166153df72d02c0

commit 4e67607f342ce4f8591c1ac95166153df72d02c0
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Wed Jan 30 12:56:13 2013 -0600

    RMA sync. piggybacking from origin->target
    
    This patch uses packet header flags to piggyback the unlock operation on other
    RMA operations.  For most operations, there is no net change.  However, for FOP
    and GACC, unlock piggybacking was previously not implemented.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index c79861a..0185480 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -1215,7 +1215,7 @@ int MPIDI_CH3I_Try_acquire_win_lock(MPID_Win * win_ptr, int requested_lock);
 int MPIDI_CH3I_Send_lock_granted_pkt(MPIDI_VC_t * vc, MPID_Win *win_ptr, int source_win_hdl);
 int MPIDI_CH3I_Send_pt_rma_done_pkt(MPIDI_VC_t * vc, MPID_Win *win_ptr, int source_win_hdl);
 int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma_update,
-                                   int end_epoch, MPI_Win source_win_handle, int force_done_pkt);
+                                   MPIDI_CH3_Pkt_flags_t flags, MPI_Win source_win_handle);
 
 #define MPIDI_CH3I_DATATYPE_IS_PREDEFINED(type, predefined) \
     if ((HANDLE_GET_KIND(type) == HANDLE_KIND_BUILTIN) || \
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index f1b2c2a..7d47874 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -90,7 +90,6 @@ enum MPIDI_CH3_Pkt_types
     MPIDI_CH3_PKT_ACCUM_IMMED,     /* optimization for short accumulate */
     /* FIXME: Add PUT, GET_IMMED packet types */
     MPIDI_CH3_PKT_CAS,
-    MPIDI_CH3_PKT_CAS_UNLOCK,
     MPIDI_CH3_PKT_CAS_RESP,
     MPIDI_CH3_PKT_FOP,
     MPIDI_CH3_PKT_FOP_RESP,
@@ -298,10 +297,6 @@ typedef struct MPIDI_CH3_Pkt_cas
                                 * epoch for decrementing rma op counter in
                                 * active target rma and for unlocking window 
                                 * in passive target rma. Otherwise set to NULL*/
-
-                               /* source_win_handle is omitted here to reduce
-                                * the packet size.  If this is the last CAS
-                                * packet, the type will be set to CAS_UNLOCK */
     MPIDI_CH3_CAS_Immed_u origin_data;
     MPIDI_CH3_CAS_Immed_u compare_data;
 }
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index ad8789d..26d1b17 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -91,6 +91,7 @@ typedef struct MPIDI_PT_single_op {
     void *data;  /* for queued puts and accumulates, data is copied here */
     MPI_Request request_handle;  /* for gets */
     int data_recd;  /* to indicate if the data has been received */
+    MPIDI_CH3_Pkt_flags_t flags;
 } MPIDI_PT_single_op;
 
 typedef struct MPIDI_Win_lock_queue {
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
index 8ea67a8..8934f9e 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
@@ -598,8 +598,6 @@ int MPIDI_CH3_PktHandler_Init( MPIDI_CH3_PktHandler_Fcn *pktArray[],
 	MPIDI_CH3_PktHandler_Accumulate_Immed;
     pktArray[MPIDI_CH3_PKT_CAS] =
         MPIDI_CH3_PktHandler_CAS;
-    pktArray[MPIDI_CH3_PKT_CAS_UNLOCK] =
-        MPIDI_CH3_PktHandler_CAS;
     pktArray[MPIDI_CH3_PKT_CAS_RESP] =
         MPIDI_CH3_PktHandler_CASResp;
     pktArray[MPIDI_CH3_PKT_FOP] =
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 66ca1e0..82011a8 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -152,8 +152,8 @@ int MPIDI_CH3_ReqHandler_PutAccumRespComplete( MPIDI_VC_t *vc,
     
     MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
     
-    mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE, rreq->dev.source_win_handle != MPI_WIN_NULL,
-                                               rreq->dev.source_win_handle, rreq->dev.single_op_opt);
+    mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE, rreq->dev.flags,
+                                               rreq->dev.source_win_handle);
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
     /* mark data transfer as complete and decrement CC */
@@ -353,6 +353,7 @@ int MPIDI_CH3_ReqHandler_GetRespDerivedDTComplete( MPIDI_VC_t *vc,
     sreq->dev.datatype_ptr = new_dtp;
     sreq->dev.target_win_handle = rreq->dev.target_win_handle;
     sreq->dev.source_win_handle = rreq->dev.source_win_handle;
+    sreq->dev.flags = rreq->dev.flags;
     
     MPIDI_Pkt_init(get_resp_pkt, MPIDI_CH3_PKT_GET_RESP);
     get_resp_pkt->request_handle = rreq->dev.request_handle;    
@@ -418,7 +419,6 @@ int MPIDI_CH3_ReqHandler_SinglePutAccumComplete( MPIDI_VC_t *vc,
     if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, 
 					lock_queue_entry->lock_type) == 1)
     {
-        MPI_Win source_win_handle = lock_queue_entry->source_win_handle;
 	
 	if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_PT_SINGLE_PUT) {
 	    /* copy the data over */
@@ -447,11 +447,14 @@ int MPIDI_CH3_ReqHandler_SinglePutAccumComplete( MPIDI_VC_t *vc,
 	}                    
 	*curr_ptr_ptr = curr_ptr->next;
 	
+        mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE,
+                                                   lock_queue_entry->pt_single_op->flags,
+                                                   lock_queue_entry->source_win_handle);
+
 	MPIU_Free(lock_queue_entry->pt_single_op->data);
 	MPIU_Free(lock_queue_entry->pt_single_op);
 	MPIU_Free(lock_queue_entry);
 	
-        mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE, TRUE, source_win_handle, TRUE);
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
     }
     else {
@@ -551,8 +554,8 @@ int MPIDI_CH3_ReqHandler_FOPComplete( MPIDI_VC_t *vc,
 
     MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
 
-    mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE, rreq->dev.source_win_handle != MPI_WIN_NULL,
-                                               rreq->dev.source_win_handle, FALSE);
+    mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE, rreq->dev.flags,
+                                               rreq->dev.source_win_handle);
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
     *complete = 1;
@@ -1163,6 +1166,7 @@ static int do_simple_get(MPID_Win *win_ptr, MPIDI_Win_lock_queue *lock_queue)
     }
     req->dev.target_win_handle = win_ptr->handle;
     req->dev.source_win_handle = lock_queue->source_win_handle;
+    req->dev.flags = lock_queue->pt_single_op->flags;
     req->dev.single_op_opt = 1;
     
     MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_RESP); 
diff --git a/src/mpid/ch3/src/ch3u_handle_send_req.c b/src/mpid/ch3/src/ch3u_handle_send_req.c
index 9d101e9..b749d1d 100644
--- a/src/mpid/ch3/src/ch3u_handle_send_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_send_req.c
@@ -52,15 +52,9 @@ int MPIDI_CH3_ReqHandler_GetSendRespComplete( MPIDI_VC_t *vc ATTRIBUTE((unused))
     /* FIXME: Should this test be an MPIU_Assert? */
     if (sreq->dev.source_win_handle != MPI_WIN_NULL) {
 	MPID_Win *win_ptr;
-	/* Last RMA operation (get) from source. If active target RMA,
-	   decrement window counter. If passive target RMA, 
-	   release lock on window and grant next lock in the 
-	   lock queue if there is any; no need to send rma done 
-	   packet since the last operation is a get. */
-	
 	MPID_Win_get_ptr(sreq->dev.target_win_handle, win_ptr);
 
-        mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, FALSE, TRUE, MPI_WIN_NULL, FALSE);
+        mpi_errno = MPIDI_CH3_Finish_rma_op_target(NULL, win_ptr, FALSE, sreq->dev.flags, MPI_WIN_NULL);
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
     }
 
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index b230ec1..1f89383 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -347,20 +347,20 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
         curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
 	while (curr_ptr != NULL)
 	{
+            MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
+
 	    /* The completion counter at the target is decremented only on 
-	       the last RMA operation. We indicate the last operation by 
-	       passing the source_win_handle only on the last operation. 
-	       Otherwise, we pass NULL */
+	       the last RMA operation. */
 	    if (curr_ops_cnt[curr_ptr->target_rank] ==
-		nops_to_proc[curr_ptr->target_rank] - 1) 
-		source_win_handle = win_ptr->handle;
-	    else 
-		source_win_handle = MPI_WIN_NULL;
-	    
+                nops_to_proc[curr_ptr->target_rank] - 1) {
+                flags = MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE;
+            }
+
+            source_win_handle = win_ptr->handle;
 	    target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
 
 #define MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_) /* Not used by active mode */
-            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, MPIDI_CH3_PKT_FLAG_NONE,
+            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, flags,
                                     source_win_handle, target_win_handle, mpi_errno);
 #undef MPIDI_CH3I_TRACK_RMA_WRITE
 
@@ -1012,13 +1012,7 @@ static int MPIDI_CH3I_Send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
 
         MPIU_Assert(len <= sizeof(MPIDI_CH3_CAS_Immed_u));
 
-        /* If this is the last operation, it also unlocks the window 
-           at the target. */
-        if (source_win_handle != MPI_WIN_NULL) {
-            MPIDI_Pkt_init(cas_pkt, MPIDI_CH3_PKT_CAS_UNLOCK);
-        } else {
-            MPIDI_Pkt_init(cas_pkt, MPIDI_CH3_PKT_CAS);
-        }
+        MPIDI_Pkt_init(cas_pkt, MPIDI_CH3_PKT_CAS);
 
         cas_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
@@ -1640,20 +1634,20 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
     curr_ptr = MPIDI_CH3I_RMA_Ops_head(ops_list);
     while (curr_ptr != NULL)
     {
+        MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
+
 	/* The completion counter at the target is decremented only on 
-	   the last RMA operation. We indicate the last operation by 
-	   passing the source_win_handle only on the last operation. 
-	   Otherwise, we pass NULL */
+	   the last RMA operation. */
 	if (curr_ops_cnt[curr_ptr->target_rank] ==
-	    nops_to_proc[curr_ptr->target_rank] - 1) 
-	    source_win_handle = win_ptr->handle;
-	else 
-	    source_win_handle = MPI_WIN_NULL;
-	
+	    nops_to_proc[curr_ptr->target_rank] - 1) {
+            flags = MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE;
+        }
+
+        source_win_handle = win_ptr->handle;
 	target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
 
 #define MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_) /* Not used by active mode */
-            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, MPIDI_CH3_PKT_FLAG_NONE,
+            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, flags,
                                     source_win_handle, target_win_handle, mpi_errno);
 #undef MPIDI_CH3I_TRACK_RMA_WRITE
 
@@ -1700,7 +1694,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
 	    MPID_Request *request;
 	    
 	    MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
-            put_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+            put_pkt->flags = MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE;
 	    put_pkt->addr = NULL;
 	    put_pkt->count = 0;
 	    put_pkt->datatype = MPI_INT;
@@ -2572,18 +2566,23 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
     {
         MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
 
-        /* To indicate the last RMA operation, we pass the
-           source_win_handle only on the last operation. Otherwise, 
-           we pass MPI_WIN_NULL. */
-
         /* Assertion: (curr_ptr != NULL) => (nops > 0) */
         MPIU_Assert(nops > 0);
         MPIU_Assert(curr_ptr->target_rank == target_rank);
 
-        if (curr_ptr->next == NULL && unlock_target)
-            source_win_handle = win_ptr->handle;
-        else 
-            source_win_handle = MPI_WIN_NULL;
+        if (curr_ptr->next == NULL) {
+            if (unlock_target) {
+                flags = MPIDI_CH3_PKT_FLAG_RMA_UNLOCK;
+
+                /* Inform the target that we want an acknowledgement when the
+                 * unlock has completed. */
+                if (*wait_for_rma_done_pkt) {
+                    flags |= MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
+                }
+            }
+        }
+
+        source_win_handle = win_ptr->handle;
 
         /* Track passive target write operations.  This is used during Win_free
          * to ensure that all writes to a given target have completed at that
@@ -2895,6 +2894,8 @@ static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *win_ptr, int target_rank)
 
     if (rma_op->type == MPIDI_RMA_PUT) {
         MPIDI_Pkt_init(lock_put_unlock_pkt, MPIDI_CH3_PKT_LOCK_PUT_UNLOCK);
+        lock_put_unlock_pkt->flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK |
+            MPIDI_CH3_PKT_FLAG_RMA_UNLOCK | MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
         lock_put_unlock_pkt->target_win_handle = 
             win_ptr->all_win_handles[rma_op->target_rank];
         lock_put_unlock_pkt->source_win_handle = win_ptr->handle;
@@ -2913,6 +2914,8 @@ static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *win_ptr, int target_rank)
     
     else if (rma_op->type == MPIDI_RMA_ACCUMULATE) {        
         MPIDI_Pkt_init(lock_accum_unlock_pkt, MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK);
+        lock_accum_unlock_pkt->flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK |
+            MPIDI_CH3_PKT_FLAG_RMA_UNLOCK | MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
         lock_accum_unlock_pkt->target_win_handle = 
             win_ptr->all_win_handles[rma_op->target_rank];
         lock_accum_unlock_pkt->source_win_handle = win_ptr->handle;
@@ -2931,6 +2934,8 @@ static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *win_ptr, int target_rank)
     }
     else if (rma_op->type == MPIDI_RMA_ACC_CONTIG) {
         MPIDI_Pkt_init(lock_accum_unlock_pkt, MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK);
+        lock_accum_unlock_pkt->flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK |
+            MPIDI_CH3_PKT_FLAG_RMA_UNLOCK | MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
         lock_accum_unlock_pkt->target_win_handle = 
             win_ptr->all_win_handles[rma_op->target_rank];
         lock_accum_unlock_pkt->source_win_handle = win_ptr->handle;
@@ -3115,6 +3120,8 @@ static int MPIDI_CH3I_Send_lock_get(MPID_Win *win_ptr, int target_rank)
     }
 
     MPIDI_Pkt_init(lock_get_unlock_pkt, MPIDI_CH3_PKT_LOCK_GET_UNLOCK);
+    lock_get_unlock_pkt->flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK |
+        MPIDI_CH3_PKT_FLAG_RMA_UNLOCK; /* FIXME | MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK; */
     lock_get_unlock_pkt->target_win_handle = 
         win_ptr->all_win_handles[rma_op->target_rank];
     lock_get_unlock_pkt->source_win_handle = win_ptr->handle;
@@ -3267,7 +3274,7 @@ int MPIDI_CH3_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	   to check lock queue. */
 	if (put_pkt->target_win_handle != MPI_WIN_NULL) {
             MPID_Win_get_ptr(put_pkt->target_win_handle, win_ptr);
-            mpi_errno = MPIDI_CH3_Finish_rma_op_target(NULL, win_ptr, TRUE, TRUE, MPI_WIN_NULL, FALSE);
+            mpi_errno = MPIDI_CH3_Finish_rma_op_target(NULL, win_ptr, TRUE, put_pkt->flags, MPI_WIN_NULL);
             if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 	}
         *buflen = sizeof(MPIDI_CH3_Pkt_t);
@@ -3285,6 +3292,7 @@ int MPIDI_CH3_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     req->dev.user_count = put_pkt->count;
     req->dev.target_win_handle = put_pkt->target_win_handle;
     req->dev.source_win_handle = put_pkt->source_win_handle;
+    req->dev.flags = put_pkt->flags;
 	
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(put_pkt->datatype, predefined);
     if (predefined)
@@ -3426,6 +3434,7 @@ int MPIDI_CH3_PktHandler_Get( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     req = MPID_Request_create();
     req->dev.target_win_handle = get_pkt->target_win_handle;
     req->dev.source_win_handle = get_pkt->source_win_handle;
+    req->dev.flags = get_pkt->flags;
     
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(get_pkt->datatype, predefined);
     if (predefined)
@@ -3562,6 +3571,7 @@ int MPIDI_CH3_PktHandler_Accumulate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     req->dev.real_user_buf = accum_pkt->addr;
     req->dev.target_win_handle = accum_pkt->target_win_handle;
     req->dev.source_win_handle = accum_pkt->source_win_handle;
+    req->dev.flags = accum_pkt->flags;
 
     if (accum_pkt->type == MPIDI_CH3_PKT_GET_ACCUM) {
         req->dev.resp_request_handle = accum_pkt->request_handle;
@@ -3755,8 +3765,8 @@ int MPIDI_CH3_PktHandler_Accumulate_Immed( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	MPID_Win_get_ptr(accum_pkt->target_win_handle, win_ptr);
 
         mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE,
-                                                   accum_pkt->source_win_handle != MPI_WIN_NULL,
-                                                   accum_pkt->source_win_handle, FALSE);
+                                                   accum_pkt->flags,
+                                                   accum_pkt->source_win_handle);
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
     }
 
@@ -3826,8 +3836,8 @@ int MPIDI_CH3_PktHandler_CAS( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     MPID_Win_get_ptr(cas_pkt->target_win_handle, win_ptr);
 
     mpi_errno = MPIDI_CH3_Finish_rma_op_target(NULL, win_ptr, TRUE,
-                                               pkt->type == MPIDI_CH3_PKT_CAS_UNLOCK,
-                                               MPI_WIN_NULL, FALSE);
+                                               cas_pkt->flags,
+                                               MPI_WIN_NULL);
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
 fn_exit:
@@ -3907,6 +3917,7 @@ int MPIDI_CH3_PktHandler_FOP( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     req->dev.target_win_handle = fop_pkt->target_win_handle;
     req->dev.source_win_handle = fop_pkt->source_win_handle;
     req->dev.request_handle = fop_pkt->request_handle;
+    req->dev.flags = fop_pkt->flags;
 
     MPID_Datatype_get_size_macro(req->dev.datatype, len);
     MPIU_Assert(len <= sizeof(MPIDI_CH3_FOP_Immed_u));
@@ -4197,6 +4208,7 @@ int MPIDI_CH3_PktHandler_LockPutUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_PutAccumRespComplete;
 	req->dev.user_buf = lock_put_unlock_pkt->addr;
 	req->dev.source_win_handle = lock_put_unlock_pkt->source_win_handle;
+        req->dev.flags = lock_put_unlock_pkt->flags;
 	req->dev.single_op_opt = 1;
     }
     
@@ -4239,6 +4251,7 @@ int MPIDI_CH3_PktHandler_LockPutUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	new_ptr->vc = vc;
 	
 	new_ptr->pt_single_op->type = MPIDI_RMA_PUT;
+	new_ptr->pt_single_op->flags = lock_put_unlock_pkt->flags;
 	new_ptr->pt_single_op->addr = lock_put_unlock_pkt->addr;
 	new_ptr->pt_single_op->count = lock_put_unlock_pkt->count;
 	new_ptr->pt_single_op->datatype = lock_put_unlock_pkt->datatype;
@@ -4335,6 +4348,7 @@ int MPIDI_CH3_PktHandler_LockGetUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	req->dev.target_win_handle = lock_get_unlock_pkt->target_win_handle;
 	req->dev.source_win_handle = lock_get_unlock_pkt->source_win_handle;
 	req->dev.single_op_opt = 1;
+        req->dev.flags = lock_get_unlock_pkt->flags;
 	
 	MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_GET_RESP); 
 	req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GetSendRespComplete;
@@ -4400,6 +4414,7 @@ int MPIDI_CH3_PktHandler_LockGetUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	new_ptr->vc = vc;
 	
 	new_ptr->pt_single_op->type = MPIDI_RMA_GET;
+	new_ptr->pt_single_op->flags = lock_get_unlock_pkt->flags;
 	new_ptr->pt_single_op->addr = lock_get_unlock_pkt->addr;
 	new_ptr->pt_single_op->count = lock_get_unlock_pkt->count;
 	new_ptr->pt_single_op->datatype = lock_get_unlock_pkt->datatype;
@@ -4453,6 +4468,7 @@ int MPIDI_CH3_PktHandler_LockAccumUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     req->dev.recv_data_sz = type_size * lock_accum_unlock_pkt->count;
     req->dev.user_count = lock_accum_unlock_pkt->count;
     req->dev.target_win_handle = lock_accum_unlock_pkt->target_win_handle;
+    req->dev.flags = lock_accum_unlock_pkt->flags;
     
     /* queue the information */
     
@@ -4493,6 +4509,7 @@ int MPIDI_CH3_PktHandler_LockAccumUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     new_ptr->vc = vc;
     
     new_ptr->pt_single_op->type = MPIDI_RMA_ACCUMULATE;
+    new_ptr->pt_single_op->flags = lock_accum_unlock_pkt->flags;
     new_ptr->pt_single_op->addr = lock_accum_unlock_pkt->addr;
     new_ptr->pt_single_op->count = lock_accum_unlock_pkt->count;
     new_ptr->pt_single_op->datatype = lock_accum_unlock_pkt->datatype;
@@ -4900,7 +4917,7 @@ static int MPIDI_CH3I_RMAListPartialComplete( MPID_Win *win_ptr,
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma_update,
-                                   int end_epoch, MPI_Win source_win_handle, int force_done_pkt)
+                                   MPIDI_CH3_Pkt_flags_t flags, MPI_Win source_win_handle)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_FINISH_RMA_OP_TARGET);
@@ -4916,40 +4933,37 @@ int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma
     if (win_ptr->current_lock_type != MPID_LOCK_NONE && is_rma_update)
         win_ptr->my_pt_rma_puts_accs++;
 
-    if (end_epoch) {
-        /* Last RMA operation from source. If active target RMA, decrement
-           window counter. If passive target RMA, release lock on window and
-           grant next lock in the lock queue if there is any. If it's a shared
-           lock or a lock-put-unlock type of optimization, we also need to send
-           an ack to the source. */
-
-        if (win_ptr->current_lock_type == MPID_LOCK_NONE) {
-            /* FIXME: MT: Accesses to my_counter should be done atomically */
-            win_ptr->my_counter -= 1;
-
-            /* Active target: Signal the local process when the op counter
-             * reaches 0. */
-            if (win_ptr->my_counter == 0)
-                MPIDI_CH3_Progress_signal_completion();
-        }
-        else {
-            if (source_win_handle != MPI_WIN_NULL &&
-                ((win_ptr->current_lock_type == MPI_LOCK_SHARED || force_done_pkt)))
-            {
-                mpi_errno = MPIDI_CH3I_Send_pt_rma_done_pkt(vc, win_ptr, source_win_handle);
-                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-            }
+    /* Last RMA operation from source. If active target RMA, decrement window
+       counter. */
+    if (flags & MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE) {
+        MPIU_Assert(win_ptr->current_lock_type == MPID_LOCK_NONE);
 
-            if (end_epoch) {
-                mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
-                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        /* FIXME: MT: Accesses to my_counter should be done atomically */
+        win_ptr->my_counter -= 1;
 
-                /* Passive target: The local process may be waiting for the
-                 * lock.  Signal completion to wake it up, so it can attempt to
-                 * grab the lock. */
-                MPIDI_CH3_Progress_signal_completion();
-            }
+        /* Signal the local process when the op counter reaches 0. */
+        if (win_ptr->my_counter == 0)
+            MPIDI_CH3_Progress_signal_completion();
+    }
+
+    /* If passive target RMA, release lock on window and grant next lock in the
+       lock queue if there is any.  If requested by the origin, send an ack back
+       to indicate completion at the target. */
+    else if (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK) {
+        MPIU_Assert(win_ptr->current_lock_type != MPID_LOCK_NONE);
+
+        if (flags & MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK) {
+            MPIU_Assert(source_win_handle != MPI_WIN_NULL);
+            mpi_errno = MPIDI_CH3I_Send_pt_rma_done_pkt(vc, win_ptr, source_win_handle);
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
         }
+
+        mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
+        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+        /* The local process may be waiting for the lock.  Signal completion to
+           wake it up, so it can attempt to grab the lock. */
+        MPIDI_CH3_Progress_signal_completion();
     }
 
 fn_exit:

http://git.mpich.org/mpich.git/commitdiff/90be9ee1506481dcab84b89ccb8b0dc186b8b37b

commit 90be9ee1506481dcab84b89ccb8b0dc186b8b37b
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Tue Jan 29 13:49:58 2013 -0600

    Added flags to MPID_Request
    
    Added a flags field to MPID_Request that we can use to stash flags from
    suspended RMA ops and retrieve them later when we complete the operation.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index de73708..df567ff 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -381,6 +381,7 @@ typedef struct MPIDI_Request {
     MPI_Win     target_win_handle;
     MPI_Win     source_win_handle;
     int single_op_opt;   /* to indicate a lock-put-unlock optimization case */
+    MPIDI_CH3_Pkt_flags_t flags; /* flags that were included in the original RMA packet header */
     struct MPIDI_Win_lock_queue *lock_queue_entry; /* for single lock-put-unlock optimization */
     MPI_Request resp_request_handle; /* Handle for get_accumulate response */
 
diff --git a/src/mpid/ch3/src/ch3u_request.c b/src/mpid/ch3/src/ch3u_request.c
index d52eb82..2921fae 100644
--- a/src/mpid/ch3/src/ch3u_request.c
+++ b/src/mpid/ch3/src/ch3u_request.c
@@ -87,6 +87,7 @@ MPID_Request * MPID_Request_create(void)
 	req->dev.dtype_info	   = NULL;
 	req->dev.dataloop	   = NULL;
 	req->dev.iov_offset        = 0;
+        req->dev.flags             = MPIDI_CH3_PKT_FLAG_NONE;
         req->dev.resp_request_handle = MPI_REQUEST_NULL;
 #ifdef MPIDI_CH3_REQUEST_INIT
 	MPIDI_CH3_REQUEST_INIT(req);

http://git.mpich.org/mpich.git/commitdiff/478a80f5f7311f087e8eb69dd1dd65f83c5a3d0a

commit 478a80f5f7311f087e8eb69dd1dd65f83c5a3d0a
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Thu Jan 24 17:19:07 2013 -0600

    Integrated flags into RMA op processing
    
    This change extends RMA op processing to pass around flags as needed.  It
    doesn't yet utilize the flags.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 96c8469..b230ec1 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -102,18 +102,22 @@ static int MPIDI_CH3I_Send_flush_msg(int dest, MPID_Win *win_ptr);
 static int MPIDI_CH3I_Wait_for_lock_granted(MPID_Win *win_ptr, int target_rank);
 static int MPIDI_CH3I_Acquire_local_lock(MPID_Win *win_ptr, int lock_mode);
 static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_Op_t * rma_op, MPID_Win * win_ptr,
+                                   MPIDI_CH3_Pkt_flags_t flags,
 				   MPI_Win source_win_handle, 
 				   MPI_Win target_win_handle, 
 				   MPIDI_RMA_dtype_info * dtype_info, 
 				   void ** dataloop, MPID_Request ** request);
 static int MPIDI_CH3I_Recv_rma_msg(MPIDI_RMA_Op_t * rma_op, MPID_Win * win_ptr,
+                                   MPIDI_CH3_Pkt_flags_t flags,
 				   MPI_Win source_win_handle, 
 				   MPI_Win target_win_handle, 
 				   MPIDI_RMA_dtype_info * dtype_info, 
 				   void ** dataloop, MPID_Request ** request); 
 static int MPIDI_CH3I_Send_contig_acc_msg(MPIDI_RMA_Op_t *, MPID_Win *,
+                                          MPIDI_CH3_Pkt_flags_t flags,
 					  MPI_Win, MPI_Win, MPID_Request ** );
 static int MPIDI_CH3I_Send_immed_rmw_msg(MPIDI_RMA_Op_t *, MPID_Win *,
+                                         MPIDI_CH3_Pkt_flags_t flags,
                                          MPI_Win, MPI_Win, MPID_Request ** );
 static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
                                             int *wait_for_rma_done_pkt,
@@ -135,14 +139,14 @@ static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
 /* Issue an RMA operation -- Before calling this macro, you must define the
  * MPIDI_CH3I_TRACK_RMA_WRITE helper macro.  This macro defines any extra action
  * that should be taken when a write (put/acc) operation is encountered. */
-#define MPIDI_CH3I_ISSUE_RMA_OP(op_ptr_, win_ptr_, source_win_handle_, target_win_handle_,err_) \
+#define MPIDI_CH3I_ISSUE_RMA_OP(op_ptr_, win_ptr_, flags_, source_win_handle_, target_win_handle_,err_) \
     do {                                                                                        \
     switch ((op_ptr_)->type)                                                                    \
     {                                                                                           \
         case (MPIDI_RMA_PUT):                                                                   \
         case (MPIDI_RMA_ACCUMULATE):                                                            \
             MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_);                                      \
-            (err_) = MPIDI_CH3I_Send_rma_msg((op_ptr_), (win_ptr_), (source_win_handle_),       \
+            (err_) = MPIDI_CH3I_Send_rma_msg((op_ptr_), (win_ptr_), (flags_), (source_win_handle_), \
                                                 (target_win_handle_), &(op_ptr_)->dtype_info,   \
                                                 &(op_ptr_)->dataloop, &(op_ptr_)->request);     \
             if (err_) { MPIU_ERR_POP(err_); }                                                   \
@@ -158,12 +162,12 @@ static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
                 (op_ptr_)->origin_count    = (op_ptr_)->result_count;                           \
                 (op_ptr_)->origin_datatype = (op_ptr_)->result_datatype;                        \
                                                                                                 \
-                (err_) = MPIDI_CH3I_Recv_rma_msg((op_ptr_), (win_ptr_), (source_win_handle_),   \
+                (err_) = MPIDI_CH3I_Recv_rma_msg((op_ptr_), (win_ptr_), (flags_), (source_win_handle_), \
                                                     (target_win_handle_), &(op_ptr_)->dtype_info,\
                                                     &(op_ptr_)->dataloop, &(op_ptr_)->request); \
             } else {                                                                            \
                 MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_);                                  \
-                (err_) = MPIDI_CH3I_Send_rma_msg((op_ptr_), (win_ptr_), (source_win_handle_),   \
+                (err_) = MPIDI_CH3I_Send_rma_msg((op_ptr_), (win_ptr_), (flags_), (source_win_handle_), \
                                                     (target_win_handle_), &(op_ptr_)->dtype_info,\
                                                     &(op_ptr_)->dataloop, &(op_ptr_)->request); \
             }                                                                                   \
@@ -171,13 +175,13 @@ static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
             break;                                                                              \
         case MPIDI_RMA_ACC_CONTIG:                                                              \
             MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_);                                      \
-            (err_) = MPIDI_CH3I_Send_contig_acc_msg((op_ptr_), (win_ptr_),                      \
+            (err_) = MPIDI_CH3I_Send_contig_acc_msg((op_ptr_), (win_ptr_), (flags_),            \
                                                        (source_win_handle_), (target_win_handle_),\
                                                        &(op_ptr_)->request );                   \
             if (err_) { MPIU_ERR_POP(err_); }                                                   \
             break;                                                                              \
         case (MPIDI_RMA_GET):                                                                   \
-            (err_) = MPIDI_CH3I_Recv_rma_msg((op_ptr_), (win_ptr_),                             \
+            (err_) = MPIDI_CH3I_Recv_rma_msg((op_ptr_), (win_ptr_), (flags_),                   \
                                                 (source_win_handle_), (target_win_handle_),     \
                                                 &(op_ptr_)->dtype_info,                         \
                                                 &(op_ptr_)->dataloop, &(op_ptr_)->request);     \
@@ -186,7 +190,7 @@ static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
         case (MPIDI_RMA_COMPARE_AND_SWAP):                                                      \
         case (MPIDI_RMA_FETCH_AND_OP):                                                          \
             MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_);                                      \
-            (err_) = MPIDI_CH3I_Send_immed_rmw_msg((op_ptr_), (win_ptr_),                       \
+            (err_) = MPIDI_CH3I_Send_immed_rmw_msg((op_ptr_), (win_ptr_), (flags_),             \
                                                       (source_win_handle_), (target_win_handle_),\
                                                       &(op_ptr_)->request );                    \
             if (err_) { MPIU_ERR_POP(err_); }                                                   \
@@ -356,7 +360,8 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
 	    target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
 
 #define MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_) /* Not used by active mode */
-            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, source_win_handle, target_win_handle, mpi_errno);
+            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, MPIDI_CH3_PKT_FLAG_NONE,
+                                    source_win_handle, target_win_handle, mpi_errno);
 #undef MPIDI_CH3I_TRACK_RMA_WRITE
 
 	    i++;
@@ -531,6 +536,7 @@ static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
+                                   MPIDI_CH3_Pkt_flags_t flags,
 				   MPI_Win source_win_handle, 
 				   MPI_Win target_win_handle, 
 				   MPIDI_RMA_dtype_info *dtype_info, 
@@ -559,7 +565,7 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
         put_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        put_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+        put_pkt->flags = flags;
         put_pkt->count = rma_op->target_count;
         put_pkt->datatype = rma_op->target_datatype;
         put_pkt->dataloop_size = 0;
@@ -598,7 +604,7 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_GET_ACCUM);
         accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        accum_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+        accum_pkt->flags = flags;
         accum_pkt->count = rma_op->target_count;
         accum_pkt->datatype = rma_op->target_datatype;
         accum_pkt->dataloop_size = 0;
@@ -615,7 +621,7 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
         accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        accum_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+        accum_pkt->flags = flags;
         accum_pkt->count = rma_op->target_count;
         accum_pkt->datatype = rma_op->target_datatype;
         accum_pkt->dataloop_size = 0;
@@ -847,6 +853,7 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 static int MPIDI_CH3I_Send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
 					  MPID_Win *win_ptr,
+                                          MPIDI_CH3_Pkt_flags_t flags,
 					  MPI_Win source_win_handle, 
 					  MPI_Win target_win_handle, 
 					  MPID_Request **request) 
@@ -875,7 +882,7 @@ static int MPIDI_CH3I_Send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
 	MPIDI_Pkt_init(accumi_pkt, MPIDI_CH3_PKT_ACCUM_IMMED);
 	accumi_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
 	    win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        accumi_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+        accumi_pkt->flags = flags;
 	accumi_pkt->count = rma_op->target_count;
 	accumi_pkt->datatype = rma_op->target_datatype;
 	accumi_pkt->op = rma_op->op;
@@ -902,7 +909,7 @@ static int MPIDI_CH3I_Send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
     MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
     accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
 	win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-    accum_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+    accum_pkt->flags = flags;
     accum_pkt->count = rma_op->target_count;
     accum_pkt->datatype = rma_op->target_datatype;
     accum_pkt->dataloop_size = 0;
@@ -961,6 +968,7 @@ static int MPIDI_CH3I_Send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 static int MPIDI_CH3I_Send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
                                          MPID_Win *win_ptr,
+                                         MPIDI_CH3_Pkt_flags_t flags,
                                          MPI_Win source_win_handle, 
                                          MPI_Win target_win_handle, 
                                          MPID_Request **request) 
@@ -1014,7 +1022,7 @@ static int MPIDI_CH3I_Send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
 
         cas_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        cas_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+        cas_pkt->flags = flags;
         cas_pkt->datatype = rma_op->target_datatype;
         cas_pkt->target_win_handle = target_win_handle;
         cas_pkt->request_handle = resp_req->handle;
@@ -1043,7 +1051,7 @@ static int MPIDI_CH3I_Send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
 
         fop_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        fop_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+        fop_pkt->flags = flags;
         fop_pkt->datatype = rma_op->target_datatype;
         fop_pkt->target_win_handle = target_win_handle;
         fop_pkt->source_win_handle = source_win_handle;
@@ -1119,6 +1127,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 static int MPIDI_CH3I_Recv_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
+                                   MPIDI_CH3_Pkt_flags_t flags,
 				   MPI_Win source_win_handle, 
 				   MPI_Win target_win_handle, 
 				   MPIDI_RMA_dtype_info *dtype_info, 
@@ -1168,7 +1177,7 @@ static int MPIDI_CH3I_Recv_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
     get_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
         win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-    get_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
+    get_pkt->flags = flags;
     get_pkt->count = rma_op->target_count;
     get_pkt->datatype = rma_op->target_datatype;
     get_pkt->request_handle = req->handle;
@@ -1644,7 +1653,8 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
 	target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
 
 #define MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_) /* Not used by active mode */
-            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, source_win_handle, target_win_handle, mpi_errno);
+            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, MPIDI_CH3_PKT_FLAG_NONE,
+                                    source_win_handle, target_win_handle, mpi_errno);
 #undef MPIDI_CH3I_TRACK_RMA_WRITE
 
 	i++;
@@ -2560,6 +2570,8 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
 
     while (curr_ptr != NULL)
     {
+        MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
+
         /* To indicate the last RMA operation, we pass the
            source_win_handle only on the last operation. Otherwise, 
            we pass MPI_WIN_NULL. */
@@ -2579,7 +2591,8 @@ static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
 #define MPIDI_CH3I_TRACK_RMA_WRITE(op_, win_ptr_) \
         do { (win_ptr_)->pt_rma_puts_accs[(op_)->target_rank]++; } while (0)
 
-        MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, source_win_handle, target_win_handle, mpi_errno);
+        MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, flags, source_win_handle,
+                                target_win_handle, mpi_errno);
 #undef MPIDI_CH3I_TRACK_RMA_WRITE
 
 	/* If the request is null, we can remove it immediately */

http://git.mpich.org/mpich.git/commitdiff/c3f87fe32a5ec9208906d874d2b8563d188a7e71

commit c3f87fe32a5ec9208906d874d2b8563d188a7e71
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Thu Jan 24 17:07:13 2013 -0600

    Added flags field to RMA op packet headers
    
    Added flags field to RMA operation packets that are sent from origin to target.
    This will be used to piggyback RMA synchronization operations.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 5e63753..f1b2c2a 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -109,6 +109,15 @@ enum MPIDI_CH3_Pkt_types
 };
 
 typedef int16_t MPIDI_CH3_Pkt_type_t;
+typedef uint16_t MPIDI_CH3_Pkt_flags_t;
+
+                                                   /* Flag vector bits:*/
+#define MPIDI_CH3_PKT_FLAG_NONE                 0
+#define MPIDI_CH3_PKT_FLAG_RMA_LOCK             1  /* ...............X */
+#define MPIDI_CH3_PKT_FLAG_RMA_UNLOCK           2  /* ..............X. */
+#define MPIDI_CH3_PKT_FLAG_RMA_FLUSH            4  /* .............X.. */
+#define MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK          8  /* ............X... */
+#define MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE      16 /* ...........X.... */
 
 typedef struct MPIDI_CH3_Pkt_send
 {
@@ -189,6 +198,7 @@ MPIDI_CH3_PKT_DEFS
 typedef struct MPIDI_CH3_Pkt_put
 {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     void *addr;
     int count;
     MPI_Datatype datatype;
@@ -206,6 +216,7 @@ MPIDI_CH3_Pkt_put_t;
 typedef struct MPIDI_CH3_Pkt_get
 {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     void *addr;
     int count;
     MPI_Datatype datatype;
@@ -231,6 +242,7 @@ MPIDI_CH3_Pkt_get_resp_t;
 typedef struct MPIDI_CH3_Pkt_accum
 {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     MPI_Request request_handle; /* For get_accumulate response */
     void *addr;
     int count;
@@ -257,6 +269,7 @@ MPIDI_CH3_Pkt_get_accum_resp_t;
 typedef struct MPIDI_CH3_Pkt_accum_immed
 {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     void *addr;
     int count;
     /* FIXME: Compress datatype/op into a single word (immedate mode) */
@@ -277,6 +290,7 @@ MPIDI_CH3_Pkt_accum_immed_t;
 typedef struct MPIDI_CH3_Pkt_cas
 {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     MPI_Datatype datatype;
     void *addr;
     MPI_Request request_handle;
@@ -304,6 +318,7 @@ MPIDI_CH3_Pkt_cas_resp_t;
 typedef struct MPIDI_CH3_Pkt_fop
 {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     MPI_Datatype datatype;
     void *addr;
     MPI_Op op;
@@ -354,6 +369,7 @@ typedef MPIDI_CH3_Pkt_lock_t MPIDI_CH3_Pkt_flush_t;
 typedef struct MPIDI_CH3_Pkt_lock_put_unlock
 {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     MPI_Win target_win_handle;
     MPI_Win source_win_handle;
     int lock_type;
@@ -366,6 +382,7 @@ MPIDI_CH3_Pkt_lock_put_unlock_t;
 typedef struct MPIDI_CH3_Pkt_lock_get_unlock
 {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     MPI_Win target_win_handle;
     MPI_Win source_win_handle;
     int lock_type;
@@ -379,6 +396,7 @@ MPIDI_CH3_Pkt_lock_get_unlock_t;
 typedef struct MPIDI_CH3_Pkt_lock_accum_unlock
 {
     MPIDI_CH3_Pkt_type_t type;
+    MPIDI_CH3_Pkt_flags_t flags;
     MPI_Win target_win_handle;
     MPI_Win source_win_handle;
     int lock_type;
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 904dc34..96c8469 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -559,7 +559,7 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
         put_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-
+        put_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
         put_pkt->count = rma_op->target_count;
         put_pkt->datatype = rma_op->target_datatype;
         put_pkt->dataloop_size = 0;
@@ -598,6 +598,7 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_GET_ACCUM);
         accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+        accum_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
         accum_pkt->count = rma_op->target_count;
         accum_pkt->datatype = rma_op->target_datatype;
         accum_pkt->dataloop_size = 0;
@@ -614,6 +615,7 @@ static int MPIDI_CH3I_Send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
         accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+        accum_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
         accum_pkt->count = rma_op->target_count;
         accum_pkt->datatype = rma_op->target_datatype;
         accum_pkt->dataloop_size = 0;
@@ -873,6 +875,7 @@ static int MPIDI_CH3I_Send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
 	MPIDI_Pkt_init(accumi_pkt, MPIDI_CH3_PKT_ACCUM_IMMED);
 	accumi_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
 	    win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+        accumi_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
 	accumi_pkt->count = rma_op->target_count;
 	accumi_pkt->datatype = rma_op->target_datatype;
 	accumi_pkt->op = rma_op->op;
@@ -899,6 +902,7 @@ static int MPIDI_CH3I_Send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
     MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
     accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
 	win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+    accum_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     accum_pkt->count = rma_op->target_count;
     accum_pkt->datatype = rma_op->target_datatype;
     accum_pkt->dataloop_size = 0;
@@ -1010,6 +1014,7 @@ static int MPIDI_CH3I_Send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
 
         cas_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+        cas_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
         cas_pkt->datatype = rma_op->target_datatype;
         cas_pkt->target_win_handle = target_win_handle;
         cas_pkt->request_handle = resp_req->handle;
@@ -1038,6 +1043,7 @@ static int MPIDI_CH3I_Send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
 
         fop_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
             win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+        fop_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
         fop_pkt->datatype = rma_op->target_datatype;
         fop_pkt->target_win_handle = target_win_handle;
         fop_pkt->source_win_handle = source_win_handle;
@@ -1162,6 +1168,7 @@ static int MPIDI_CH3I_Recv_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
     get_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
         win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+    get_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
     get_pkt->count = rma_op->target_count;
     get_pkt->datatype = rma_op->target_datatype;
     get_pkt->request_handle = req->handle;
@@ -1683,6 +1690,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
 	    MPID_Request *request;
 	    
 	    MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
+            put_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
 	    put_pkt->addr = NULL;
 	    put_pkt->count = 0;
 	    put_pkt->datatype = MPI_INT;

http://git.mpich.org/mpich.git/commitdiff/90b3b2f83c4659fdcdb6bf06a410c8fb63583374

commit 90b3b2f83c4659fdcdb6bf06a410c8fb63583374
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Wed Jan 30 17:01:49 2013 -0600

    Converted packet type field to 16-bits
    
    Convert the packet type field from an enum to an int16_t.  This change was also
    applied to packet types defined by Nemesis.  Downstream netmod developers will
    also have to make this change if they defined new packet types.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h
index 7bd314a..f0096f5 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h
@@ -61,7 +61,7 @@ typedef enum MPID_nem_pkt_type
 
 typedef struct MPID_nem_pkt_lmt_rts
 {
-    MPID_nem_pkt_type_t type;
+    MPIDI_CH3_Pkt_type_t type;
     MPIDI_Message_match match;
     MPI_Request sender_req_id;
     MPIDI_msg_sz_t data_sz;
@@ -71,7 +71,7 @@ MPID_nem_pkt_lmt_rts_t;
 
 typedef struct MPID_nem_pkt_lmt_cts
 {
-    MPID_nem_pkt_type_t type;
+    MPIDI_CH3_Pkt_type_t type;
     MPI_Request sender_req_id;
     MPI_Request receiver_req_id;
     MPIDI_msg_sz_t data_sz;
@@ -81,14 +81,14 @@ MPID_nem_pkt_lmt_cts_t;
 
 typedef struct MPID_nem_pkt_lmt_done
 {
-    MPID_nem_pkt_type_t type;
+    MPIDI_CH3_Pkt_type_t type;
     MPI_Request req_id;
 }
 MPID_nem_pkt_lmt_done_t;
 
 typedef struct MPID_nem_pkt_lmt_cookie
 {
-    MPID_nem_pkt_type_t type;
+    MPIDI_CH3_Pkt_type_t type;
     int from_sender;
     MPI_Request sender_req_id;
     MPI_Request receiver_req_id;
@@ -98,14 +98,14 @@ MPID_nem_pkt_lmt_cookie_t;
 
 typedef struct MPID_nem_pkt_ckpt_marker
 {
-    MPID_nem_pkt_type_t type;
+    MPIDI_CH3_Pkt_type_t type;
     int wave; /* used for debugging */
 }
 MPID_nem_pkt_ckpt_marker_t;
 
 typedef struct MPID_nem_pkt_netmod
 {
-    MPID_nem_pkt_type_t type;
+    MPIDI_CH3_Pkt_type_t type;
     unsigned subtype;
 }
 MPID_nem_pkt_netmod_t;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_impl.h b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_impl.h
index 8e2edc8..aab5019 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_impl.h
@@ -194,7 +194,7 @@ typedef enum MPIDI_nem_tcp_pkt_type {
 #ifdef ENABLE_CHECKPOINTING
 typedef struct MPIDI_nem_tcp_pkt_unpause
 {
-    MPID_nem_pkt_type_t type;
+    MPIDI_CH3_Pkt_type_t type;
     unsigned subtype;
 } MPIDI_nem_tcp_pkt_unpause_t;
 #endif
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 1637ab7..5e63753 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -9,6 +9,13 @@
 
 #include "oputil.h"
 
+#ifdef HAVE_STDINT_H
+#  include <stdint.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+#  include <inttypes.h>
+#endif
+
 /* Enable the use of data within the message packet for small messages */
 #define USE_EAGER_SHORT
 #define MPIDI_EAGER_SHORT_INTS 4
@@ -49,13 +56,11 @@ typedef union {
 } MPIDI_CH3_FOP_Immed_u;
 
 /*
- * MPIDI_CH3_Pkt_type_t
- *
  * Predefined packet types.  This simplifies some of the code.
  */
 /* FIXME: Having predefined names makes it harder to add new message types,
    such as different RMA types. */
-typedef enum MPIDI_CH3_Pkt_type
+enum MPIDI_CH3_Pkt_types
 {
     MPIDI_CH3_PKT_EAGER_SEND = 0,
 #if defined(USE_EAGER_SHORT)
@@ -101,8 +106,9 @@ typedef enum MPIDI_CH3_Pkt_type
 # endif    
     , MPIDI_CH3_PKT_END_ALL,
     MPIDI_CH3_PKT_INVALID = -1 /* forces a signed enum to quash warnings */
-}
-MPIDI_CH3_Pkt_type_t;
+};
+
+typedef int16_t MPIDI_CH3_Pkt_type_t;
 
 typedef struct MPIDI_CH3_Pkt_send
 {

http://git.mpich.org/mpich.git/commitdiff/bba35589949c92505de9f8f5fd7bc5d9e2693b23

commit bba35589949c92505de9f8f5fd7bc5d9e2693b23
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Mon Jan 28 14:29:43 2013 -0600

    Consolidated RMA op finalization code
    
    This patch consolidates the synchronization and tracking of RMA operations into
    a single routine that is called whenever we complete an operation.  The only
    exceptions are lock-op-unlock operations that are completed from within the lock
    operation processing code.
    
    This code is pretty ugly, but it will get cleaner once packet flags have been
    added.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index ff4ed72..c79861a 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -1214,6 +1214,8 @@ int MPIDI_CH3I_Release_lock(MPID_Win * win_ptr);
 int MPIDI_CH3I_Try_acquire_win_lock(MPID_Win * win_ptr, int requested_lock);
 int MPIDI_CH3I_Send_lock_granted_pkt(MPIDI_VC_t * vc, MPID_Win *win_ptr, int source_win_hdl);
 int MPIDI_CH3I_Send_pt_rma_done_pkt(MPIDI_VC_t * vc, MPID_Win *win_ptr, int source_win_hdl);
+int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma_update,
+                                   int end_epoch, MPI_Win source_win_handle, int force_done_pkt);
 
 #define MPIDI_CH3I_DATATYPE_IS_PREDEFINED(type, predefined) \
     if ((HANDLE_GET_KIND(type) == HANDLE_KIND_BUILTIN) || \
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 2eaa530..66ca1e0 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -152,36 +152,10 @@ int MPIDI_CH3_ReqHandler_PutAccumRespComplete( MPIDI_VC_t *vc,
     
     MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
     
-    /* if passive target RMA, increment counter */
-    if (win_ptr->current_lock_type != MPID_LOCK_NONE)
-	win_ptr->my_pt_rma_puts_accs++;
-    
-    if (rreq->dev.source_win_handle != MPI_WIN_NULL) {
-	/* Last RMA operation from source. If active
-	   target RMA, decrement window counter. If
-	   passive target RMA, release lock on window and
-	   grant next lock in the lock queue if there is
-	   any. If it's a shared lock or a lock-put-unlock
-	   type of optimization, we also need to send an
-	   ack to the source. */ 
-	
-	if (win_ptr->current_lock_type == MPID_LOCK_NONE) {
-	    /* FIXME: MT: this has to be done atomically */
-	    win_ptr->my_counter -= 1;
-	}
-	else {
-	    if ((win_ptr->current_lock_type == MPI_LOCK_SHARED) ||
-		(rreq->dev.single_op_opt == 1)) {
-                mpi_errno = MPIDI_CH3I_Send_pt_rma_done_pkt(vc, win_ptr,
-				    rreq->dev.source_win_handle);
-		if (mpi_errno) {
-		    MPIU_ERR_POP(mpi_errno);
-		}
-	    }
-	    mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
-	}
-    }
-    
+    mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE, rreq->dev.source_win_handle != MPI_WIN_NULL,
+                                               rreq->dev.source_win_handle, rreq->dev.single_op_opt);
+    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
     /* mark data transfer as complete and decrement CC */
     MPIDI_CH3U_Request_complete(rreq);
     *complete = TRUE;
@@ -444,6 +418,7 @@ int MPIDI_CH3_ReqHandler_SinglePutAccumComplete( MPIDI_VC_t *vc,
     if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, 
 					lock_queue_entry->lock_type) == 1)
     {
+        MPI_Win source_win_handle = lock_queue_entry->source_win_handle;
 	
 	if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_PT_SINGLE_PUT) {
 	    /* copy the data over */
@@ -462,16 +437,6 @@ int MPIDI_CH3_ReqHandler_SinglePutAccumComplete( MPIDI_VC_t *vc,
 	    MPIU_ERR_POP(mpi_errno);
 	}
 	
-	/* increment counter */
-	win_ptr->my_pt_rma_puts_accs++;
-	
-	/* send done packet */
-        mpi_errno = MPIDI_CH3I_Send_pt_rma_done_pkt(vc, win_ptr,
-				    lock_queue_entry->source_win_handle);
-	if (mpi_errno) {
-	    MPIU_ERR_POP(mpi_errno);
-	}
-	
 	/* free lock_queue_entry including data buffer and remove 
 	   it from the queue. */
 	curr_ptr = (MPIDI_Win_lock_queue *) win_ptr->lock_queue;
@@ -486,8 +451,8 @@ int MPIDI_CH3_ReqHandler_SinglePutAccumComplete( MPIDI_VC_t *vc,
 	MPIU_Free(lock_queue_entry->pt_single_op);
 	MPIU_Free(lock_queue_entry);
 	
-	/* Release lock and grant next lock if there is one. */
-	mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
+        mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE, TRUE, source_win_handle, TRUE);
+        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
     }
     else {
 	/* could not acquire lock. mark data recd as 1 */
@@ -586,30 +551,9 @@ int MPIDI_CH3_ReqHandler_FOPComplete( MPIDI_VC_t *vc,
 
     MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
 
-    /* if passive target RMA, increment counter */
-    if (win_ptr->current_lock_type != MPID_LOCK_NONE)
-        win_ptr->my_pt_rma_puts_accs++;
-
-    if (rreq->dev.source_win_handle != MPI_WIN_NULL) {
-        /* Last RMA operation from source. If active
-           target RMA, decrement window counter. If
-           passive target RMA, release lock on window and
-           grant next lock in the lock queue if there is
-           any. If it's a shared lock or a lock-put-unlock
-           type of optimization, we also need to send an
-           ack to the source. */ 
-        if (win_ptr->current_lock_type == MPID_LOCK_NONE) {
-            /* FIXME: MT: this has to be done atomically */
-            win_ptr->my_counter -= 1;
-            MPIDI_CH3_Progress_signal_completion();
-        }
-        else {
-            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
-            /* Without the following signal_completion call, we 
-               sometimes hang */
-            MPIDI_CH3_Progress_signal_completion();
-        }
-    }
+    mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE, rreq->dev.source_win_handle != MPI_WIN_NULL,
+                                               rreq->dev.source_win_handle, FALSE);
+    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
     *complete = 1;
 
@@ -1021,6 +965,12 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
 			    
 			    /* if put or accumulate, send rma done packet and release lock. */
 			    if (single_op->type != MPIDI_RMA_GET) {
+                                /* NOTE: Only *queued* single_op operations are completed here.
+                                   Lock-op-unlock/single_op RMA ops can also be completed as
+                                   they arrive within various packet/request handlers via
+                                   MPIDI_CH3_Finish_rma_op_target().  That call cannot be used
+                                   here, because it would enter this function recursively. */
+
 				/* increment counter */
 				win_ptr->my_pt_rma_puts_accs++;
 				
diff --git a/src/mpid/ch3/src/ch3u_handle_send_req.c b/src/mpid/ch3/src/ch3u_handle_send_req.c
index 0d7b6bd..9d101e9 100644
--- a/src/mpid/ch3/src/ch3u_handle_send_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_send_req.c
@@ -59,20 +59,19 @@ int MPIDI_CH3_ReqHandler_GetSendRespComplete( MPIDI_VC_t *vc ATTRIBUTE((unused))
 	   packet since the last operation is a get. */
 	
 	MPID_Win_get_ptr(sreq->dev.target_win_handle, win_ptr);
-	if (win_ptr->current_lock_type == MPID_LOCK_NONE) {
-	    /* FIXME: MT: this has to be done atomically */
-	    win_ptr->my_counter -= 1;
-	}
-	else {
-	    mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
-	}
+
+        mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, FALSE, TRUE, MPI_WIN_NULL, FALSE);
+        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
     }
 
     /* mark data transfer as complete and decrement CC */
     MPIDI_CH3U_Request_complete(sreq);
     *complete = TRUE;
 
+ fn_exit:
     return mpi_errno;
+ fn_fail:
+    goto fn_exit;
 }
 
 int MPIDI_CH3_ReqHandler_SendReloadIOV( MPIDI_VC_t *vc ATTRIBUTE((unused)), MPID_Request *sreq, 
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 4a5ef1b..904dc34 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1960,6 +1960,9 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
 	 * in the queue, and return. */
         MPIU_Assert(MPIDI_CH3I_RMA_Ops_isempty(&win_ptr->targets[dest].rma_ops_list));
 
+        /* NOTE: We don't need to signal completion here because a thread in the
+         * same process cannot lock the window again while it is already
+         * locked. */
 	mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
         if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
         win_ptr->targets[dest].remote_lock_state = MPIDI_CH3_WIN_LOCK_NONE;
@@ -3242,12 +3245,11 @@ int MPIDI_CH3_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	   post/start/complete/wait sync model; therefore, no need
 	   to check lock queue. */
 	if (put_pkt->target_win_handle != MPI_WIN_NULL) {
-	    MPID_Win_get_ptr(put_pkt->target_win_handle, win_ptr);
-	    /* FIXME: MT: this has to be done atomically */
-	    win_ptr->my_counter -= 1;
+            MPID_Win_get_ptr(put_pkt->target_win_handle, win_ptr);
+            mpi_errno = MPIDI_CH3_Finish_rma_op_target(NULL, win_ptr, TRUE, TRUE, MPI_WIN_NULL, FALSE);
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 	}
         *buflen = sizeof(MPIDI_CH3_Pkt_t);
-	MPIDI_CH3_Progress_signal_completion();	
 	*rreqp = NULL;
         goto fn_exit;
     }
@@ -3730,41 +3732,11 @@ int MPIDI_CH3_PktHandler_Accumulate_Immed( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	/* Here is the code executed in PutAccumRespComplete after the
 	   accumulation operation */
 	MPID_Win_get_ptr(accum_pkt->target_win_handle, win_ptr);
-	
-	/* if passive target RMA, increment counter */
-	if (win_ptr->current_lock_type != MPID_LOCK_NONE)
-	    win_ptr->my_pt_rma_puts_accs++;
-	
-	if (accum_pkt->source_win_handle != MPI_WIN_NULL) {
-	    /* Last RMA operation from source. If active
-	       target RMA, decrement window counter. If
-	       passive target RMA, release lock on window and
-	       grant next lock in the lock queue if there is
-	       any. If it's a shared lock or a lock-put-unlock
-	       type of optimization, we also need to send an
-	       ack to the source. */ 
-	    if (win_ptr->current_lock_type == MPID_LOCK_NONE) {
-		/* FIXME: MT: this has to be done atomically */
-		win_ptr->my_counter -= 1;
-		MPIDI_CH3_Progress_signal_completion();
-	    }
-	    else {
-		if ((win_ptr->current_lock_type == MPI_LOCK_SHARED) ||
-		    (/*rreq->dev.single_op_opt*/ 0 == 1)) {
-                    mpi_errno = MPIDI_CH3I_Send_pt_rma_done_pkt(vc, win_ptr,
-					accum_pkt->source_win_handle);
-		    if (mpi_errno) {
-			    MPIU_ERR_POP(mpi_errno);
-		    }
-		}
-		mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
-		/* Without the following signal_completion call, we 
-		   sometimes hang */
-		MPIDI_CH3_Progress_signal_completion();
-	    }
-	}
 
-	goto fn_exit;
+        mpi_errno = MPIDI_CH3_Finish_rma_op_target(vc, win_ptr, TRUE,
+                                                   accum_pkt->source_win_handle != MPI_WIN_NULL,
+                                                   accum_pkt->source_win_handle, FALSE);
+        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
     }
 
  fn_exit:
@@ -3832,31 +3804,10 @@ int MPIDI_CH3_PktHandler_CAS( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 
     MPID_Win_get_ptr(cas_pkt->target_win_handle, win_ptr);
 
-    /* if passive target RMA, increment counter */
-    if (win_ptr->current_lock_type != MPID_LOCK_NONE)
-        win_ptr->my_pt_rma_puts_accs++;
-
-    /* Send RMA done packet?  FIXME: Can the cas_resp handler handle this? */
-    if (cas_pkt->type == MPIDI_CH3_PKT_CAS_UNLOCK) {
-        /* Last RMA operation from source. If active
-           target RMA, decrement window counter. If
-           passive target RMA, release lock on window and
-           grant next lock in the lock queue if there is
-           any. If it's a shared lock or a lock-put-unlock
-           type of optimization, we also need to send an
-           ack to the source. */ 
-        if (win_ptr->current_lock_type == MPID_LOCK_NONE) {
-            /* FIXME: MT: this has to be done atomically */
-            win_ptr->my_counter -= 1;
-            MPIDI_CH3_Progress_signal_completion();
-        }
-        else {
-            mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
-            /* Without the following signal_completion call, we 
-               sometimes hang */
-            MPIDI_CH3_Progress_signal_completion();
-        }
-    }
+    mpi_errno = MPIDI_CH3_Finish_rma_op_target(NULL, win_ptr, TRUE,
+                                               pkt->type == MPIDI_CH3_PKT_CAS_UNLOCK,
+                                               MPI_WIN_NULL, FALSE);
+    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
 fn_exit:
     MPIU_INSTR_DURATION_END(rmapkt_cas);
@@ -4922,6 +4873,74 @@ static int MPIDI_CH3I_RMAListPartialComplete( MPID_Win *win_ptr,
     return mpi_errno;
 }
 
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3_Finish_rma_op_target
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPIDI_CH3_Finish_rma_op_target(MPIDI_VC_t *vc, MPID_Win *win_ptr, int is_rma_update,
+                                   int end_epoch, MPI_Win source_win_handle, int force_done_pkt)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_FINISH_RMA_OP_TARGET);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_FINISH_RMA_OP_TARGET);
+
+    /* This function should be called by the target process after each RMA
+       operation is completed, to update synchronization state. */
+
+    /* If this is a passive target RMA update operation, increment counter.  This is
+       needed in Win_free to ensure that all ops are completed before a window
+       is freed. */
+    if (win_ptr->current_lock_type != MPID_LOCK_NONE && is_rma_update)
+        win_ptr->my_pt_rma_puts_accs++;
+
+    if (end_epoch) {
+        /* Last RMA operation from source. If active target RMA, decrement
+           window counter. If passive target RMA, release lock on window and
+           grant next lock in the lock queue if there is any. If it's a shared
+           lock or a lock-put-unlock type of optimization, we also need to send
+           an ack to the source. */
+
+        if (win_ptr->current_lock_type == MPID_LOCK_NONE) {
+            /* FIXME: MT: Accesses to my_counter should be done atomically */
+            win_ptr->my_counter -= 1;
+
+            /* Active target: Signal the local process when the op counter
+             * reaches 0. */
+            if (win_ptr->my_counter == 0)
+                MPIDI_CH3_Progress_signal_completion();
+        }
+        else {
+            if (source_win_handle != MPI_WIN_NULL &&
+                ((win_ptr->current_lock_type == MPI_LOCK_SHARED || force_done_pkt)))
+            {
+                mpi_errno = MPIDI_CH3I_Send_pt_rma_done_pkt(vc, win_ptr, source_win_handle);
+                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            }
+
+            if (end_epoch) {
+                mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
+                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+                /* Passive target: The local process may be waiting for the
+                 * lock.  Signal completion to wake it up, so it can attempt to
+                 * grab the lock. */
+                MPIDI_CH3_Progress_signal_completion();
+            }
+        }
+    }
+
+fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_FINISH_RMA_OP_TARGET);
+    return mpi_errno;
+    /* --BEGIN ERROR HANDLING-- */
+fn_fail:
+    goto fn_exit;
+    /* --END ERROR HANDLING-- */
+}
+
+
 /* ------------------------------------------------------------------------ */
 /* 
  * For debugging, we provide the following functions for printing the 

http://git.mpich.org/mpich.git/commitdiff/c5312557541a513ed36d4a2d9090f70cf3a2e949

commit c5312557541a513ed36d4a2d9090f70cf3a2e949
Author: James Dinan <dinan at mcs.anl.gov>
Date:   Tue Feb 5 08:48:06 2013 -0600

    Temporarily reverted is_gacc_op bugfix
    
    Partially reverted [0b3640682] in preparation for incorporating new
    piggybacking infrastructure.  This temporarily re-introduces that bug
    and it will be fixed again with the new piggybacking patch.
    
    Reviewer: goodell

diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 2352da7..2eaa530 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -82,7 +82,6 @@ int MPIDI_CH3_ReqHandler_PutAccumRespComplete( MPIDI_VC_t *vc,
 					       int *complete )
 {
     int mpi_errno = MPI_SUCCESS;
-    int is_gacc_op = 0;
     MPID_Win *win_ptr;
     MPIU_CHKPMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTACCUMRESPCOMPLETE);
@@ -97,7 +96,6 @@ int MPIDI_CH3_ReqHandler_PutAccumRespComplete( MPIDI_VC_t *vc,
         MPID_Request *resp_req;
         MPID_IOV iov[MPID_IOV_LIMIT];
 
-        is_gacc_op = 1;
         MPIDI_Pkt_init(get_accum_resp_pkt, MPIDI_CH3_PKT_GET_ACCUM_RESP);
         get_accum_resp_pkt->request_handle = rreq->dev.resp_request_handle;
 
@@ -172,7 +170,7 @@ int MPIDI_CH3_ReqHandler_PutAccumRespComplete( MPIDI_VC_t *vc,
 	    win_ptr->my_counter -= 1;
 	}
 	else {
-	    if ((win_ptr->current_lock_type == MPI_LOCK_SHARED && !is_gacc_op) ||
+	    if ((win_ptr->current_lock_type == MPI_LOCK_SHARED) ||
 		(rreq->dev.single_op_opt == 1)) {
                 mpi_errno = MPIDI_CH3I_Send_pt_rma_done_pkt(vc, win_ptr,
 				    rreq->dev.source_win_handle);

-----------------------------------------------------------------------

Summary of changes:
 src/mpi/errhan/errnames.txt                        |    2 -
 .../ch3/channels/nemesis/include/mpid_nem_impl.h   |   12 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_impl.h     |    2 +-
 src/mpid/ch3/errnames.txt                          |    9 +
 src/mpid/ch3/include/mpidimpl.h                    |    3 +
 src/mpid/ch3/include/mpidpkt.h                     |   45 ++-
 src/mpid/ch3/include/mpidpre.h                     |    2 +-
 src/mpid/ch3/include/mpidrma.h                     |    1 +
 src/mpid/ch3/src/ch3u_handle_recv_pkt.c            |    2 -
 src/mpid/ch3/src/ch3u_handle_recv_req.c            |   89 +---
 src/mpid/ch3/src/ch3u_handle_send_req.c            |   26 +-
 src/mpid/ch3/src/ch3u_request.c                    |    2 +-
 src/mpid/ch3/src/ch3u_rma_sync.c                   |  526 ++++++++++++++------
 test/mpi/rma/Makefile.am                           |    6 +-
 test/mpi/rma/linked_list_bench_lock_shr.c          |    8 +
 test/mpi/rma/testlist                              |    1 +
 16 files changed, 458 insertions(+), 278 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list