[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.1-111-g274a5a7

Service Account noreply at mpich.org
Thu Jul 17 17:59:48 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  274a5a70275081880d29f149ff0a24cc5ad9c8c3 (commit)
      from  006a54bd1d330d16f9491dbd59dd3e8486cf7ae2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/274a5a70275081880d29f149ff0a24cc5ad9c8c3

commit 274a5a70275081880d29f149ff0a24cc5ad9c8c3
Author: Pavan Balaji <balaji at anl.gov>
Date:   Wed Jul 16 22:31:07 2014 -0500

    Simplified RMA_Op structure.
    
    We were creating duplicate information in the operation structure
    and in the packet structure when the message is actually issued.
    Since most of the information is the same anyway, this patch just
    embeds a packet structure into the operation structure.
    
    Signed-off-by: Xin Zhao <xinzhao3 at illinois.edu>

diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 935c03b..65b7bde 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -62,7 +62,7 @@ typedef union {
    such as different RMA types. */
 enum MPIDI_CH3_Pkt_types
 {
-    MPIDI_CH3_PKT_EAGER_SEND = 0,
+    MPIDI_CH3_PKT_EAGER_SEND = 53,
 #if defined(USE_EAGER_SHORT)
     MPIDI_CH3_PKT_EAGERSHORT_SEND,
 #endif /* defined(USE_EAGER_SHORT) */
@@ -197,6 +197,43 @@ MPIDI_CH3_Pkt_cancel_send_resp_t;
 MPIDI_CH3_PKT_DEFS
 #endif
 
+#define MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE(pkt_, datatype_)  \
+    {                                                           \
+        switch(pkt_.type) {                                     \
+        case (MPIDI_CH3_PKT_PUT):                               \
+        {                                                       \
+            MPIDI_CH3_Pkt_put_t put_pkt_ = pkt_.put;            \
+            datatype_ = put_pkt_.datatype;                      \
+            break;                                              \
+        }                                                       \
+        case (MPIDI_CH3_PKT_GET):                               \
+        {                                                       \
+            MPIDI_CH3_Pkt_get_t get_pkt_ = pkt_.get;            \
+            datatype_ = get_pkt_.datatype;                      \
+            break;                                              \
+        }                                                       \
+        case (MPIDI_CH3_PKT_ACCUMULATE):                        \
+        case (MPIDI_CH3_PKT_GET_ACCUM):                         \
+        {                                                       \
+            MPIDI_CH3_Pkt_accum_t acc_pkt_ = pkt_.accum;        \
+            datatype_ = acc_pkt_.datatype;                      \
+            break;                                              \
+        }                                                       \
+        case (MPIDI_CH3_PKT_CAS):                               \
+        {                                                       \
+            MPIDI_CH3_Pkt_cas_t cas_pkt_ = pkt_.cas;            \
+            datatype_ = cas_pkt_.datatype;                      \
+            break;                                              \
+        }                                                       \
+        case (MPIDI_CH3_PKT_FOP):                               \
+        {                                                       \
+            MPIDI_CH3_Pkt_fop_t fop_pkt_ = pkt_.fop;            \
+            datatype_ = fop_pkt_.datatype;                      \
+            break;                                              \
+        }                                                       \
+        }                                                       \
+    }
+
 typedef struct MPIDI_CH3_Pkt_put
 {
     MPIDI_CH3_Pkt_type_t type;
@@ -296,6 +333,7 @@ typedef struct MPIDI_CH3_Pkt_cas
     MPI_Datatype datatype;
     void *addr;
     MPI_Request request_handle;
+    MPI_Win source_win_handle;
     MPI_Win target_win_handle; /* Used in the last RMA operation in each
                                 * epoch for decrementing rma op counter in
                                 * active target rma and for unlocking window 
@@ -321,6 +359,7 @@ typedef struct MPIDI_CH3_Pkt_fop
     void *addr;
     MPI_Op op;
     MPI_Request request_handle;
+    MPI_Win source_win_handle;
     MPI_Win target_win_handle; /* Used in the last RMA operation in each
                                 * epoch for decrementing rma op counter in
                                 * active target rma and for unlocking window 
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 84bac45..38c9ee4 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -13,17 +13,6 @@ MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_wincreate_allgather);
 MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winfree_rs);
 MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_winfree_complete);
 
-typedef enum MPIDI_RMA_Op_type {
-    MPIDI_RMA_PUT               = 23,
-    MPIDI_RMA_GET               = 24,
-    MPIDI_RMA_ACCUMULATE        = 25,
- /* REMOVED: MPIDI_RMA_LOCK     = 26, */
-    MPIDI_RMA_ACC_CONTIG        = 27,
-    MPIDI_RMA_GET_ACCUMULATE    = 28,
-    MPIDI_RMA_COMPARE_AND_SWAP  = 29,
-    MPIDI_RMA_FETCH_AND_OP      = 30
-} MPIDI_RMA_Op_type_t;
-
 /* Special case RMA operations */
 
 enum MPIDI_RMA_Datatype {
@@ -62,33 +51,29 @@ typedef struct MPIDI_RMA_dtype_info { /* for derived datatypes */
 typedef struct MPIDI_RMA_Op {
     struct MPIDI_RMA_Op *prev;  /* pointer to next element in list */
     struct MPIDI_RMA_Op *next;  /* pointer to next element in list */
-    /* FIXME: It would be better to setup the packet that will be sent, at 
-       least in most cases (if, as a result of the sync/ops/sync sequence,
-       a different packet type is needed, it can be extracted from the 
-       information otherwise stored). */
-    MPIDI_RMA_Op_type_t type;
+
     void *origin_addr;
     int origin_count;
     MPI_Datatype origin_datatype;
-    int target_rank;
-    MPI_Aint target_disp;
-    int target_count;
-    MPI_Datatype target_datatype;
-    MPI_Op op;  /* for accumulate */
-    /* Used to complete operations */
-    struct MPID_Request *request;
-    MPIDI_RMA_dtype_info dtype_info;
-    void *dataloop;
+
+    void *compare_addr;
+    MPI_Datatype compare_datatype;
+
     void *result_addr;
     int result_count;
     MPI_Datatype result_datatype;
-    void *compare_addr;
-    int compare_count;
-    MPI_Datatype compare_datatype;
+
+    struct MPID_Request *request;
+    MPIDI_RMA_dtype_info dtype_info;
+    void *dataloop;
+
+    int target_rank;
+
+    MPIDI_CH3_Pkt_t pkt;
 } MPIDI_RMA_Op_t;
 
 typedef struct MPIDI_PT_single_op {
-    int type;  /* put, get, or accum. */
+    enum MPIDI_CH3_Pkt_types type;  /* put, get, or accum. */
     void *addr;
     int count;
     MPI_Datatype datatype;
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 4d07c94..8c44d26 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -961,7 +961,7 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
 			    MPIDI_PT_single_op * single_op;
 			    
 			    single_op = lock_queue->pt_single_op;
-			    if (single_op->type == MPIDI_RMA_PUT) {
+			    if (single_op->type == MPIDI_CH3_PKT_LOCK_PUT_UNLOCK) {
 				mpi_errno = MPIR_Localcopy(single_op->data,
 							   single_op->count,
 							   single_op->datatype,
@@ -969,21 +969,21 @@ int MPIDI_CH3I_Release_lock(MPID_Win *win_ptr)
 							   single_op->count,
 							   single_op->datatype);
 			    }   
-			    else if (single_op->type == MPIDI_RMA_ACCUMULATE) {
+			    else if (single_op->type == MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK) {
 				if (win_ptr->shm_allocated == TRUE)
 				    MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
 				mpi_errno = do_simple_accumulate(single_op);
 				if (win_ptr->shm_allocated == TRUE)
 				    MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
 			    }
-			    else if (single_op->type == MPIDI_RMA_GET) {
+			    else if (single_op->type == MPIDI_CH3_PKT_LOCK_GET_UNLOCK) {
 				mpi_errno = do_simple_get(win_ptr, lock_queue);
 			    }
 			    
                             if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 			    
 			    /* if put or accumulate, send rma done packet and release lock. */
-			    if (single_op->type != MPIDI_RMA_GET) {
+			    if (single_op->type != MPIDI_CH3_PKT_LOCK_GET_UNLOCK) {
                                 /* NOTE: Only *queued* single_op operations are completed here.
                                    Lock-op-unlock/single_op RMA ops can also be completed as
                                    they arrive within various packet/request handlers via
diff --git a/src/mpid/ch3/src/ch3u_rma_acc_ops.c b/src/mpid/ch3/src/ch3u_rma_acc_ops.c
index b0b5e54..182dec8 100644
--- a/src/mpid/ch3/src/ch3u_rma_acc_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_acc_ops.c
@@ -86,20 +86,46 @@ int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
         /* TODO: Can we use the MPIDI_RMA_ACC_CONTIG optimization? */
 
         MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
-        new_ptr->type = MPIDI_RMA_GET_ACCUMULATE;
-        /* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
-         * contain both PUT and GET like ops */
-        new_ptr->origin_addr = (void *) origin_addr;
-        new_ptr->origin_count = origin_count;
-        new_ptr->origin_datatype = origin_datatype;
-        new_ptr->result_addr = result_addr;
-        new_ptr->result_count = result_count;
-        new_ptr->result_datatype = result_datatype;
-        new_ptr->target_rank = target_rank;
-        new_ptr->target_disp = target_disp;
-        new_ptr->target_count = target_count;
-        new_ptr->target_datatype = target_datatype;
-        new_ptr->op = op;
+
+        if (op == MPI_NO_OP) {
+            /* Convert GAcc to a Get */
+            MPIDI_CH3_Pkt_get_t *get_pkt = &(new_ptr->pkt.get);
+            MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
+            get_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
+                win_ptr->disp_units[target_rank] * target_disp;
+            get_pkt->count = target_count;
+            get_pkt->datatype = target_datatype;
+            get_pkt->dataloop_size = 0;
+            get_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
+            get_pkt->source_win_handle = win_ptr->handle;
+
+            new_ptr->origin_addr = result_addr;
+            new_ptr->origin_count = result_count;
+            new_ptr->origin_datatype = result_datatype;
+            new_ptr->target_rank = target_rank;
+        }
+
+        else {
+            MPIDI_CH3_Pkt_accum_t *accum_pkt = &(new_ptr->pkt.accum);
+            MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_GET_ACCUM);
+            accum_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
+                win_ptr->disp_units[target_rank] * target_disp;
+            accum_pkt->count = target_count;
+            accum_pkt->datatype = target_datatype;
+            accum_pkt->dataloop_size = 0;
+            accum_pkt->op = op;
+            accum_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
+            accum_pkt->source_win_handle = win_ptr->handle;
+
+            new_ptr->origin_addr = (void *) origin_addr;
+            new_ptr->origin_count = origin_count;
+            new_ptr->origin_datatype = origin_datatype;
+            new_ptr->result_addr = result_addr;
+            new_ptr->result_count = result_count;
+            new_ptr->result_datatype = result_datatype;
+            new_ptr->target_rank = target_rank;
+        }
+
         MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
 
         /* if source or target datatypes are derived, increment their
@@ -188,6 +214,8 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
         MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
         MPIDI_RMA_Op_t *new_ptr = NULL;
 
+        MPIDI_CH3_Pkt_cas_t *cas_pkt = NULL;
+
         /* Append this operation to the RMA ops queue */
         MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
         mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
@@ -195,20 +223,23 @@ int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
         MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
-        new_ptr->type = MPIDI_RMA_COMPARE_AND_SWAP;
+
+        cas_pkt = &(new_ptr->pkt.cas);
+        MPIDI_Pkt_init(cas_pkt, MPIDI_CH3_PKT_CAS);
+        cas_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
+            win_ptr->disp_units[target_rank] * target_disp;
+        cas_pkt->datatype = datatype;
+        cas_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
+        cas_pkt->source_win_handle = win_ptr->handle;
+
         new_ptr->origin_addr = (void *) origin_addr;
         new_ptr->origin_count = 1;
         new_ptr->origin_datatype = datatype;
-        new_ptr->target_rank = target_rank;
-        new_ptr->target_disp = target_disp;
-        new_ptr->target_count = 1;
-        new_ptr->target_datatype = datatype;
         new_ptr->result_addr = result_addr;
-        new_ptr->result_count = 1;
         new_ptr->result_datatype = datatype;
         new_ptr->compare_addr = (void *) compare_addr;
-        new_ptr->compare_count = 1;
         new_ptr->compare_datatype = datatype;
+        new_ptr->target_rank = target_rank;
         MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
     }
 
@@ -280,6 +311,8 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
         MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
         MPIDI_RMA_Op_t *new_ptr = NULL;
 
+        MPIDI_CH3_Pkt_fop_t *fop_pkt = NULL;
+
         /* Append this operation to the RMA ops queue */
         MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
         mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
@@ -287,18 +320,21 @@ int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
         MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
-        new_ptr->type = MPIDI_RMA_FETCH_AND_OP;
+        fop_pkt = &(new_ptr->pkt.fop);
+        MPIDI_Pkt_init(fop_pkt, MPIDI_CH3_PKT_FOP);
+        fop_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
+            win_ptr->disp_units[target_rank] * target_disp;
+        fop_pkt->datatype = datatype;
+        fop_pkt->op = op;
+        fop_pkt->source_win_handle = win_ptr->handle;
+        fop_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
+
         new_ptr->origin_addr = (void *) origin_addr;
         new_ptr->origin_count = 1;
         new_ptr->origin_datatype = datatype;
-        new_ptr->target_rank = target_rank;
-        new_ptr->target_disp = target_disp;
-        new_ptr->target_count = 1;
-        new_ptr->target_datatype = datatype;
         new_ptr->result_addr = result_addr;
-        new_ptr->result_count = 1;
         new_ptr->result_datatype = datatype;
-        new_ptr->op = op;
+        new_ptr->target_rank = target_rank;
         MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
     }
 
diff --git a/src/mpid/ch3/src/ch3u_rma_ops.c b/src/mpid/ch3/src/ch3u_rma_ops.c
index f30c464..a9d8224 100644
--- a/src/mpid/ch3/src/ch3u_rma_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_ops.c
@@ -173,6 +173,8 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
         MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
         MPIDI_RMA_Op_t *new_ptr = NULL;
 
+        MPIDI_CH3_Pkt_put_t *put_pkt = NULL;
+
 	/* queue it up */
         MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
         mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
@@ -180,18 +182,22 @@ int MPIDI_Put(const void *origin_addr, int origin_count, MPI_Datatype
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
 	MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
+
+        put_pkt = &(new_ptr->pkt.put);
+        MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
+        put_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
+            win_ptr->disp_units[target_rank] * target_disp;
+        put_pkt->count = target_count;
+        put_pkt->datatype = target_datatype;
+        put_pkt->dataloop_size = 0;
+        put_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
+        put_pkt->source_win_handle = win_ptr->handle;
+
 	/* FIXME: For contig and very short operations, use a streamlined op */
-	new_ptr->type = MPIDI_RMA_PUT;
-        /* Cast away const'ness for the origin address, as the
-         * MPIDI_RMA_Op_t structure is used for both PUT and GET like
-         * operations */
 	new_ptr->origin_addr = (void *) origin_addr;
 	new_ptr->origin_count = origin_count;
 	new_ptr->origin_datatype = origin_datatype;
-	new_ptr->target_rank = target_rank;
-	new_ptr->target_disp = target_disp;
-	new_ptr->target_count = target_count;
-	new_ptr->target_datatype = target_datatype;
+        new_ptr->target_rank = target_rank;
 	MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
 
 	/* if source or target datatypes are derived, increment their
@@ -285,6 +291,8 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
         MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
         MPIDI_RMA_Op_t *new_ptr = NULL;
 
+        MPIDI_CH3_Pkt_get_t *get_pkt = NULL;
+
 	/* queue it up */
         MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
         mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
@@ -292,15 +300,22 @@ int MPIDI_Get(void *origin_addr, int origin_count, MPI_Datatype
         if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
 	MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
+
+        get_pkt = &(new_ptr->pkt.get);
+        MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
+        get_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
+            win_ptr->disp_units[target_rank] * target_disp;
+        get_pkt->count = target_count;
+        get_pkt->datatype = target_datatype;
+        get_pkt->dataloop_size = 0;
+        get_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
+        get_pkt->source_win_handle = win_ptr->handle;
+
 	/* FIXME: For contig and very short operations, use a streamlined op */
-	new_ptr->type = MPIDI_RMA_GET;
 	new_ptr->origin_addr = origin_addr;
 	new_ptr->origin_count = origin_count;
 	new_ptr->origin_datatype = origin_datatype;
-	new_ptr->target_rank = target_rank;
-	new_ptr->target_disp = target_disp;
-	new_ptr->target_count = target_count;
-	new_ptr->target_datatype = target_datatype;
+        new_ptr->target_rank = target_rank;
 	MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
 	
 	/* if source or target datatypes are derived, increment their
@@ -396,6 +411,8 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
         MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
         MPIDI_RMA_Op_t *new_ptr = NULL;
 
+        MPIDI_CH3_Pkt_accum_t *accum_pkt = NULL;
+
 	/* queue it up */
         MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
         mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
@@ -405,35 +422,52 @@ int MPIDI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
 	/* If predefined and contiguous, use a simplified element */
 	if (MPIR_DATATYPE_IS_PREDEFINED(origin_datatype) &&
             MPIR_DATATYPE_IS_PREDEFINED(target_datatype) && enableShortACC) {
+            MPI_Aint origin_type_size;
+            size_t len;
+
 	    MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
-	    new_ptr->type = MPIDI_RMA_ACC_CONTIG;
-	    /* Only the information needed for the contig/predefined acc */
-            /* Cast away const'ness for origin_address as
-             * MPIDI_RMA_Op_t contain both PUT and GET like ops */
-	    new_ptr->origin_addr = (void *) origin_addr;
-	    new_ptr->origin_count = origin_count;
-	    new_ptr->origin_datatype = origin_datatype;
-	    new_ptr->target_rank = target_rank;
-	    new_ptr->target_disp = target_disp;
-	    new_ptr->target_count = target_count;
-	    new_ptr->target_datatype = target_datatype;
-	    new_ptr->op = op;
-	    MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
-	    goto fn_exit;
+
+            MPID_Datatype_get_size_macro(origin_datatype, origin_type_size);
+            MPIU_Assign_trunc(len, origin_count * origin_type_size, size_t);
+            if (MPIR_CVAR_CH3_RMA_ACC_IMMED && len <= MPIDI_RMA_IMMED_INTS*sizeof(int)) {
+                MPIDI_CH3_Pkt_accum_immed_t *accumi_pkt = &(new_ptr->pkt.accum_immed);
+
+                MPIDI_Pkt_init(accumi_pkt, MPIDI_CH3_PKT_ACCUM_IMMED);
+                accumi_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
+                    win_ptr->disp_units[target_rank] * target_disp;
+                accumi_pkt->count = target_count;
+                accumi_pkt->datatype = target_datatype;
+                accumi_pkt->op = op;
+                accumi_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
+                accumi_pkt->source_win_handle = win_ptr->handle;
+
+                new_ptr->origin_addr = (void *) origin_addr;
+                new_ptr->origin_count = origin_count;
+                new_ptr->origin_datatype = origin_datatype;
+                new_ptr->target_rank = target_rank;
+                MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
+                goto fn_exit;
+            }
 	}
 
 	MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
-	new_ptr->type = MPIDI_RMA_ACCUMULATE;
-        /* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
-         * contain both PUT and GET like ops */
+
+        accum_pkt = &(new_ptr->pkt.accum);
+
+        MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
+        accum_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
+            win_ptr->disp_units[target_rank] * target_disp;
+        accum_pkt->count = target_count;
+        accum_pkt->datatype = target_datatype;
+        accum_pkt->dataloop_size = 0;
+        accum_pkt->op = op;
+        accum_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
+        accum_pkt->source_win_handle = win_ptr->handle;
+
 	new_ptr->origin_addr = (void *) origin_addr;
 	new_ptr->origin_count = origin_count;
 	new_ptr->origin_datatype = origin_datatype;
-	new_ptr->target_rank = target_rank;
-	new_ptr->target_disp = target_disp;
-	new_ptr->target_count = target_count;
-	new_ptr->target_datatype = target_datatype;
-	new_ptr->op = op;
+        new_ptr->target_rank = target_rank;
 	MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
 	
 	/* if source or target datatypes are derived, increment their
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 740192b..7fb9a42 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -1017,24 +1017,10 @@ static int send_unlock_msg(int dest, MPID_Win *win_ptr);
 /* static int send_flush_msg(int dest, MPID_Win *win_ptr); */
 static int wait_for_lock_granted(MPID_Win *win_ptr, int target_rank);
 static int acquire_local_lock(MPID_Win *win_ptr, int lock_mode);
-static int send_rma_msg(MPIDI_RMA_Op_t * rma_op, MPID_Win * win_ptr,
-                                   MPIDI_CH3_Pkt_flags_t flags,
-				   MPI_Win source_win_handle, 
-				   MPI_Win target_win_handle, 
-				   MPIDI_RMA_dtype_info * dtype_info, 
-				   void ** dataloop, MPID_Request ** request);
-static int recv_rma_msg(MPIDI_RMA_Op_t * rma_op, MPID_Win * win_ptr,
-                                   MPIDI_CH3_Pkt_flags_t flags,
-				   MPI_Win source_win_handle, 
-				   MPI_Win target_win_handle, 
-				   MPIDI_RMA_dtype_info * dtype_info, 
-				   void ** dataloop, MPID_Request ** request); 
-static int send_contig_acc_msg(MPIDI_RMA_Op_t *, MPID_Win *,
-                                          MPIDI_CH3_Pkt_flags_t flags,
-					  MPI_Win, MPI_Win, MPID_Request ** );
-static int send_immed_rmw_msg(MPIDI_RMA_Op_t *, MPID_Win *,
-                                         MPIDI_CH3_Pkt_flags_t flags,
-                                         MPI_Win, MPI_Win, MPID_Request ** );
+static int send_rma_msg(MPIDI_RMA_Op_t * rma_op, MPID_Win * win_ptr, MPIDI_CH3_Pkt_flags_t flags);
+static int recv_rma_msg(MPIDI_RMA_Op_t * rma_op, MPID_Win * win_ptr, MPIDI_CH3_Pkt_flags_t flags);
+static int send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr, MPIDI_CH3_Pkt_flags_t flags);
+static int send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr, MPIDI_CH3_Pkt_flags_t flags);
 static int do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
                                             int *wait_for_rma_done_pkt,
                                             MPIDI_CH3_Pkt_flags_t sync_flags);
@@ -1056,60 +1042,30 @@ static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
 /* Issue an RMA operation -- Before calling this macro, you must define the
  * MPIDI_CH3I_TRACK_RMA_WRITE helper macro.  This macro defines any extra action
  * that should be taken when a write (put/acc) operation is encountered. */
-#define MPIDI_CH3I_ISSUE_RMA_OP(op_ptr_, win_ptr_, flags_, source_win_handle_, target_win_handle_,err_) \
+#define MPIDI_CH3I_ISSUE_RMA_OP(op_ptr_, win_ptr_, flags_, err_) \
     do {                                                                                        \
-    switch ((op_ptr_)->type)                                                                    \
+        switch ((op_ptr_)->pkt.type)                                    \
     {                                                                                           \
-        case (MPIDI_RMA_PUT):                                                                   \
-        case (MPIDI_RMA_ACCUMULATE):                                                            \
+        case (MPIDI_CH3_PKT_PUT):                                                                   \
+        case (MPIDI_CH3_PKT_ACCUMULATE):                                                            \
+        case (MPIDI_CH3_PKT_GET_ACCUM):                                                        \
             MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_);                                      \
-            (err_) = send_rma_msg((op_ptr_), (win_ptr_), (flags_), (source_win_handle_),        \
-                                                (target_win_handle_), &(op_ptr_)->dtype_info,   \
-                                                &(op_ptr_)->dataloop, &(op_ptr_)->request);     \
+            (err_) = send_rma_msg((op_ptr_), (win_ptr_), (flags_));     \
             if (err_) { MPIU_ERR_POP(err_); }                                                   \
             break;                                                                              \
-        case (MPIDI_RMA_GET_ACCUMULATE):                                                        \
-            if ((op_ptr_)->op == MPI_NO_OP) {                                                   \
-                /* Note: Origin arguments are ignored for NO_OP, so we don't                    \
-                 * need to release a ref to the origin datatype. */                             \
-                                                                                                \
-                /* Convert the GAcc to a Get */                                                 \
-                (op_ptr_)->type            = MPIDI_RMA_GET;                                     \
-                (op_ptr_)->origin_addr     = (op_ptr_)->result_addr;                            \
-                (op_ptr_)->origin_count    = (op_ptr_)->result_count;                           \
-                (op_ptr_)->origin_datatype = (op_ptr_)->result_datatype;                        \
-                                                                                                \
-                (err_) = recv_rma_msg((op_ptr_), (win_ptr_), (flags_), (source_win_handle_),    \
-                                                    (target_win_handle_), &(op_ptr_)->dtype_info,\
-                                                    &(op_ptr_)->dataloop, &(op_ptr_)->request); \
-            } else {                                                                            \
-                MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_);                                  \
-                (err_) = send_rma_msg((op_ptr_), (win_ptr_), (flags_), (source_win_handle_),    \
-                                                    (target_win_handle_), &(op_ptr_)->dtype_info,\
-                                                    &(op_ptr_)->dataloop, &(op_ptr_)->request); \
-            }                                                                                   \
-            if (err_) { MPIU_ERR_POP(err_); }                                                   \
-            break;                                                                              \
-        case MPIDI_RMA_ACC_CONTIG:                                                              \
+        case MPIDI_CH3_PKT_ACCUM_IMMED:                                                              \
             MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_);                                      \
-            (err_) = send_contig_acc_msg((op_ptr_), (win_ptr_), (flags_),                       \
-                                                       (source_win_handle_), (target_win_handle_),\
-                                                       &(op_ptr_)->request );                   \
+            (err_) = send_contig_acc_msg((op_ptr_), (win_ptr_), (flags_)); \
             if (err_) { MPIU_ERR_POP(err_); }                                                   \
             break;                                                                              \
-        case (MPIDI_RMA_GET):                                                                   \
-            (err_) = recv_rma_msg((op_ptr_), (win_ptr_), (flags_),                              \
-                                                (source_win_handle_), (target_win_handle_),     \
-                                                &(op_ptr_)->dtype_info,                         \
-                                                &(op_ptr_)->dataloop, &(op_ptr_)->request);     \
+        case (MPIDI_CH3_PKT_GET):                                                                   \
+            (err_) = recv_rma_msg((op_ptr_), (win_ptr_), (flags_));     \
             if (err_) { MPIU_ERR_POP(err_); }                                                   \
             break;                                                                              \
-        case (MPIDI_RMA_COMPARE_AND_SWAP):                                                      \
-        case (MPIDI_RMA_FETCH_AND_OP):                                                          \
+        case (MPIDI_CH3_PKT_CAS):                                                      \
+        case (MPIDI_CH3_PKT_FOP):                                                          \
             MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_);                                      \
-            (err_) = send_immed_rmw_msg((op_ptr_), (win_ptr_), (flags_),                        \
-                                                      (source_win_handle_), (target_win_handle_),\
-                                                      &(op_ptr_)->request );                    \
+            (err_) = send_immed_rmw_msg((op_ptr_), (win_ptr_), (flags_)); \
             if (err_) { MPIU_ERR_POP(err_); }                                                   \
             break;                                                                              \
                                                                                                 \
@@ -1131,7 +1087,6 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
     MPIDI_RMA_Op_t *curr_ptr;
     MPIDI_RMA_Ops_list_t *ops_list;
     MPID_Comm *comm_ptr;
-    MPI_Win source_win_handle, target_win_handle;
     MPID_Progress_state progress_state;
     int errflag = FALSE;
     MPIU_CHKLMEM_DECL(3);
@@ -1310,12 +1265,8 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
                 flags = MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE;
             }
 
-            source_win_handle = win_ptr->handle;
-	    target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
-
 #define MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_) /* Not used by active mode */
-            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, flags,
-                                    source_win_handle, target_win_handle, mpi_errno);
+            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, flags, mpi_errno);
 #undef MPIDI_CH3I_TRACK_RMA_WRITE
 
 	    i++;
@@ -1497,16 +1448,10 @@ static int create_datatype(const MPIDI_RMA_dtype_info *dtype_info,
 #define FUNCNAME send_rma_msg
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
-                                   MPIDI_CH3_Pkt_flags_t flags,
-				   MPI_Win source_win_handle, 
-				   MPI_Win target_win_handle, 
-				   MPIDI_RMA_dtype_info *dtype_info, 
-				   void **dataloop, MPID_Request **request) 
+static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr, MPIDI_CH3_Pkt_flags_t flags)
 {
-    MPIDI_CH3_Pkt_t upkt;
-    MPIDI_CH3_Pkt_put_t *put_pkt = &upkt.put;
-    MPIDI_CH3_Pkt_accum_t *accum_pkt = &upkt.accum;
+    MPIDI_CH3_Pkt_put_t *put_pkt = &rma_op->pkt.put;
+    MPIDI_CH3_Pkt_accum_t *accum_pkt = &rma_op->pkt.accum;
     MPID_IOV iov[MPID_IOV_LIMIT];
     int mpi_errno=MPI_SUCCESS;
     int origin_dt_derived, target_dt_derived, iovcnt;
@@ -1514,6 +1459,7 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     MPIDI_VC_t * vc;
     MPID_Comm *comm_ptr;
     MPID_Datatype *target_dtp=NULL, *origin_dtp=NULL;
+    MPI_Datatype target_datatype;
     MPID_Request *resp_req=NULL;
     MPIU_CHKPMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_SEND_RMA_MSG);
@@ -1521,24 +1467,15 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_RMA_MSG);
 
-    *request = NULL;
+    rma_op->request = NULL;
 
-    if (rma_op->type == MPIDI_RMA_PUT)
+    if (rma_op->pkt.type == MPIDI_CH3_PKT_PUT)
     {
-        MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
-        put_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
-            win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
         put_pkt->flags = flags;
-        put_pkt->count = rma_op->target_count;
-        put_pkt->datatype = rma_op->target_datatype;
-        put_pkt->dataloop_size = 0;
-        put_pkt->target_win_handle = target_win_handle;
-        put_pkt->source_win_handle = source_win_handle;
-        
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) put_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*put_pkt);
     }
-    else if (rma_op->type == MPIDI_RMA_GET_ACCUMULATE)
+    else if (rma_op->pkt.type == MPIDI_CH3_PKT_GET_ACCUM)
     {
         /* Create a request for the GACC response.  Store the response buf, count, and
            datatype in it, and pass the request's handle in the GACC packet. When the
@@ -1551,8 +1488,8 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         resp_req->dev.user_buf = rma_op->result_addr;
         resp_req->dev.user_count = rma_op->result_count;
         resp_req->dev.datatype = rma_op->result_datatype;
-        resp_req->dev.target_win_handle = target_win_handle;
-        resp_req->dev.source_win_handle = source_win_handle;
+        resp_req->dev.target_win_handle = accum_pkt->target_win_handle;
+        resp_req->dev.source_win_handle = accum_pkt->source_win_handle;
 
         if (!MPIR_DATATYPE_IS_PREDEFINED(resp_req->dev.datatype)) {
             MPID_Datatype *result_dtp = NULL;
@@ -1563,34 +1500,14 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         }
 
         /* Note: Get_accumulate uses the same packet type as accumulate */
-        MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_GET_ACCUM);
-        accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
-            win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        accum_pkt->flags = flags;
-        accum_pkt->count = rma_op->target_count;
-        accum_pkt->datatype = rma_op->target_datatype;
-        accum_pkt->dataloop_size = 0;
-        accum_pkt->op = rma_op->op;
-        accum_pkt->target_win_handle = target_win_handle;
-        accum_pkt->source_win_handle = source_win_handle;
         accum_pkt->request_handle = resp_req->handle;
-
+        accum_pkt->flags = flags;
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) accum_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*accum_pkt);
     }
     else
     {
-        MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
-        accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
-            win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
         accum_pkt->flags = flags;
-        accum_pkt->count = rma_op->target_count;
-        accum_pkt->datatype = rma_op->target_datatype;
-        accum_pkt->dataloop_size = 0;
-        accum_pkt->op = rma_op->op;
-        accum_pkt->target_win_handle = target_win_handle;
-        accum_pkt->source_win_handle = source_win_handle;
-
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) accum_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*accum_pkt);
     }
@@ -1613,10 +1530,11 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         origin_dt_derived = 0;
     }
 
-    if (!MPIR_DATATYPE_IS_PREDEFINED(rma_op->target_datatype))
+    MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE(rma_op->pkt, target_datatype);
+    if (!MPIR_DATATYPE_IS_PREDEFINED(target_datatype))
     {
         target_dt_derived = 1;
-        MPID_Datatype_get_ptr(rma_op->target_datatype, target_dtp);
+        MPID_Datatype_get_ptr(target_datatype, target_dtp);
     }
     else
     {
@@ -1626,32 +1544,32 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     if (target_dt_derived)
     {
         /* derived datatype on target. fill derived datatype info */
-        dtype_info->is_contig = target_dtp->is_contig;
-        dtype_info->max_contig_blocks = target_dtp->max_contig_blocks;
-        dtype_info->size = target_dtp->size;
-        dtype_info->extent = target_dtp->extent;
-        dtype_info->dataloop_size = target_dtp->dataloop_size;
-        dtype_info->dataloop_depth = target_dtp->dataloop_depth;
-        dtype_info->eltype = target_dtp->eltype;
-        dtype_info->dataloop = target_dtp->dataloop;
-        dtype_info->ub = target_dtp->ub;
-        dtype_info->lb = target_dtp->lb;
-        dtype_info->true_ub = target_dtp->true_ub;
-        dtype_info->true_lb = target_dtp->true_lb;
-        dtype_info->has_sticky_ub = target_dtp->has_sticky_ub;
-        dtype_info->has_sticky_lb = target_dtp->has_sticky_lb;
-
-	MPIU_CHKPMEM_MALLOC(*dataloop, void *, target_dtp->dataloop_size, 
+        rma_op->dtype_info.is_contig = target_dtp->is_contig;
+        rma_op->dtype_info.max_contig_blocks = target_dtp->max_contig_blocks;
+        rma_op->dtype_info.size = target_dtp->size;
+        rma_op->dtype_info.extent = target_dtp->extent;
+        rma_op->dtype_info.dataloop_size = target_dtp->dataloop_size;
+        rma_op->dtype_info.dataloop_depth = target_dtp->dataloop_depth;
+        rma_op->dtype_info.eltype = target_dtp->eltype;
+        rma_op->dtype_info.dataloop = target_dtp->dataloop;
+        rma_op->dtype_info.ub = target_dtp->ub;
+        rma_op->dtype_info.lb = target_dtp->lb;
+        rma_op->dtype_info.true_ub = target_dtp->true_ub;
+        rma_op->dtype_info.true_lb = target_dtp->true_lb;
+        rma_op->dtype_info.has_sticky_ub = target_dtp->has_sticky_ub;
+        rma_op->dtype_info.has_sticky_lb = target_dtp->has_sticky_lb;
+
+	MPIU_CHKPMEM_MALLOC(rma_op->dataloop, void *, target_dtp->dataloop_size, 
 			    mpi_errno, "dataloop");
 
 	MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
-        MPIU_Memcpy(*dataloop, target_dtp->dataloop, target_dtp->dataloop_size);
+        MPIU_Memcpy(rma_op->dataloop, target_dtp->dataloop, target_dtp->dataloop_size);
 	MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
         /* the dataloop can have undefined padding sections, so we need to let
          * valgrind know that it is OK to pass this data to writev later on */
-        MPL_VG_MAKE_MEM_DEFINED(*dataloop, target_dtp->dataloop_size);
+        MPL_VG_MAKE_MEM_DEFINED(rma_op->dataloop, target_dtp->dataloop_size);
 
-        if (rma_op->type == MPIDI_RMA_PUT)
+        if (rma_op->pkt.type == MPIDI_CH3_PKT_PUT)
 	{
             put_pkt->dataloop_size = target_dtp->dataloop_size;
 	}
@@ -1673,36 +1591,36 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
             iov[1].MPID_IOV_LEN = rma_op->origin_count * origin_type_size;
             iovcnt = 2;
 	    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
-            mpi_errno = MPIDI_CH3_iStartMsgv(vc, iov, iovcnt, request);
+            mpi_errno = MPIDI_CH3_iStartMsgv(vc, iov, iovcnt, &rma_op->request);
 	    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
             MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
         }
         else
         {
             /* derived datatype on origin */
-            *request = MPID_Request_create();
-            MPIU_ERR_CHKANDJUMP(*request == NULL,mpi_errno,MPI_ERR_OTHER,"**nomemreq");
+            rma_op->request = MPID_Request_create();
+            MPIU_ERR_CHKANDJUMP(rma_op->request == NULL,mpi_errno,MPI_ERR_OTHER,"**nomemreq");
             
-            MPIU_Object_set_ref(*request, 2);
-            (*request)->kind = MPID_REQUEST_SEND;
+            MPIU_Object_set_ref(rma_op->request, 2);
+            rma_op->request->kind = MPID_REQUEST_SEND;
             
-            (*request)->dev.segment_ptr = MPID_Segment_alloc( );
-            MPIU_ERR_CHKANDJUMP1((*request)->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
+            rma_op->request->dev.segment_ptr = MPID_Segment_alloc( );
+            MPIU_ERR_CHKANDJUMP1(rma_op->request->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
 
-            (*request)->dev.datatype_ptr = origin_dtp;
+            rma_op->request->dev.datatype_ptr = origin_dtp;
             /* this will cause the datatype to be freed when the request
                is freed. */
             MPID_Segment_init(rma_op->origin_addr, rma_op->origin_count,
                               rma_op->origin_datatype,
-                              (*request)->dev.segment_ptr, 0);
-            (*request)->dev.segment_first = 0;
-            (*request)->dev.segment_size = rma_op->origin_count * origin_type_size;
+                              rma_op->request->dev.segment_ptr, 0);
+            rma_op->request->dev.segment_first = 0;
+            rma_op->request->dev.segment_size = rma_op->origin_count * origin_type_size;
 
-            (*request)->dev.OnFinal = 0;
-            (*request)->dev.OnDataAvail = 0;
+            rma_op->request->dev.OnFinal = 0;
+            rma_op->request->dev.OnDataAvail = 0;
 
 	    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
-            mpi_errno = vc->sendNoncontig_fn(vc, *request, iov[0].MPID_IOV_BUF, iov[0].MPID_IOV_LEN);
+            mpi_errno = vc->sendNoncontig_fn(vc, rma_op->request, iov[0].MPID_IOV_BUF, iov[0].MPID_IOV_LEN);
 	    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
             MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
         }
@@ -1712,36 +1630,36 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         /* derived datatype on target */
         MPID_Datatype *combined_dtp = NULL;
 
-        *request = MPID_Request_create();
-        if (*request == NULL) {
+        rma_op->request = MPID_Request_create();
+        if (rma_op->request == NULL) {
 	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomemreq");
         }
 
-        MPIU_Object_set_ref(*request, 2);
-        (*request)->kind = MPID_REQUEST_SEND;
+        MPIU_Object_set_ref(rma_op->request, 2);
+        rma_op->request->kind = MPID_REQUEST_SEND;
 
-	(*request)->dev.segment_ptr = MPID_Segment_alloc( );
-        MPIU_ERR_CHKANDJUMP1((*request)->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
+	rma_op->request->dev.segment_ptr = MPID_Segment_alloc( );
+        MPIU_ERR_CHKANDJUMP1(rma_op->request->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
 
         /* create a new datatype containing the dtype_info, dataloop, and origin data */
 
-        mpi_errno = create_datatype(dtype_info, *dataloop, target_dtp->dataloop_size, rma_op->origin_addr,
+        mpi_errno = create_datatype(&rma_op->dtype_info, rma_op->dataloop, target_dtp->dataloop_size, rma_op->origin_addr,
                                     rma_op->origin_count, rma_op->origin_datatype, &combined_dtp);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-        (*request)->dev.datatype_ptr = combined_dtp;
+        rma_op->request->dev.datatype_ptr = combined_dtp;
         /* combined_datatype will be freed when request is freed */
 
         MPID_Segment_init(MPI_BOTTOM, 1, combined_dtp->handle,
-                          (*request)->dev.segment_ptr, 0);
-        (*request)->dev.segment_first = 0;
-        (*request)->dev.segment_size = combined_dtp->size;
+                          rma_op->request->dev.segment_ptr, 0);
+        rma_op->request->dev.segment_first = 0;
+        rma_op->request->dev.segment_size = combined_dtp->size;
 
-        (*request)->dev.OnFinal = 0;
-        (*request)->dev.OnDataAvail = 0;
+        rma_op->request->dev.OnFinal = 0;
+        rma_op->request->dev.OnDataAvail = 0;
 
 	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
-        mpi_errno = vc->sendNoncontig_fn(vc, *request, iov[0].MPID_IOV_BUF, iov[0].MPID_IOV_LEN);
+        mpi_errno = vc->sendNoncontig_fn(vc, rma_op->request, iov[0].MPID_IOV_BUF, iov[0].MPID_IOV_LEN);
 	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
         MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
 
@@ -1754,7 +1672,7 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     /* This operation can generate two requests; one for inbound and one for
        outbound data. */
     if (resp_req != NULL) {
-        if (*request != NULL) {
+        if (rma_op->request != NULL) {
             /* If we have both inbound and outbound requests (i.e. GACC
                operation), we need to ensure that the source buffer is
                available and that the response data has been received before
@@ -1770,11 +1688,11 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
                it will be completed by the progress engine.
              */
 
-            MPID_Request_release(*request);
-            *request = resp_req;
+            MPID_Request_release(rma_op->request);
+            rma_op->request = resp_req;
 
         } else {
-            *request = resp_req;
+            rma_op->request = resp_req;
         }
 
         /* For error checking */
@@ -1790,14 +1708,14 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     if (resp_req) {
         MPID_Request_release(resp_req);
     }
-    if (*request)
+    if (rma_op->request)
     {
         MPIU_CHKPMEM_REAP();
-        if ((*request)->dev.datatype_ptr)
-            MPID_Datatype_release((*request)->dev.datatype_ptr);
-        MPID_Request_release(*request);
+        if (rma_op->request->dev.datatype_ptr)
+            MPID_Datatype_release(rma_op->request->dev.datatype_ptr);
+        MPID_Request_release(rma_op->request);
     }
-    *request = NULL;
+    rma_op->request = NULL;
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -1809,15 +1727,9 @@ static int send_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
 #define FUNCNAME send_contig_acc_msg
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
-					  MPID_Win *win_ptr,
-                                          MPIDI_CH3_Pkt_flags_t flags,
-					  MPI_Win source_win_handle, 
-					  MPI_Win target_win_handle, 
-					  MPID_Request **request) 
+static int send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr, MPIDI_CH3_Pkt_flags_t flags)
 {
-    MPIDI_CH3_Pkt_t upkt;
-    MPIDI_CH3_Pkt_accum_t *accum_pkt = &upkt.accum;
+    MPIDI_CH3_Pkt_accum_t *accum_pkt = &rma_op->pkt.accum;
     MPID_IOV iov[MPID_IOV_LIMIT];
     int mpi_errno=MPI_SUCCESS;
     int iovcnt;
@@ -1829,24 +1741,16 @@ static int send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_CONTIG_ACC_MSG);
 
-    *request = NULL;
+    rma_op->request = NULL;
 
     MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
     /* FIXME: Make this size check efficient and match the packet type */
     MPIU_Assign_trunc(len, rma_op->origin_count * origin_type_size, size_t);
     if (MPIR_CVAR_CH3_RMA_ACC_IMMED && len <= MPIDI_RMA_IMMED_INTS*sizeof(int)) {
-	MPIDI_CH3_Pkt_accum_immed_t * accumi_pkt = &upkt.accum_immed;
+	MPIDI_CH3_Pkt_accum_immed_t * accumi_pkt = &rma_op->pkt.accum_immed;
 	void *dest = accumi_pkt->data, *src = rma_op->origin_addr;
 	
-	MPIDI_Pkt_init(accumi_pkt, MPIDI_CH3_PKT_ACCUM_IMMED);
-	accumi_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
-	    win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
         accumi_pkt->flags = flags;
-	accumi_pkt->count = rma_op->target_count;
-	accumi_pkt->datatype = rma_op->target_datatype;
-	accumi_pkt->op = rma_op->op;
-	accumi_pkt->target_win_handle = target_win_handle;
-	accumi_pkt->source_win_handle = source_win_handle;
 	
 	switch (len) {
 	case 1: *(uint8_t *)dest  = *(uint8_t *)src;  break;
@@ -1859,23 +1763,13 @@ static int send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
 	comm_ptr = win_ptr->comm_ptr;
 	MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
 	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
-	mpi_errno = MPIDI_CH3_iStartMsg(vc, accumi_pkt, sizeof(*accumi_pkt), request);
+	mpi_errno = MPIDI_CH3_iStartMsg(vc, accumi_pkt, sizeof(*accumi_pkt), &rma_op->request);
 	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
 	MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
 	goto fn_exit;
     }
 
-    MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
-    accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
-	win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
     accum_pkt->flags = flags;
-    accum_pkt->count = rma_op->target_count;
-    accum_pkt->datatype = rma_op->target_datatype;
-    accum_pkt->dataloop_size = 0;
-    accum_pkt->op = rma_op->op;
-    accum_pkt->target_win_handle = target_win_handle;
-    accum_pkt->source_win_handle = source_win_handle;
-    
     iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) accum_pkt;
     iov[0].MPID_IOV_LEN = sizeof(*accum_pkt);
 
@@ -1898,7 +1792,7 @@ static int send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
     iov[1].MPID_IOV_LEN = rma_op->origin_count * origin_type_size;
     iovcnt = 2;
     MPIU_THREAD_CS_ENTER(CH3COMM,vc);
-    mpi_errno = MPIDI_CH3_iStartMsgv(vc, iov, iovcnt, request);
+    mpi_errno = MPIDI_CH3_iStartMsgv(vc, iov, iovcnt, &rma_op->request);
     MPIU_THREAD_CS_EXIT(CH3COMM,vc);
     MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
 
@@ -1907,11 +1801,11 @@ static int send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
  fn_fail:
-    if (*request)
+    if (rma_op->request)
     {
-        MPID_Request_release(*request);
+        MPID_Request_release(rma_op->request);
     }
-    *request = NULL;
+    rma_op->request = NULL;
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -1924,15 +1818,10 @@ static int send_contig_acc_msg(MPIDI_RMA_Op_t *rma_op,
 #define FUNCNAME send_immed_rmw_msg
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
-                                         MPID_Win *win_ptr,
-                                         MPIDI_CH3_Pkt_flags_t flags,
-                                         MPI_Win source_win_handle, 
-                                         MPI_Win target_win_handle, 
-                                         MPID_Request **request) 
+static int send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr, MPIDI_CH3_Pkt_flags_t flags)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_Request *rmw_req = NULL, *resp_req = NULL;
+    MPID_Request *rmw_req = NULL;
     MPIDI_VC_t *vc;
     MPID_Comm *comm_ptr;
     MPI_Aint len;
@@ -1940,45 +1829,37 @@ static int send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_IMMED_RMW_MSG);
 
-    *request = NULL;
+    rma_op->request = NULL;
 
     /* Create a request for the RMW response.  Store the origin buf, count, and
        datatype in it, and pass the request's handle RMW packet. When the
        response comes from the target, it will contain the request handle. */
-    resp_req = MPID_Request_create();
-    MPIU_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
-    *request = resp_req;
+    rma_op->request = MPID_Request_create();
+    MPIU_ERR_CHKANDJUMP(rma_op->request == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
 
     /* Set refs on the request to 2: one for the response message, and one for
        the partial completion handler */
-    MPIU_Object_set_ref(resp_req, 2);
+    MPIU_Object_set_ref(rma_op->request, 2);
 
-    resp_req->dev.user_buf = rma_op->result_addr;
-    resp_req->dev.user_count = rma_op->result_count;
-    resp_req->dev.datatype = rma_op->result_datatype;
-    resp_req->dev.target_win_handle = target_win_handle;
-    resp_req->dev.source_win_handle = source_win_handle;
+    rma_op->request->dev.user_buf = rma_op->result_addr;
+    rma_op->request->dev.user_count = rma_op->result_count;
+    rma_op->request->dev.datatype = rma_op->result_datatype;
 
     /* REQUIRE: All datatype arguments must be of the same, builtin
                 type and counts must be 1. */
     MPID_Datatype_get_size_macro(rma_op->origin_datatype, len);
     comm_ptr = win_ptr->comm_ptr;
 
-    if (rma_op->type == MPIDI_RMA_COMPARE_AND_SWAP) {
-        MPIDI_CH3_Pkt_t upkt;
-        MPIDI_CH3_Pkt_cas_t *cas_pkt = &upkt.cas;
+    if (rma_op->pkt.type == MPIDI_CH3_PKT_CAS) {
+        MPIDI_CH3_Pkt_cas_t *cas_pkt = &rma_op->pkt.cas;
 
         MPIU_Assert(len <= sizeof(MPIDI_CH3_CAS_Immed_u));
 
-        MPIDI_Pkt_init(cas_pkt, MPIDI_CH3_PKT_CAS);
+        rma_op->request->dev.target_win_handle = cas_pkt->target_win_handle;
+        rma_op->request->dev.source_win_handle = cas_pkt->source_win_handle;
 
-        cas_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
-            win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+        cas_pkt->request_handle = rma_op->request->handle;
         cas_pkt->flags = flags;
-        cas_pkt->datatype = rma_op->target_datatype;
-        cas_pkt->target_win_handle = target_win_handle;
-        cas_pkt->request_handle = resp_req->handle;
-
         MPIU_Memcpy( (void *) &cas_pkt->origin_data, rma_op->origin_addr, len );
         MPIU_Memcpy( (void *) &cas_pkt->compare_data, rma_op->compare_addr, len );
 
@@ -1993,25 +1874,20 @@ static int send_immed_rmw_msg(MPIDI_RMA_Op_t *rma_op,
         }
     }
 
-    else if (rma_op->type == MPIDI_RMA_FETCH_AND_OP) {
-        MPIDI_CH3_Pkt_t upkt;
-        MPIDI_CH3_Pkt_fop_t *fop_pkt = &upkt.fop;
+    else if (rma_op->pkt.type == MPIDI_CH3_PKT_FOP) {
+        MPIDI_CH3_Pkt_fop_t *fop_pkt = &rma_op->pkt.fop;
 
         MPIU_Assert(len <= sizeof(MPIDI_CH3_FOP_Immed_u));
 
-        MPIDI_Pkt_init(fop_pkt, MPIDI_CH3_PKT_FOP);
+        rma_op->request->dev.target_win_handle = fop_pkt->target_win_handle;
+        rma_op->request->dev.source_win_handle = fop_pkt->source_win_handle;
 
-        fop_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
-            win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+        fop_pkt->request_handle = rma_op->request->handle;
         fop_pkt->flags = flags;
-        fop_pkt->datatype = rma_op->target_datatype;
-        fop_pkt->target_win_handle = target_win_handle;
-        fop_pkt->request_handle = resp_req->handle;
-        fop_pkt->op = rma_op->op;
 
-        if (len <= sizeof(fop_pkt->origin_data) || rma_op->op == MPI_NO_OP) {
+        if (len <= sizeof(fop_pkt->origin_data) || fop_pkt->op == MPI_NO_OP) {
             /* Embed FOP data in the packet header */
-            if (rma_op->op != MPI_NO_OP) {
+            if (fop_pkt->op != MPI_NO_OP) {
                 MPIU_Memcpy( fop_pkt->origin_data, rma_op->origin_addr, len );
             }
 
@@ -2058,10 +1934,10 @@ fn_exit:
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
 fn_fail:
-    if (*request) {
-        MPID_Request_release(*request);
+    if (rma_op->request) {
+        MPID_Request_release(rma_op->request);
     }
-    *request = NULL;
+    rma_op->request = NULL;
     if (rmw_req) {
         MPID_Request_release(rmw_req);
     }
@@ -2075,20 +1951,15 @@ fn_fail:
 #define FUNCNAME recv_rma_msg
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int recv_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
-                                   MPIDI_CH3_Pkt_flags_t flags,
-				   MPI_Win source_win_handle, 
-				   MPI_Win target_win_handle, 
-				   MPIDI_RMA_dtype_info *dtype_info, 
-				   void **dataloop, MPID_Request **request) 
+static int recv_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr, MPIDI_CH3_Pkt_flags_t flags)
 {
-    MPIDI_CH3_Pkt_t upkt;
-    MPIDI_CH3_Pkt_get_t *get_pkt = &upkt.get;
+    MPIDI_CH3_Pkt_get_t *get_pkt = &rma_op->pkt.get;
     int mpi_errno=MPI_SUCCESS;
     MPIDI_VC_t * vc;
     MPID_Comm *comm_ptr;
-    MPID_Request *req = NULL;
     MPID_Datatype *dtp;
+    MPI_Datatype target_datatype;
+    MPID_Request *req = NULL;
     MPID_IOV iov[MPID_IOV_LIMIT];
     MPIU_CHKPMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_RECV_RMA_MSG);
@@ -2100,37 +1971,28 @@ static int recv_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
        and pass a handle to it in the get packet. When the get
        response comes from the target, it will contain the request
        handle. */  
-    req = MPID_Request_create();
-    if (req == NULL) {
+    rma_op->request = MPID_Request_create();
+    if (rma_op->request == NULL) {
 	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomemreq");
     }
 
-    *request = req;
+    MPIU_Object_set_ref(rma_op->request, 2);
 
-    MPIU_Object_set_ref(req, 2);
-
-    req->dev.user_buf = rma_op->origin_addr;
-    req->dev.user_count = rma_op->origin_count;
-    req->dev.datatype = rma_op->origin_datatype;
-    req->dev.target_win_handle = MPI_WIN_NULL;
-    req->dev.source_win_handle = source_win_handle;
-    if (!MPIR_DATATYPE_IS_PREDEFINED(req->dev.datatype))
+    rma_op->request->dev.user_buf = rma_op->origin_addr;
+    rma_op->request->dev.user_count = rma_op->origin_count;
+    rma_op->request->dev.datatype = rma_op->origin_datatype;
+    rma_op->request->dev.target_win_handle = MPI_WIN_NULL;
+    rma_op->request->dev.source_win_handle = get_pkt->source_win_handle;
+    if (!MPIR_DATATYPE_IS_PREDEFINED(rma_op->request->dev.datatype))
     {
-        MPID_Datatype_get_ptr(req->dev.datatype, dtp);
-        req->dev.datatype_ptr = dtp;
+        MPID_Datatype_get_ptr(rma_op->request->dev.datatype, dtp);
+        rma_op->request->dev.datatype_ptr = dtp;
         /* this will cause the datatype to be freed when the
            request is freed. */  
     }
 
-    MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
-    get_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
-        win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+    get_pkt->request_handle = rma_op->request->handle;
     get_pkt->flags = flags;
-    get_pkt->count = rma_op->target_count;
-    get_pkt->datatype = rma_op->target_datatype;
-    get_pkt->request_handle = req->handle;
-    get_pkt->target_win_handle = target_win_handle;
-    get_pkt->source_win_handle = source_win_handle;
 
 /*    printf("send pkt: type %d, addr %d, count %d, base %d\n", rma_pkt->type,
            rma_pkt->addr, rma_pkt->count, win_ptr->base_addrs[rma_op->target_rank]);
@@ -2140,7 +2002,8 @@ static int recv_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
     comm_ptr = win_ptr->comm_ptr;
     MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
 
-    if (MPIR_DATATYPE_IS_PREDEFINED(rma_op->target_datatype))
+    MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE(rma_op->pkt, target_datatype);
+    if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype))
     {
         /* basic datatype on target. simply send the get_pkt. */
 	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
@@ -2152,40 +2015,40 @@ static int recv_rma_msg(MPIDI_RMA_Op_t *rma_op, MPID_Win *win_ptr,
         /* derived datatype on target. fill derived datatype info and
            send it along with get_pkt. */
 
-        MPID_Datatype_get_ptr(rma_op->target_datatype, dtp);
-        dtype_info->is_contig = dtp->is_contig;
-        dtype_info->max_contig_blocks = dtp->max_contig_blocks;
-        dtype_info->size = dtp->size;
-        dtype_info->extent = dtp->extent;
-        dtype_info->dataloop_size = dtp->dataloop_size;
-        dtype_info->dataloop_depth = dtp->dataloop_depth;
-        dtype_info->eltype = dtp->eltype;
-        dtype_info->dataloop = dtp->dataloop;
-        dtype_info->ub = dtp->ub;
-        dtype_info->lb = dtp->lb;
-        dtype_info->true_ub = dtp->true_ub;
-        dtype_info->true_lb = dtp->true_lb;
-        dtype_info->has_sticky_ub = dtp->has_sticky_ub;
-        dtype_info->has_sticky_lb = dtp->has_sticky_lb;
-
-	MPIU_CHKPMEM_MALLOC(*dataloop, void *, dtp->dataloop_size, 
+        MPID_Datatype_get_ptr(target_datatype, dtp);
+        rma_op->dtype_info.is_contig = dtp->is_contig;
+        rma_op->dtype_info.max_contig_blocks = dtp->max_contig_blocks;
+        rma_op->dtype_info.size = dtp->size;
+        rma_op->dtype_info.extent = dtp->extent;
+        rma_op->dtype_info.dataloop_size = dtp->dataloop_size;
+        rma_op->dtype_info.dataloop_depth = dtp->dataloop_depth;
+        rma_op->dtype_info.eltype = dtp->eltype;
+        rma_op->dtype_info.dataloop = dtp->dataloop;
+        rma_op->dtype_info.ub = dtp->ub;
+        rma_op->dtype_info.lb = dtp->lb;
+        rma_op->dtype_info.true_ub = dtp->true_ub;
+        rma_op->dtype_info.true_lb = dtp->true_lb;
+        rma_op->dtype_info.has_sticky_ub = dtp->has_sticky_ub;
+        rma_op->dtype_info.has_sticky_lb = dtp->has_sticky_lb;
+
+	MPIU_CHKPMEM_MALLOC(rma_op->dataloop, void *, dtp->dataloop_size, 
 			    mpi_errno, "dataloop");
 
 	MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
-        MPIU_Memcpy(*dataloop, dtp->dataloop, dtp->dataloop_size);
+        MPIU_Memcpy(rma_op->dataloop, dtp->dataloop, dtp->dataloop_size);
 	MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
 
         /* the dataloop can have undefined padding sections, so we need to let
          * valgrind know that it is OK to pass this data to writev later on */
-        MPL_VG_MAKE_MEM_DEFINED(*dataloop, dtp->dataloop_size);
+        MPL_VG_MAKE_MEM_DEFINED(rma_op->dataloop, dtp->dataloop_size);
 
         get_pkt->dataloop_size = dtp->dataloop_size;
 
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)get_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*get_pkt);
-        iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)dtype_info;
-        iov[1].MPID_IOV_LEN = sizeof(*dtype_info);
-        iov[2].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)*dataloop;
+        iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) &rma_op->dtype_info;
+        iov[1].MPID_IOV_LEN = sizeof(rma_op->dtype_info);
+        iov[2].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rma_op->dataloop;
         iov[2].MPID_IOV_LEN = dtp->dataloop_size;
 
 	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
@@ -2583,7 +2446,6 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
     MPIDI_RMA_Op_t *curr_ptr;
     MPIDI_RMA_Ops_list_t *ops_list;
     MPID_Comm *comm_ptr;
-    MPI_Win source_win_handle, target_win_handle;
     int start_grp_size, *ranks_in_win_grp, rank;
     int nRequest = 0;
     int nRequestNew = 0;
@@ -2684,12 +2546,8 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
             flags = MPIDI_CH3_PKT_FLAG_RMA_AT_COMPLETE;
         }
 
-        source_win_handle = win_ptr->handle;
-	target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
-
 #define MPIDI_CH3I_TRACK_RMA_WRITE(op_ptr_, win_ptr_) /* Not used by active mode */
-            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, flags,
-                                    source_win_handle, target_win_handle, mpi_errno);
+            MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, flags, mpi_errno);
 #undef MPIDI_CH3I_TRACK_RMA_WRITE
 
 	i++;
@@ -3075,9 +2933,9 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
     if ( MPIR_CVAR_CH3_RMA_MERGE_LOCK_OP_UNLOCK &&
          win_ptr->targets[dest].remote_lock_state == MPIDI_CH3_WIN_LOCK_CALLED &&
          rma_op && rma_op->next == NULL /* There is only one op */ &&
-         rma_op->type != MPIDI_RMA_COMPARE_AND_SWAP &&
-         rma_op->type != MPIDI_RMA_FETCH_AND_OP &&
-         rma_op->type != MPIDI_RMA_GET_ACCUMULATE )
+         rma_op->pkt.type != MPIDI_CH3_PKT_CAS &&
+         rma_op->pkt.type != MPIDI_CH3_PKT_FOP &&
+         rma_op->pkt.type != MPIDI_CH3_PKT_GET_ACCUM )
     {
 	/* Single put, get, or accumulate between the lock and unlock. If it
 	 * is of small size and predefined datatype at the target, we
@@ -3087,18 +2945,20 @@ int MPIDI_Win_unlock(int dest, MPID_Win *win_ptr)
         MPI_Aint type_size;
         MPIDI_VC_t *vc;
         MPIDI_RMA_Op_t *curr_op = rma_op;
+        MPI_Datatype target_datatype;
 
         MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
 
 	MPID_Datatype_get_size_macro(curr_op->origin_datatype, type_size);
 
 	/* msg_sz typically = 65480 */
-	if (MPIR_DATATYPE_IS_PREDEFINED(curr_op->target_datatype) &&
+        MPIDI_CH3_PKT_RMA_GET_TARGET_DATATYPE(curr_op->pkt, target_datatype);
+	if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype) &&
 	     (type_size * curr_op->origin_count <= vc->eager_max_msg_sz) ) {
 	    single_op_opt = 1;
 	    /* Set the lock granted flag to 1 */
 	    win_ptr->targets[dest].remote_lock_state = MPIDI_CH3_WIN_LOCK_GRANTED;
-	    if (curr_op->type == MPIDI_RMA_GET) {
+	    if (curr_op->pkt.type == MPIDI_CH3_PKT_GET) {
 		mpi_errno = send_lock_get(win_ptr, dest);
 		wait_for_rma_done_pkt = 0;
 	    }
@@ -3583,7 +3443,6 @@ static int do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
 {
     int mpi_errno = MPI_SUCCESS, nops;
     MPIDI_RMA_Op_t *curr_ptr;
-    MPI_Win source_win_handle = MPI_WIN_NULL, target_win_handle = MPI_WIN_NULL;
     int nRequest=0, nRequestNew=0;
     MPIDI_STATE_DECL(MPID_STATE_DO_PASSIVE_TARGET_RMA);
 
@@ -3614,10 +3473,10 @@ static int do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
         /* Check if we can piggyback the RMA done acknowlegdement on the last
            operation in the epoch. */
 
-        if (tail->type == MPIDI_RMA_GET ||
-            tail->type == MPIDI_RMA_COMPARE_AND_SWAP ||
-            tail->type == MPIDI_RMA_FETCH_AND_OP ||
-            tail->type == MPIDI_RMA_GET_ACCUMULATE)
+        if (tail->pkt.type == MPIDI_CH3_PKT_GET ||
+            tail->pkt.type == MPIDI_CH3_PKT_CAS ||
+            tail->pkt.type == MPIDI_CH3_PKT_FOP ||
+            tail->pkt.type == MPIDI_CH3_PKT_GET_ACCUM)
         {
             /* last operation sends a response message. no need to wait
                for an additional rma done pkt */
@@ -3633,7 +3492,7 @@ static int do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
             curr_ptr = MPIDI_CH3I_RMA_Ops_head(&win_ptr->targets[target_rank].rma_ops_list);
             
             while (curr_ptr != NULL) {
-                if (curr_ptr->type == MPIDI_RMA_GET) {
+                if (curr_ptr->pkt.type == MPIDI_CH3_PKT_GET) {
 		    /* Found a GET, move it to the end */
                     *wait_for_rma_done_pkt = 0;
 
@@ -3660,10 +3519,6 @@ static int do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
 
     curr_ptr = MPIDI_CH3I_RMA_Ops_head(&win_ptr->targets[target_rank].rma_ops_list);
 
-    if (curr_ptr != NULL) {
-        target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
-    }
-
     while (curr_ptr != NULL)
     {
         MPIDI_CH3_Pkt_flags_t flags = MPIDI_CH3_PKT_FLAG_NONE;
@@ -3709,8 +3564,6 @@ static int do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
             if (*wait_for_rma_done_pkt) {
                 flags |= MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
             }
-
-            source_win_handle = win_ptr->handle;
         }
 
         /* Track passive target write operations.  This is used during Win_free
@@ -3719,8 +3572,7 @@ static int do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
 #define MPIDI_CH3I_TRACK_RMA_WRITE(op_, win_ptr_) \
         do { (win_ptr_)->pt_rma_puts_accs[(op_)->target_rank]++; } while (0)
 
-        MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, flags, source_win_handle,
-                                target_win_handle, mpi_errno);
+        MPIDI_CH3I_ISSUE_RMA_OP(curr_ptr, win_ptr, flags, mpi_errno);
 #undef MPIDI_CH3I_TRACK_RMA_WRITE
 
 	/* If the request is null, we can remove it immediately */
@@ -4020,10 +3872,11 @@ static int send_lock_put_or_acc(MPID_Win *win_ptr, int target_rank)
     MPID_Datatype *origin_dtp=NULL;
     MPI_Aint origin_type_size;
     MPIDI_CH3_Pkt_t upkt;
-    MPIDI_CH3_Pkt_lock_put_unlock_t *lock_put_unlock_pkt = 
-	&upkt.lock_put_unlock;
-    MPIDI_CH3_Pkt_lock_accum_unlock_t *lock_accum_unlock_pkt = 
-	&upkt.lock_accum_unlock;
+    MPIDI_CH3_Pkt_lock_put_unlock_t *lock_put_unlock_pkt = &upkt.lock_put_unlock;
+    MPIDI_CH3_Pkt_lock_accum_unlock_t *lock_accum_unlock_pkt = &upkt.lock_accum_unlock;
+    MPIDI_CH3_Pkt_put_t *put_pkt;
+    MPIDI_CH3_Pkt_accum_t *accum_pkt;
+    MPIDI_CH3_Pkt_accum_immed_t *accumi_pkt;
         
     MPIDI_STATE_DECL(MPID_STATE_SEND_LOCK_PUT_OR_ACC);
 
@@ -4035,27 +3888,26 @@ static int send_lock_put_or_acc(MPID_Win *win_ptr, int target_rank)
 
     win_ptr->pt_rma_puts_accs[rma_op->target_rank]++;
 
-    if (rma_op->type == MPIDI_RMA_PUT) {
+    if (rma_op->pkt.type == MPIDI_CH3_PKT_PUT) {
+        put_pkt = &rma_op->pkt.put;
+
         MPIDI_Pkt_init(lock_put_unlock_pkt, MPIDI_CH3_PKT_LOCK_PUT_UNLOCK);
         lock_put_unlock_pkt->flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK |
             MPIDI_CH3_PKT_FLAG_RMA_UNLOCK | MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
-        lock_put_unlock_pkt->target_win_handle = 
-            win_ptr->all_win_handles[rma_op->target_rank];
+        lock_put_unlock_pkt->target_win_handle = win_ptr->all_win_handles[rma_op->target_rank];
         lock_put_unlock_pkt->source_win_handle = win_ptr->handle;
         lock_put_unlock_pkt->lock_type = lock_type;
- 
-        lock_put_unlock_pkt->addr = 
-            (char *) win_ptr->base_addrs[rma_op->target_rank] +
-            win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        
-        lock_put_unlock_pkt->count = rma_op->target_count;
-        lock_put_unlock_pkt->datatype = rma_op->target_datatype;
+        lock_put_unlock_pkt->addr = put_pkt->addr;
+        lock_put_unlock_pkt->count = put_pkt->count;
+        lock_put_unlock_pkt->datatype = put_pkt->datatype;
 
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) lock_put_unlock_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*lock_put_unlock_pkt);
     }
     
-    else if (rma_op->type == MPIDI_RMA_ACCUMULATE) {        
+    else if (rma_op->pkt.type == MPIDI_CH3_PKT_ACCUMULATE) {        
+        accum_pkt = &rma_op->pkt.accum;
+
         MPIDI_Pkt_init(lock_accum_unlock_pkt, MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK);
         lock_accum_unlock_pkt->flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK |
             MPIDI_CH3_PKT_FLAG_RMA_UNLOCK | MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
@@ -4063,19 +3915,17 @@ static int send_lock_put_or_acc(MPID_Win *win_ptr, int target_rank)
             win_ptr->all_win_handles[rma_op->target_rank];
         lock_accum_unlock_pkt->source_win_handle = win_ptr->handle;
         lock_accum_unlock_pkt->lock_type = lock_type;
-
-        lock_accum_unlock_pkt->addr = 
-            (char *) win_ptr->base_addrs[rma_op->target_rank] +
-            win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        
-        lock_accum_unlock_pkt->count = rma_op->target_count;
-        lock_accum_unlock_pkt->datatype = rma_op->target_datatype;
-        lock_accum_unlock_pkt->op = rma_op->op;
+        lock_accum_unlock_pkt->addr = accum_pkt->addr;
+        lock_accum_unlock_pkt->count = accum_pkt->count;
+        lock_accum_unlock_pkt->datatype = accum_pkt->datatype;
+        lock_accum_unlock_pkt->op = accum_pkt->op;
 
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) lock_accum_unlock_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*lock_accum_unlock_pkt);
     }
-    else if (rma_op->type == MPIDI_RMA_ACC_CONTIG) {
+    else if (rma_op->pkt.type == MPIDI_CH3_PKT_ACCUM_IMMED) {
+        accumi_pkt = &rma_op->pkt.accum_immed;
+
         MPIDI_Pkt_init(lock_accum_unlock_pkt, MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK);
         lock_accum_unlock_pkt->flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK |
             MPIDI_CH3_PKT_FLAG_RMA_UNLOCK | MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK;
@@ -4083,14 +3933,10 @@ static int send_lock_put_or_acc(MPID_Win *win_ptr, int target_rank)
             win_ptr->all_win_handles[rma_op->target_rank];
         lock_accum_unlock_pkt->source_win_handle = win_ptr->handle;
         lock_accum_unlock_pkt->lock_type = lock_type;
-
-        lock_accum_unlock_pkt->addr = 
-            (char *) win_ptr->base_addrs[rma_op->target_rank] +
-            win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        
-        lock_accum_unlock_pkt->count = rma_op->target_count;
-        lock_accum_unlock_pkt->datatype = rma_op->target_datatype;
-        lock_accum_unlock_pkt->op = rma_op->op;
+        lock_accum_unlock_pkt->addr = accumi_pkt->addr;
+        lock_accum_unlock_pkt->count = accumi_pkt->count;
+        lock_accum_unlock_pkt->datatype = accumi_pkt->datatype;
+        lock_accum_unlock_pkt->op = accumi_pkt->op;
 
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) lock_accum_unlock_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*lock_accum_unlock_pkt);
@@ -4223,8 +4069,8 @@ static int send_lock_get(MPID_Win *win_ptr, int target_rank)
     MPID_Comm *comm_ptr;
     MPID_Datatype *dtp;
     MPIDI_CH3_Pkt_t upkt;
-    MPIDI_CH3_Pkt_lock_get_unlock_t *lock_get_unlock_pkt = 
-	&upkt.lock_get_unlock;
+    MPIDI_CH3_Pkt_lock_get_unlock_t *lock_get_unlock_pkt = &upkt.lock_get_unlock;
+    MPIDI_CH3_Pkt_get_t *get_pkt;
 
     MPIDI_STATE_DECL(MPID_STATE_SEND_LOCK_GET);
 
@@ -4259,6 +4105,8 @@ static int send_lock_get(MPID_Win *win_ptr, int target_rank)
            request is freed. */  
     }
 
+    get_pkt = &rma_op->pkt.get;
+
     MPIDI_Pkt_init(lock_get_unlock_pkt, MPIDI_CH3_PKT_LOCK_GET_UNLOCK);
     lock_get_unlock_pkt->flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK |
         MPIDI_CH3_PKT_FLAG_RMA_UNLOCK; /* FIXME | MPIDI_CH3_PKT_FLAG_RMA_REQ_ACK; */
@@ -4266,13 +4114,9 @@ static int send_lock_get(MPID_Win *win_ptr, int target_rank)
         win_ptr->all_win_handles[rma_op->target_rank];
     lock_get_unlock_pkt->source_win_handle = win_ptr->handle;
     lock_get_unlock_pkt->lock_type = lock_type;
- 
-    lock_get_unlock_pkt->addr = 
-        (char *) win_ptr->base_addrs[rma_op->target_rank] +
-        win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
-        
-    lock_get_unlock_pkt->count = rma_op->target_count;
-    lock_get_unlock_pkt->datatype = rma_op->target_datatype;
+    lock_get_unlock_pkt->addr = get_pkt->addr;
+    lock_get_unlock_pkt->count = get_pkt->count;
+    lock_get_unlock_pkt->datatype = get_pkt->datatype;
     lock_get_unlock_pkt->request_handle = rreq->handle;
 
     comm_ptr = win_ptr->comm_ptr;
@@ -5389,7 +5233,7 @@ int MPIDI_CH3_PktHandler_LockPutUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	new_ptr->source_win_handle = lock_put_unlock_pkt->source_win_handle;
 	new_ptr->vc = vc;
 	
-	new_ptr->pt_single_op->type = MPIDI_RMA_PUT;
+	new_ptr->pt_single_op->type = MPIDI_CH3_PKT_LOCK_PUT_UNLOCK;
 	new_ptr->pt_single_op->flags = lock_put_unlock_pkt->flags;
 	new_ptr->pt_single_op->addr = lock_put_unlock_pkt->addr;
 	new_ptr->pt_single_op->count = lock_put_unlock_pkt->count;
@@ -5543,7 +5387,7 @@ int MPIDI_CH3_PktHandler_LockGetUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	new_ptr->source_win_handle = lock_get_unlock_pkt->source_win_handle;
 	new_ptr->vc = vc;
 	
-	new_ptr->pt_single_op->type = MPIDI_RMA_GET;
+	new_ptr->pt_single_op->type = MPIDI_CH3_PKT_LOCK_GET_UNLOCK;
 	new_ptr->pt_single_op->flags = lock_get_unlock_pkt->flags;
 	new_ptr->pt_single_op->addr = lock_get_unlock_pkt->addr;
 	new_ptr->pt_single_op->count = lock_get_unlock_pkt->count;
@@ -5638,7 +5482,7 @@ int MPIDI_CH3_PktHandler_LockAccumUnlock( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     new_ptr->source_win_handle = lock_accum_unlock_pkt->source_win_handle;
     new_ptr->vc = vc;
     
-    new_ptr->pt_single_op->type = MPIDI_RMA_ACCUMULATE;
+    new_ptr->pt_single_op->type = MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK;
     new_ptr->pt_single_op->flags = lock_accum_unlock_pkt->flags;
     new_ptr->pt_single_op->addr = lock_accum_unlock_pkt->addr;
     new_ptr->pt_single_op->count = lock_accum_unlock_pkt->count;

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/include/mpidpkt.h          |   41 +++-
 src/mpid/ch3/include/mpidrma.h          |   43 +--
 src/mpid/ch3/src/ch3u_handle_recv_req.c |    8 +-
 src/mpid/ch3/src/ch3u_rma_acc_ops.c     |   92 ++++--
 src/mpid/ch3/src/ch3u_rma_ops.c         |  104 ++++--
 src/mpid/ch3/src/ch3u_rma_sync.c        |  576 +++++++++++--------------------
 6 files changed, 401 insertions(+), 463 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list