[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1rc2-126-g8c5ab38

mysql vizuser noreply at mpich.org
Tue Dec 31 12:56:32 CST 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  8c5ab38319b3ac34ed4e7a37a265b4a166baaf33 (commit)
      from  a1882012d3c977b44a17e6fa887ea662f4f2a2c2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/8c5ab38319b3ac34ed4e7a37a265b4a166baaf33

commit 8c5ab38319b3ac34ed4e7a37a265b4a166baaf33
Author: Haizhu Liu <haizhu at us.ibm.com>
Date:   Thu Dec 19 16:38:50 2013 -0500

    PAMID support of rma atomic functions
    
    (ibm) F189037
    
    Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>

diff --git a/src/mpid/pamid/include/mpidi_datatypes.h b/src/mpid/pamid/include/mpidi_datatypes.h
index 2b7ae56..203961e 100644
--- a/src/mpid/pamid/include/mpidi_datatypes.h
+++ b/src/mpid/pamid/include/mpidi_datatypes.h
@@ -31,7 +31,6 @@
 #ifdef MPIDI_STATISTICS
 #include <pami_ext_pe.h>
 #endif
-
 #include "mpidi_constants.h"
 #include "mpidi_platform.h"
 #include "pami.h"
@@ -164,6 +163,8 @@ enum
     MPIDI_Protocols_Dyntask,
     MPIDI_Protocols_Dyntask_disconnect,
 #endif
+    MPIDI_Protocols_WinAtomic,
+    MPIDI_Protocols_WinAtomicAck,
     MPIDI_Protocols_COUNT,
   };
 
@@ -375,24 +376,12 @@ struct MPID_Win;
 /** \brief Forward declaration of the MPID_Group structure */
 struct MPID_Group;
 
-
-/**
- * \brief Collective information related to a window
- *
- * This structure is used to share information about a local window with
- * all nodes in the window communicator. Part of that information includes
- * statistics about RMA operations during access/exposure epochs.
- *
- * The structure is allocated as an array sized for the window communicator.
- * Each entry in the array corresponds directly to the node of the same rank.
- */
 typedef enum
   {
     MPIDI_REQUEST_LOCK,
     MPIDI_REQUEST_LOCKALL,
   } MPIDI_LOCK_TYPE_t;
 
-
 struct MPIDI_Win_lock
 {
   struct MPIDI_Win_lock *next;
@@ -437,6 +426,17 @@ typedef struct workQ_t {
    int  count;
 } workQ_t;
 
+
+/**
+ * \brief Collective information related to a window
+ *
+ * This structure is used to share information about a local window with
+ * all nodes in the window communicator. Part of that information includes
+ * statistics about RMA operations during access/exposure epochs.
+ *
+ * The structure is allocated as an array sized for the window communicator.
+ * Each entry in the array corresponds directly to the node of the same rank.
+ */
 typedef struct MPIDI_Win_info
 {
   void             * base_addr;     /**< Node's exposure window base address                  */
@@ -450,10 +450,10 @@ typedef struct MPIDI_Win_info
  */
 struct MPIDI_Win
 {
-  struct MPIDI_Win_info * info;    /**< allocated array of collective info             */
+  struct MPIDI_Win_info     *info;          /**< allocated array of collective info             */
   MPIDI_Win_info_args info_args;
   void             ** shm_base_addrs; /* base address shared by all process in comm      */
-  workQ_t work;
+  workQ_t work;    
   RMA_nOps_t *origin;
   struct MPIDI_Win_sync
   {
@@ -490,7 +490,6 @@ struct MPIDI_Win
       } local;
     } lock;
   } sync;
-
   int request_based;          /* flag for request based rma */
   struct MPID_Request *rreq;  /* anchor of MPID_Request for request based rma */
 };
diff --git a/src/mpid/pamid/include/mpidi_prototypes.h b/src/mpid/pamid/include/mpidi_prototypes.h
index dfdd1b8..d833637 100644
--- a/src/mpid/pamid/include/mpidi_prototypes.h
+++ b/src/mpid/pamid/include/mpidi_prototypes.h
@@ -203,6 +203,25 @@ MPIDI_WinControlCB(pami_context_t    context,
                    size_t            sndlen,
                    pami_endpoint_t   sender,
                    pami_recv_t     * recv);
+void
+MPIDI_WinAtomicCB(pami_context_t    context,
+                  void            * cookie,
+                  const void      * _control,
+                  size_t            size,
+                  const void      * sndbuf,
+                  size_t            sndlen,
+                  pami_endpoint_t   sender,
+                  pami_recv_t     * recv);
+void
+MPIDI_WinAtomicAckCB(pami_context_t    context,
+                     void            * cookie,
+                     const void      * _control,
+                     size_t            size,
+                     const void      * sndbuf,
+                     size_t            sndlen,
+                     pami_endpoint_t   sender,
+                     pami_recv_t     * recv);
+
 
 void
 MPIDI_WinGetAccumCB(pami_context_t    context,
diff --git a/src/mpid/pamid/src/misc/mpid_unimpl.c b/src/mpid/pamid/src/misc/mpid_unimpl.c
index 6c6dea4..44bda50 100644
--- a/src/mpid/pamid/src/misc/mpid_unimpl.c
+++ b/src/mpid/pamid/src/misc/mpid_unimpl.c
@@ -91,7 +91,6 @@ int MPID_Comm_group_failed(MPID_Comm *comm_ptr, MPID_Group **failed_group_ptr)
   return 0;
 }
 
-
 int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPID_Info *info_ptr, MPID_Comm *comm_ptr,
                              void **base_ptr, MPID_Win **win_ptr)
 {
@@ -99,29 +98,9 @@ int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPID_Info *info_ptr,
   return 0;
 }
 
-
-int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
-                          void *result_addr, MPI_Datatype datatype, int target_rank,
-                          MPI_Aint target_disp, MPID_Win *win)
-{
-  MPID_abort();
-  return 0;
-}
-
-
-int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
-                      MPI_Datatype datatype, int target_rank, MPI_Aint target_disp,
-                      MPI_Op op, MPID_Win *win)
-{
-  MPID_abort();
-  return 0;
-}
-
 int MPID_Win_shared_query(MPID_Win *win, int rank, MPI_Aint *size, int *disp_unit,
                           void *baseptr)
 {
   MPID_abort();
   return 0;
 }
-
-
diff --git a/src/mpid/pamid/src/mpid_init.c b/src/mpid/pamid/src/mpid_init.c
index 5809ec1..3938b67 100644
--- a/src/mpid/pamid/src/mpid_init.c
+++ b/src/mpid/pamid/src/mpid_init.c
@@ -153,6 +153,8 @@ static struct
   struct protocol_t RVZ_zerobyte;
   struct protocol_t WinGetAccum;
   struct protocol_t WinGetAccumAck;
+  struct protocol_t WinAtomic;
+  struct protocol_t WinAtomicAck;
 #ifdef DYNAMIC_TASKING
   struct protocol_t Dyntask;
   struct protocol_t Dyntask_disconnect;
@@ -274,6 +276,28 @@ static struct
     },
     .immediate_min     = sizeof(MPIDI_Win_GetAccMsgInfo),
   },
+  .WinAtomic = {
+    .func = MPIDI_WinAtomicCB,
+    .dispatch = MPIDI_Protocols_WinAtomic,
+    .options = {
+      .consistency     = USE_PAMI_CONSISTENCY,
+      .long_header     = PAMI_HINT_DISABLE,
+      .recv_immediate  = PAMI_HINT_ENABLE,
+      .use_rdma        = PAMI_HINT_DISABLE,
+    },
+    .immediate_min     = sizeof(MPIDI_AtomicHeader_t),
+  },
+  .WinAtomicAck = {
+    .func = MPIDI_WinAtomicAckCB,
+    .dispatch = MPIDI_Protocols_WinAtomicAck,
+    .options = {
+      .consistency     = USE_PAMI_CONSISTENCY,
+      .long_header     = PAMI_HINT_DISABLE,
+      .recv_immediate  = PAMI_HINT_ENABLE,
+      .use_rdma        = PAMI_HINT_DISABLE,
+    },
+    .immediate_min     = sizeof(MPIDI_AtomicHeader_t),
+  },
 #ifdef DYNAMIC_TASKING
   .Dyntask = {
     .func = MPIDI_Recvfrom_remote_world,
@@ -826,6 +850,9 @@ MPIDI_PAMI_dispath_init()
   MPIDI_PAMI_dispath_set(MPIDI_Protocols_RVZ_zerobyte, &proto_list.RVZ_zerobyte, NULL);
   MPIDI_PAMI_dispath_set(MPIDI_Protocols_WinGetAccum, &proto_list.WinGetAccum, NULL);
   MPIDI_PAMI_dispath_set(MPIDI_Protocols_WinGetAccumAck, &proto_list.WinGetAccumAck, NULL);
+  MPIDI_PAMI_dispath_set(MPIDI_Protocols_WinAtomic, &proto_list.WinAtomic,   NULL);
+  MPIDI_PAMI_dispath_set(MPIDI_Protocols_WinAtomicAck, &proto_list.WinAtomicAck,   NULL);
+
 #ifdef DYNAMIC_TASKING
   MPIDI_PAMI_dispath_set(MPIDI_Protocols_Dyntask,   &proto_list.Dyntask,  NULL);
   MPIDI_PAMI_dispath_set(MPIDI_Protocols_Dyntask_disconnect,   &proto_list.Dyntask_disconnect,  NULL);
diff --git a/src/mpid/pamid/src/mpid_progress.h b/src/mpid/pamid/src/mpid_progress.h
index 61418a4..268cf50 100644
--- a/src/mpid/pamid/src/mpid_progress.h
+++ b/src/mpid/pamid/src/mpid_progress.h
@@ -170,6 +170,29 @@ typedef enum
 
 
 /**
+ * \brief A macro to easily implement advancing until a specific
+ * condition becomes false.
+ *
+ * \param[in] COND This is not a true parameter.  It is *specifically*
+ * designed to be evaluated several times, allowing for the result to
+ * change.  The condition would generally look something like
+ * "(cb.client == 0)".  This would be used as the condition on a while
+ * loop.
+ *
+ * \returns MPI_SUCCESS
+ *
+ * This macro makes one pami advance regardless of the state of the COND.
+ */
+#define MPID_PROGRESS_WAIT_DO_WHILE(COND)       \
+({                                              \
+  do {                                          \
+    MPID_Progress_wait(&__state);               \
+  } while(COND);                                \
+  MPI_SUCCESS;                                  \
+})
+
+
+/**
  * \brief Unused, provided since MPI calls it.
  * \param[in] state The previously seen state of advance
  */
diff --git a/src/mpid/pamid/src/onesided/Makefile.mk b/src/mpid/pamid/src/onesided/Makefile.mk
index 3497b71..cb526db 100644
--- a/src/mpid/pamid/src/onesided/Makefile.mk
+++ b/src/mpid/pamid/src/onesided/Makefile.mk
@@ -28,7 +28,6 @@ noinst_HEADERS +=                                                    \
 lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
   src/mpid/pamid/src/onesided/mpid_1s.c                              \
   src/mpid/pamid/src/onesided/mpid_win_accumulate.c                  \
-  src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c              \
   src/mpid/pamid/src/onesided/mpid_win_create.c                      \
   src/mpid/pamid/src/onesided/mpid_win_fence.c                       \
   src/mpid/pamid/src/onesided/mpid_win_free.c                        \
@@ -37,16 +36,19 @@ lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
   src/mpid/pamid/src/onesided/mpid_win_lock_all.c                    \
   src/mpid/pamid/src/onesided/mpid_win_pscw.c                        \
   src/mpid/pamid/src/onesided/mpid_win_put.c                         \
-  src/mpid/pamid/src/onesided/mpid_win_allocate.c                    \
   src/mpid/pamid/src/onesided/mpid_win_create_dynamic.c              \
   src/mpid/pamid/src/onesided/mpid_win_flush.c                       \
+  src/mpid/pamid/src/onesided/mpid_win_allocate.c                    \
+  src/mpid/pamid/src/onesided/mpid_win_sync.c                        \
   src/mpid/pamid/src/onesided/mpid_win_attach.c                      \
   src/mpid/pamid/src/onesided/mpid_win_detach.c                      \
-  src/mpid/pamid/src/onesided/mpid_win_sync.c                        \
   src/mpid/pamid/src/onesided/mpid_win_get_info.c                    \
   src/mpid/pamid/src/onesided/mpid_win_set_info.c                    \
+  src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c              \
   src/mpid/pamid/src/onesided/mpid_win_reqops.c                      \
-  src/mpid/pamid/src/onesided/mpidi_win_control.c
+  src/mpid/pamid/src/onesided/mpidi_win_control.c                    \
+  src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c            \
+  src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/onesided/mpid_1s.c b/src/mpid/pamid/src/onesided/mpid_1s.c
index 7398174..618f01f 100644
--- a/src/mpid/pamid/src/onesided/mpid_1s.c
+++ b/src/mpid/pamid/src/onesided/mpid_1s.c
@@ -53,12 +53,12 @@ MPIDI_Win_DoneCB(pami_context_t  context,
         }
     }
 
-  if (req->origin.completed == req->target.dt.num_contig)
+
+    if (req->origin.completed == req->target.dt.num_contig)
     {
       req->win->mpid.origin[target_rank].nCompleted++;
-	if(req->req_handle) {
+      if(req->req_handle)
           MPID_cc_set(req->req_handle->cc_ptr, 0);
-        }
 
       if (req->buffer_free) {
           MPIU_Free(req->buffer);
@@ -66,11 +66,13 @@ MPIDI_Win_DoneCB(pami_context_t  context,
           req->buffer_free = 0;
       }
       if (req->accum_headers)
-        MPIU_Free(req->accum_headers);
-
-      if( (req->type != MPIDI_WIN_REQUEST_RPUT) && (req->type != MPIDI_WIN_REQUEST_RGET) && (req->type != MPIDI_WIN_REQUEST_RACCUMULATE) && (req->type != MPIDI_WIN_REQUEST_RGET_ACCUMULATE) )
-        MPIU_Free(req);
+          MPIU_Free(req->accum_headers);
+      if (!((req->type > MPIDI_WIN_REQUEST_GET_ACCUMULATE) && (req->type <=MPIDI_WIN_REQUEST_RGET_ACCUMULATE)))
+          MPIU_Free(req);
     }
+
+    if ( (req->origin.completed == req->origin.dt.num_contig) && ( (req->type == MPIDI_WIN_REQUEST_FETCH_AND_OP) || (req->type == MPIDI_WIN_REQUEST_COMPARE_AND_SWAP) ) )
+          MPIU_Free(req);
   MPIDI_Progress_signal();
 }
 
diff --git a/src/mpid/pamid/src/onesided/mpid_win_accumulate.c b/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
index 4f70997..d6c3224 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
@@ -171,7 +171,6 @@ MPID_Accumulate(void         *origin_addr,
 {
   int mpi_errno = MPI_SUCCESS;
   MPIDI_Win_request *req = MPIU_Calloc0(1, MPIDI_Win_request);
-  *req = zero_req;
   req->win          = win;
   if(win->mpid.request_based != 1)
     req->type         = MPIDI_WIN_REQUEST_ACCUMULATE;
@@ -252,7 +251,7 @@ MPID_Accumulate(void         *origin_addr,
         MPIU_Free(req);
       return MPI_SUCCESS;
     }
-  win->mpid.origin[target_rank].nStarted++; 
+  win->mpid.origin[target_rank].nStarted++;
 
   req->target.rank = target_rank;
 
diff --git a/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c b/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
new file mode 100644
index 0000000..8022bc7
--- /dev/null
+++ b/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
@@ -0,0 +1,197 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/onesided/mpid_win_compare_and_swap.c
+ * \brief PAMID implementation of MPI_Compare_and_swap
+ */
+#include "mpidi_onesided.h"
+
+extern pami_result_t
+MPIDI_Atomic (pami_context_t   context,
+	      void           * _req);
+
+/**
+ * \brief Posted work function: issue a compare-and-swap through PAMI_Rmw.
+ *
+ * \param[in] context PAMI context passed on to PAMI_Rmw()
+ * \param[in] _req    The MPIDI_Win_request describing the operation
+ * \return The result of PAMI_Rmw()
+ */
+static pami_result_t
+MPIDI_Compare_and_swap_using_pami_rmw(pami_context_t   context,
+                                      void           * _req)
+{
+  MPIDI_Win_request *req = (MPIDI_Win_request*)_req;
+
+  MPID_assert(req != NULL);
+  int target_rank = req->target.rank;
+
+  pami_rmw_t params = zero_rmw_parms;
+  params.dest      = req->dest;
+  params.cookie    = (void *)req;          /* handed back to MPIDI_Win_DoneCB */
+  params.done_fn   = MPIDI_Win_DoneCB;
+  params.type      = req->pami_datatype;
+  params.operation = PAMI_ATOMIC_FETCH_COMPARE_SET;
+  params.local     = req->user_buffer;     /* result */
+  params.remote    = req->win->mpid.info[target_rank].base_addr + req->offset + (size_t)req->origin.dt.map[0].DLOOP_VECTOR_BUF;
+  params.value     = req->buffer;          /* origin value */
+  params.test      = req->compare_buffer;  /* compare value */
+  pami_result_t rc = PAMI_Rmw(context, &params);
+  MPID_assert(rc == PAMI_SUCCESS);
+  return rc;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_Compare_and_swap
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/**
+ * \brief PAMID implementation of MPI_Compare_and_swap.
+ * Local (or shared-window) targets are updated in place; remote targets are
+ * handed to a posted work function (PAMI_Rmw when supported, else MPIDI_Atomic).
+ */
+int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
+                          void *result_addr, MPI_Datatype datatype, int target_rank,
+                          MPI_Aint target_disp, MPID_Win *win)
+{
+  int mpi_errno = MPI_SUCCESS;
+  MPIDI_Win_request *req;
+  pami_type_t pami_type;
+  int shm_locked=0;
+
+  if(win->mpid.sync.origin_epoch_type == win->mpid.sync.target_epoch_type &&
+     win->mpid.sync.origin_epoch_type == MPID_EPOTYPE_REFENCE){
+     win->mpid.sync.origin_epoch_type = MPID_EPOTYPE_FENCE;
+     win->mpid.sync.target_epoch_type = MPID_EPOTYPE_FENCE;
+  }
+
+  if(win->mpid.sync.origin_epoch_type == MPID_EPOTYPE_NONE ||
+     win->mpid.sync.origin_epoch_type == MPID_EPOTYPE_POST){
+    MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC, return mpi_errno, "**rmasync");
+  }
+
+  if (target_rank == MPI_PROC_NULL)
+    return MPI_SUCCESS;
+  /* Check if datatype is a C integer, Fortran Integer,
+     logical, or byte, per the classes given on page 165. */
+  MPIR_ERRTEST_TYPE_RMA_ATOMIC(datatype, "datatype", mpi_errno);
+
+  req = (MPIDI_Win_request *) MPIU_Calloc0(1, MPIDI_Win_request);
+  req->win          = win;
+  req->type         = MPIDI_WIN_REQUEST_COMPARE_AND_SWAP;
+
+  req->offset = target_disp * win->mpid.info[target_rank].disp_unit;
+
+  MPIDI_Win_datatype_basic(1, datatype, &req->origin.dt);
+
+  if (req->origin.dt.size == 0)
+    {
+      MPIU_Free(req);
+      return MPI_SUCCESS;
+    }
+
+  req->target.rank = target_rank;
+
+  if (target_rank == win->comm_ptr->rank || win->create_flavor == MPI_WIN_FLAVOR_SHARED)
+    {
+        void *base, *dest_addr;
+        int disp_unit;
+        int len;
+
+#ifdef PENDING_SHM_WIN
+        if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+            MPIDI_SHM_MUTEX_LOCK(win);
+            shm_locked = 1;
+
+            base = win->mpid.info[target_rank].base_addr;
+            disp_unit = win->disp_unit;
+        }
+        else {
+#endif
+            base = win->base;
+            disp_unit = win->disp_unit;
+#ifdef PENDING_SHM_WIN
+        }
+#endif
+
+        dest_addr = (char *) base + disp_unit * target_disp;
+        /* Hand back the current target value, then swap only on a match. */
+        MPID_Datatype_get_size_macro(datatype, len);
+        MPIU_Memcpy(result_addr, dest_addr, len);
+
+        if (MPIR_Compare_equal(compare_addr, dest_addr, datatype))
+            MPIU_Memcpy(dest_addr, origin_addr, len);
+
+#ifdef PENDING_SHM_WIN
+        if (shm_locked) {
+            MPIDI_SHM_MUTEX_UNLOCK(win);
+            shm_locked = 0;
+        }
+#endif
+        MPIU_Free(req);
+    }
+  else {
+    req->buffer      = origin_addr + req->origin.dt.true_lb;
+    req->user_buffer      = result_addr + req->origin.dt.true_lb;
+    req->compare_buffer      = compare_addr + req->origin.dt.true_lb;
+
+    pami_result_t rc;
+    pami_task_t task = MPID_VCR_GET_LPID(win->comm_ptr->vcr, target_rank);
+    if (win->mpid.sync.origin_epoch_type == MPID_EPOTYPE_START &&
+        !MPIDI_valid_group_rank(task, win->mpid.sync.sc.group))
+    {
+        MPIU_Free(req);  /* nothing has been posted yet, so the request is still ours */
+        MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC, return mpi_errno, "**rmasync");
+    }
+
+    rc = PAMI_Endpoint_create(MPIDI_Client, task, 0, &req->dest);
+    MPID_assert(rc == PAMI_SUCCESS);
+
+    MPIDI_Win_datatype_map(&req->origin.dt);
+    win->mpid.sync.total += 1;
+
+    MPI_Datatype basic_type = MPI_DATATYPE_NULL;
+    MPID_Datatype_get_basic_type(datatype, basic_type);
+    MPID_assert(basic_type != MPI_DATATYPE_NULL);
+    req->origin.datatype=basic_type;
+
+    /* The pamid one-sided design requires context post in order to handle the
+     * case where the number of pending rma operation exceeds the
+     * 'PAMID_RMA_PENDING' threshold. When there are too many pending requests the
+     * work function remains on the context post queue (by returning PAMI_EAGAIN)
+     * so that the next time the context is advanced the work function will be
+     * invoked again.
+     *
+     * TODO - When context post is not required it would be better to attempt a
+     *        direct context operation and then fail over to using context post if
+     *        the rma pending threshold has been reached. This would result in
+     *        better latency for one-sided operations.
+     */
+    MPI_Op null_op=0;        /* op 0: only the datatype is checked */
+    pami_atomic_t pami_op;   /* required by the helper's signature; not used here */
+    if(MPIDI_Datatype_is_pami_rmw_supported(basic_type, &pami_type, null_op, &pami_op)  ) {
+      req->pami_datatype = pami_type;
+      PAMI_Context_post(MPIDI_Context[0], &req->post_request, MPIDI_Compare_and_swap_using_pami_rmw, req);
+    } else {
+      PAMI_Context_post(MPIDI_Context[0], &req->post_request, MPIDI_Atomic, req);
+    }
+  }
+
+fn_fail:
+  return mpi_errno;
+}
diff --git a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
new file mode 100644
index 0000000..153ef88
--- /dev/null
+++ b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
@@ -0,0 +1,420 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/onesided/mpid_win_fetch_and_op.c
+ * \brief PAMID implementation of MPI_Fetch_and_op and the WinAtomic handlers
+ */
+#include "mpidi_onesided.h"
+
+/**
+ * \brief Posted work function: issue a fetch-and-op through PAMI_Rmw.
+ * \param[in] context PAMI context passed on to PAMI_Rmw()
+ * \param[in] _req    The MPIDI_Win_request describing the operation
+ * \return The result of PAMI_Rmw()
+ */
+static pami_result_t
+MPIDI_Fetch_and_op_using_pami_rmw(pami_context_t   context,
+                                  void           * _req)
+{
+    MPIDI_Win_request *req = (MPIDI_Win_request*)_req;
+
+    MPID_assert(req != NULL);
+    int target_rank = req->target.rank;
+
+    pami_rmw_t params = zero_rmw_parms;
+    params.dest      = req->dest;
+    params.cookie    = (void *)req;       /* handed back to MPIDI_Win_DoneCB */
+    params.done_fn   = MPIDI_Win_DoneCB;
+    params.type      = req->pami_datatype;
+    params.operation = req->pami_op;
+    params.local     = req->user_buffer;  /* result */
+    params.remote    = req->win->mpid.info[target_rank].base_addr + req->offset + (size_t)req->origin.dt.map[0].DLOOP_VECTOR_BUF;
+    params.value     = req->buffer;       /* origin operand */
+    pami_result_t rc = PAMI_Rmw(context, &params);
+    MPID_assert(rc == PAMI_SUCCESS);
+    return rc;
+}
+
+
+/**
+ * \brief Target-side dispatch for MPIDI_Protocols_WinAtomic.
+ * Applies the compare-and-swap or fetch-and-op described by the header to
+ * the header's remote_addr and returns the previous contents to the sender
+ * through the MPIDI_Protocols_WinAtomicAck dispatch.
+ */
+void
+MPIDI_WinAtomicCB(pami_context_t    context,
+		  void            * cookie,
+		  const void      * _hdr,
+		  size_t            size,
+		  const void      * sndbuf,
+		  size_t            sndlen,
+		  pami_endpoint_t   sender,
+		  pami_recv_t     * recv)
+{
+  MPIDI_AtomicHeader_t *req_hdr = (MPIDI_AtomicHeader_t *) _hdr;
+  MPID_assert (req_hdr != NULL);
+  MPID_assert (sizeof(MPIDI_AtomicHeader_t) == size);
+  /* The reply starts as a copy of the request; only buf is overwritten. */
+  MPIDI_AtomicHeader_t ack_hdr = *req_hdr;
+  void *target = req_hdr->remote_addr;
+  int nbytes = MPID_Datatype_get_basic_size (req_hdr->datatype);
+
+  switch (req_hdr->atomic_type) {
+  case MPIDI_WIN_REQUEST_COMPARE_AND_SWAP:
+    MPIU_Memcpy(ack_hdr.buf, target, nbytes);   /* previous value goes back */
+    if (MPIR_Compare_equal (&req_hdr->test, target, req_hdr->datatype))
+      MPIU_Memcpy(target, req_hdr->buf, nbytes);
+    break;
+  case MPIDI_WIN_REQUEST_FETCH_AND_OP:
+    MPIU_Memcpy(ack_hdr.buf, target, nbytes);   /* previous value goes back */
+    if (req_hdr->op == MPI_REPLACE) {
+      MPIU_Memcpy(target, req_hdr->buf, nbytes);
+    } else if (req_hdr->op != MPI_NO_OP) {
+      int one = 1;
+      MPI_User_function *uop = MPIR_OP_HDL_TO_FN(req_hdr->op);
+      (*uop) ((void *)req_hdr->buf, target, &one, &req_hdr->datatype);
+    }
+    break;
+  default:
+    MPID_abort();
+  }
+
+  pami_send_immediate_t params = {
+    .dispatch = MPIDI_Protocols_WinAtomicAck,
+    .dest     = sender,
+    .header   = {
+      .iov_base = &ack_hdr,
+      .iov_len  = sizeof(MPIDI_AtomicHeader_t),
+    },
+    .data     = {
+       .iov_base = NULL,
+       .iov_len  = 0,
+     },
+    .hints = 0, 
+  };
+
+  pami_result_t rc = PAMI_Send_immediate(context, &params);
+  MPID_assert(rc == PAMI_SUCCESS);
+}
+
+/** \brief Origin-side dispatch for MPIDI_Protocols_WinAtomicAck: copies the
+ *  returned value into the result buffer (if any) and completes the request. */
+void
+MPIDI_WinAtomicAckCB(pami_context_t    context,
+		     void            * cookie,
+		     const void      * _hdr,
+		     size_t            size,
+		     const void      * sndbuf,
+		     size_t            sndlen,
+		     pami_endpoint_t   sender,
+		     pami_recv_t     * recv)
+{
+  MPIDI_AtomicHeader_t *ack = (MPIDI_AtomicHeader_t *) _hdr;
+  if (ack->result_addr != NULL) {   /* NULL: nothing to copy back */
+    int nbytes = MPID_Datatype_get_basic_size (ack->datatype);
+    MPIU_Memcpy(ack->result_addr, ack->buf, nbytes);
+  }
+
+  MPIDI_Win_DoneCB(context, ack->request_addr, PAMI_SUCCESS);
+}
+
+
+/** \brief Posted work function: send the atomic described by \c _req to its
+ *  target as an immediate message on MPIDI_Protocols_WinAtomic; the reply is
+ *  handled by MPIDI_WinAtomicAckCB.  Always returns PAMI_SUCCESS. */
+pami_result_t
+MPIDI_Atomic (pami_context_t   context,
+	      void           * _req)
+{
+  MPIDI_Win_request *req = (MPIDI_Win_request*)_req;
+  MPIDI_AtomicHeader_t atomic_hdr;
+  int len = MPID_Datatype_get_basic_size (req->origin.datatype);
+
+  /* The operands are copied into the header, so guard the copy size. */
+  MPID_assert(len <= MAX_ATOMIC_TYPE_SIZE);
+  if (req->buffer)
+    MPIU_Memcpy(atomic_hdr.buf, req->buffer, len);
+  if (req->type == MPIDI_WIN_REQUEST_COMPARE_AND_SWAP)
+    MPIU_Memcpy(atomic_hdr.test, req->compare_buffer, len);
+  atomic_hdr.result_addr = req->user_buffer;
+  atomic_hdr.remote_addr = req->win->mpid.info[req->target.rank].base_addr + req->offset;
+  atomic_hdr.request_addr = req;   /* the ack handler passes this to MPIDI_Win_DoneCB */
+  atomic_hdr.datatype = req->origin.datatype;
+  atomic_hdr.atomic_type = req->type;
+  atomic_hdr.op = req->op;
+
+  MPID_assert (req->origin.dt.num_contig == 1);
+  ++req->win->mpid.sync.started;
+
+  pami_send_immediate_t params = {
+    .dispatch = MPIDI_Protocols_WinAtomic,
+    .dest     = req->dest,
+    .header   = {
+      .iov_base = &atomic_hdr,
+      .iov_len  = sizeof(MPIDI_AtomicHeader_t),
+    },
+    .data     = {
+       .iov_base = NULL,
+       .iov_len  = 0,
+     },
+    .hints = 0, 
+  };
+
+  pami_result_t rc = PAMI_Send_immediate(context, &params);
+  MPID_assert(rc == PAMI_SUCCESS);
+  return PAMI_SUCCESS;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_Fetch_and_op
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/**
+ * \brief PAMID implementation of MPI_Fetch_and_op.
+ *
+ * Local (or shared-window) targets are updated in place. Remote targets go
+ * through a posted work function: PAMI_Rmw when the type/op pair supports it,
+ * otherwise the MPIDI_Atomic active message.
+ */
+int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
+                      MPI_Datatype datatype, int target_rank,
+                      MPI_Aint target_disp, MPI_Op op, MPID_Win *win)
+{
+  int mpi_errno = MPI_SUCCESS;
+  MPIDI_Win_request *req;
+  int good_for_rmw=0;
+  int count = 1;
+  int shm_locked = 0;
+
+  if(win->mpid.sync.origin_epoch_type == win->mpid.sync.target_epoch_type &&
+     win->mpid.sync.origin_epoch_type == MPID_EPOTYPE_REFENCE){
+     win->mpid.sync.origin_epoch_type = MPID_EPOTYPE_FENCE; win->mpid.sync.target_epoch_type = MPID_EPOTYPE_FENCE;
+  }
+
+  if(win->mpid.sync.origin_epoch_type == MPID_EPOTYPE_NONE ||
+     win->mpid.sync.origin_epoch_type == MPID_EPOTYPE_POST){
+    MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC,
+                        return mpi_errno, "**rmasync");
+  }
+
+  pami_type_t         pami_type;
+  pami_atomic_t  pami_op;
+
+  if (target_rank == MPI_PROC_NULL)
+      return MPI_SUCCESS;
+
+  MPI_Datatype basic_type = MPI_DATATYPE_NULL;
+  MPID_Datatype_get_basic_type(datatype, basic_type);
+  if ((datatype == MPI_FLOAT_INT)  ||
+      (datatype == MPI_DOUBLE_INT) ||
+      (datatype == MPI_LONG_INT)   ||
+      (datatype == MPI_SHORT_INT)  ||
+      (datatype == MPI_LONG_DOUBLE_INT))
+    {
+      MPID_assert(basic_type == MPI_DATATYPE_NULL);
+      basic_type = datatype;
+    }
+    MPID_assert(basic_type != MPI_DATATYPE_NULL);
+
+  if(MPIDI_Datatype_is_pami_rmw_supported(basic_type, &pami_type, op, &pami_op)  ) {
+    good_for_rmw = 1;
+  } else {
+     if((op == MPI_NO_OP) && (origin_addr == NULL) && (win->create_flavor != MPI_WIN_FLAVOR_SHARED) ) {
+        /* essentially a MPI_Get to result buffer; report its outcome */
+        return MPID_Get(result_addr, 1, datatype, target_rank,
+                        target_disp, 1, datatype, win);
+    }
+  }
+
+  req = (MPIDI_Win_request *) MPIU_Calloc0(1, MPIDI_Win_request);
+  req->win          = win;
+  req->type         = MPIDI_WIN_REQUEST_FETCH_AND_OP;
+
+  req->offset = target_disp * win->mpid.info[target_rank].disp_unit;
+
+  if (datatype == MPI_DOUBLE_INT)
+    {
+      MPIDI_Win_datatype_basic(count*2,
+                               MPI_DOUBLE,
+                               &req->origin.dt);
+    }
+  else if (datatype == MPI_LONG_DOUBLE_INT)
+    {
+      MPIDI_Win_datatype_basic(count*2,
+                               MPI_LONG_DOUBLE,
+                               &req->origin.dt);
+    }
+  else if (datatype == MPI_LONG_INT)
+    {
+      MPIDI_Win_datatype_basic(count*2,
+                               MPI_LONG,
+                               &req->origin.dt);
+    }
+  else if (datatype == MPI_SHORT_INT)
+    {
+      MPIDI_Win_datatype_basic(count*2,
+                               MPI_INT,
+                               &req->origin.dt);
+    }
+  else
+    {
+      MPIDI_Win_datatype_basic(count,
+                               datatype,
+                               &req->origin.dt);
+    }
+
+  if (req->origin.dt.size == 0)
+    {
+      MPIU_Free(req);
+      return MPI_SUCCESS;
+    }
+
+  req->target.rank = target_rank;
+
+  if (target_rank == win->comm_ptr->rank || win->create_flavor == MPI_WIN_FLAVOR_SHARED)
+    {
+        MPI_User_function *uop;
+        void *base, *dest_addr;
+        int disp_unit;
+        int len, one;
+
+#ifdef PENDING_SHM_WIN
+        if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
+            MPIDI_SHM_MUTEX_LOCK(win);
+            shm_locked = 1;
+            base = win->mpid.shm->base_addr;
+            disp_unit = win->disp_unit;
+
+        }
+        else {
+#endif
+            base = win->base;
+            disp_unit = win->disp_unit;
+#ifdef PENDING_SHM_WIN
+        }
+#endif
+
+        dest_addr = (char *) base + disp_unit * target_disp;
+        /* Hand back the current target value before applying the op. */
+        MPID_Datatype_get_size_macro(datatype, len);
+        MPIU_Memcpy(result_addr, dest_addr, len);
+
+        uop = MPIR_OP_HDL_TO_FN(op);
+        one = 1;
+
+        (*uop)((void *) origin_addr, dest_addr, &one, &datatype);
+
+#ifdef PENDING_SHM_WIN
+        if (shm_locked) {
+            MPIDI_SHM_MUTEX_UNLOCK(win);
+            shm_locked = 0;
+        }
+#endif
+
+        MPIU_Free(req);
+    }
+  else {
+    req->compare_buffer = NULL;
+    req->pami_op = pami_op;
+    req->op = op;
+    req->pami_datatype = pami_type;
+    /* MPI_Fetch_and_op only supports predefined datatype */
+    req->buffer      = origin_addr + req->origin.dt.true_lb;
+    req->user_buffer      = result_addr + req->origin.dt.true_lb;
+
+    pami_result_t rc;
+    pami_task_t task = MPID_VCR_GET_LPID(win->comm_ptr->vcr, target_rank);
+    if (win->mpid.sync.origin_epoch_type == MPID_EPOTYPE_START &&
+      !MPIDI_valid_group_rank(task, win->mpid.sync.sc.group))
+    {
+       MPIU_Free(req);  /* nothing has been posted yet, so the request is still ours */
+       MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC, return mpi_errno, "**rmasync");
+    }
+
+    rc = PAMI_Endpoint_create(MPIDI_Client, task, 0, &req->dest);
+    MPID_assert(rc == PAMI_SUCCESS);
+
+    MPIDI_Win_datatype_map(&req->origin.dt);
+    win->mpid.sync.total += req->origin.dt.num_contig;
+    req->origin.datatype= basic_type;
+
+   /* The pamid one-sided design requires context post in order to handle the
+    * case where the number of pending rma operation exceeds the
+    * 'PAMID_RMA_PENDING' threshold. When there are too many pending requests the
+    * work function remains on the context post queue (by returning PAMI_EAGAIN)
+    * so that the next time the context is advanced the work function will be
+    * invoked again.
+    *
+    * TODO - When context post is not required it would be better to attempt a
+    *        direct context operation and then fail over to using context post if
+    *        the rma pending threshold has been reached. This would result in
+    *        better latency for one-sided operations.
+    */
+    if(good_for_rmw) {
+      PAMI_Context_post(MPIDI_Context[0], &req->post_request, MPIDI_Fetch_and_op_using_pami_rmw, req);
+    } else {
+      PAMI_Context_post(MPIDI_Context[0], &req->post_request, MPIDI_Atomic, req);
+    }
+  }
+
+fn_fail:
+  return mpi_errno;
+}
+
+
+/**
+ * \brief Tell whether a (datatype, op) pair can be handled by PAMI_Rmw.
+ *
+ * \param[in]  datatype  MPI datatype, translated with MPIDI_Datatype_to_pami()
+ * \param[out] pami_type PAMI type that \c datatype translated to
+ *                       (filled in by MPIDI_Datatype_to_pami())
+ * \param[in]  op        MPI op; 0 restricts the check to the datatype
+ * \param[out] pami_op   PAMI atomic matching \c op; only written when \c op
+ *                       is non-zero and supported
+ *
+ * \return TRUE when the PAMI type is one of the integer types tested below
+ *         and \c op is 0 or one of the ops mapped below, FALSE otherwise
+ */
+int MPIDI_Datatype_is_pami_rmw_supported(MPI_Datatype datatype, pami_type_t *pami_type, MPI_Op op, pami_atomic_t *pami_op)
+{
+  int null=0;
+  MPI_Op null_op=0;
+  pami_data_function pami_data_fn;
+
+  MPIDI_Datatype_to_pami(datatype, pami_type, op, &pami_data_fn, &null);
+
+  /* Signed and unsigned int, long and long long are accepted; the unsigned
+   * long long case must be listed explicitly alongside the signed one. */
+  if(*pami_type != PAMI_TYPE_SIGNED_INT       && *pami_type != PAMI_TYPE_UNSIGNED_INT  &&
+     *pami_type != PAMI_TYPE_SIGNED_LONG      && *pami_type != PAMI_TYPE_UNSIGNED_LONG &&
+     *pami_type != PAMI_TYPE_SIGNED_LONG_LONG && *pami_type != PAMI_TYPE_UNSIGNED_LONG_LONG)
+    return FALSE;
+
+  if      (op == null_op)     return TRUE;   /* datatype-only query */
+  else if (op == MPI_SUM)     *pami_op = PAMI_ATOMIC_FETCH_ADD;
+  else if (op == MPI_BOR)     *pami_op = PAMI_ATOMIC_FETCH_OR;
+  else if (op == MPI_BAND)    *pami_op = PAMI_ATOMIC_FETCH_AND;
+  else if (op == MPI_BXOR)    *pami_op = PAMI_ATOMIC_FETCH_XOR;
+  else if (op == MPI_REPLACE) *pami_op = PAMI_ATOMIC_FETCH_SET;
+  else if (op == MPI_NO_OP)   *pami_op = PAMI_ATOMIC_FETCH;
+  else                        return FALSE;
+
+  return TRUE;
+}
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get.c b/src/mpid/pamid/src/onesided/mpid_win_get.c
index fd8ea46..0d5b6a2 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get.c
@@ -214,7 +214,6 @@ MPID_Get(void         *origin_addr,
 {
   int mpi_errno = MPI_SUCCESS;
   MPIDI_Win_request *req = MPIU_Calloc0(1, MPIDI_Win_request);
-  *req = zero_req;
   req->win          = win;
   if(win->mpid.request_based != 1) 
     req->type         = MPIDI_WIN_REQUEST_GET;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_lock.c b/src/mpid/pamid/src/onesided/mpid_win_lock.c
index 92c6b47..259ab75 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_lock.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_lock.c
@@ -206,7 +206,7 @@ MPID_Win_unlock(int       rank,
    }
   if (rank == MPI_PROC_NULL) goto fn_exit;
   struct MPIDI_Win_sync* sync = &win->mpid.sync;
-  MPID_PROGRESS_WAIT_WHILE(win->mpid.origin[rank].nStarted != win->mpid.origin[rank].nCompleted);
+  MPID_PROGRESS_WAIT_DO_WHILE(win->mpid.origin[rank].nStarted != win->mpid.origin[rank].nCompleted);
   win->mpid.origin[rank].nCompleted=0;
   win->mpid.origin[rank].nStarted=0;
 
diff --git a/src/mpid/pamid/src/onesided/mpid_win_put.c b/src/mpid/pamid/src/onesided/mpid_win_put.c
index 7132783..6e8d38b 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_put.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_put.c
@@ -227,7 +227,6 @@ MPID_Put(void         *origin_addr,
 {
   int mpi_errno = MPI_SUCCESS;
   MPIDI_Win_request *req = MPIU_Calloc0(1, MPIDI_Win_request);
-  *req = zero_req;
   req->win          = win;
   if(win->mpid.request_based != 1) 
     req->type         = MPIDI_WIN_REQUEST_PUT;
diff --git a/src/mpid/pamid/src/onesided/mpidi_onesided.h b/src/mpid/pamid/src/onesided/mpidi_onesided.h
index 79d2981..57aabca 100644
--- a/src/mpid/pamid/src/onesided/mpidi_onesided.h
+++ b/src/mpid/pamid/src/onesided/mpidi_onesided.h
@@ -29,7 +29,9 @@ pami_get_simple_t zero_get_parms;
 pami_rput_simple_t zero_rput_parms;
 pami_put_simple_t zero_put_parms;
 pami_send_t   zero_send_parms;
+pami_send_immediate_t   zero_send_immediate_parms;
 pami_recv_t   zero_recv_parms;
+pami_rmw_t   zero_rmw_parms;
 
 /**
  * \brief One-sided Message Types
@@ -57,6 +59,8 @@ typedef enum
     MPIDI_WIN_REQUEST_RGET,
     MPIDI_WIN_REQUEST_RPUT,
     MPIDI_WIN_REQUEST_RGET_ACCUMULATE,
+    MPIDI_WIN_REQUEST_COMPARE_AND_SWAP,
+    MPIDI_WIN_REQUEST_FETCH_AND_OP,
   } MPIDI_Win_requesttype_t;
 
 typedef enum
@@ -81,6 +85,20 @@ typedef struct
 } MPIDI_Win_control_t;
 
 
+#define MAX_ATOMIC_TYPE_SIZE 32        /* byte capacity of the buf and test arrays below */
+typedef struct                         /* values and addresses for one RMA atomic request or its ack (see field notes) */
+{
+  char    buf[MAX_ATOMIC_TYPE_SIZE];   //Origin value or ack result value
+  char    test[MAX_ATOMIC_TYPE_SIZE];  //Test element for CAS
+  void  * result_addr;                 //Address on source to store output
+  void  * remote_addr;                 //Address of target on destination
+  void  * request_addr;                //Address of the request object
+  MPI_Datatype  datatype;              //presumably the MPI type of the value held in buf -- TODO confirm
+  MPI_Op        op;                    //presumably the MPI op to apply at the target -- TODO confirm
+  int           atomic_type;           //presumably a MPIDI_Win_requesttype_t value (CAS vs fetch-and-op) -- TODO confirm
+} MPIDI_AtomicHeader_t;
+
+
 typedef struct MPIDI_WinLock_info
 {
   unsigned            peer;
@@ -177,6 +195,7 @@ typedef struct _mpidi_win_request
   } target;
 
   void     *user_buffer;
+  void     *compare_buffer;     /* anchor of compare buffer for compare and swap */
   uint32_t  buffer_free;
   void     *buffer;
   struct _mpidi_win_request *next; 
@@ -185,6 +204,12 @@ typedef struct _mpidi_win_request
   MPI_Op     op;
   int        result_num_contig;   
 
+
+  /* for RMA atomic functions */
+  
+  pami_atomic_t      pami_op;        
+  pami_type_t        pami_datatype;  
+ 
   int request_based;            /* flag for request based rma */
   MPID_Request *req_handle;     /* anchor of MPID_Request struc for request based rma*/
 } MPIDI_Win_request;

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/pamid/include/mpidi_datatypes.h           |   31 +-
 src/mpid/pamid/include/mpidi_prototypes.h          |   19 +
 src/mpid/pamid/src/misc/mpid_unimpl.c              |   21 -
 src/mpid/pamid/src/mpid_init.c                     |   27 ++
 src/mpid/pamid/src/mpid_progress.h                 |   23 +
 src/mpid/pamid/src/onesided/Makefile.mk            |   10 +-
 src/mpid/pamid/src/onesided/mpid_1s.c              |   16 +-
 src/mpid/pamid/src/onesided/mpid_win_accumulate.c  |    3 +-
 .../pamid/src/onesided/mpid_win_compare_and_swap.c |  197 +++++++++
 .../pamid/src/onesided/mpid_win_fetch_and_op.c     |  420 ++++++++++++++++++++
 src/mpid/pamid/src/onesided/mpid_win_get.c         |    1 -
 src/mpid/pamid/src/onesided/mpid_win_lock.c        |    2 +-
 src/mpid/pamid/src/onesided/mpid_win_put.c         |    1 -
 src/mpid/pamid/src/onesided/mpidi_onesided.h       |   25 ++
 14 files changed, 743 insertions(+), 53 deletions(-)
 create mode 100644 src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
 create mode 100644 src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list