[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2a1-16-g09930cc

Service Account noreply at mpich.org
Sat Sep 20 08:18:18 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  09930cc20a8532a2cfd9f7e022787bf40af8286f (commit)
      from  9eacce005f347eeaba824f143a00974aafc0d27d (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/09930cc20a8532a2cfd9f7e022787bf40af8286f

commit 09930cc20a8532a2cfd9f7e022787bf40af8286f
Author: Igor Ivanov <Igor.Ivanov at itseez.com>
Date:   Fri Sep 12 16:11:50 2014 +0300

    netmod/mxm: Change supported MXM version to 3.1+
    
    - Changed MXM version that is supported to 3.1+
    - Added additional debug output for iov
    - Added check for datatype mismatch when receiving noncontiguous data
    
    Signed-off-by: Igor Ivanov <Igor.Ivanov at itseez.com>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_cancel.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_cancel.c
index 2de472d..202d149 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_cancel.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_cancel.c
@@ -50,7 +50,7 @@ int MPID_nem_mxm_cancel_send(MPIDI_VC_t * vc, MPID_Request * req)
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPID_nem_mxm_cancel_recv(MPIDI_VC_t * vc, MPID_Request * req)
 {
-    int mpi_errno = MPI_SUCCESS;
+    int mpi_errno ATTRIBUTE((unused)) = MPI_SUCCESS;
     mxm_error_t ret = MXM_OK;
     MPID_nem_mxm_req_area *req_area = REQ_BASE(req);
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
index 6d3b9b2..bd80a69 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
@@ -149,7 +149,7 @@ typedef struct {
 } MPID_nem_mxm_vc_area;
 
 /* macro for mxm private in VC */
-#define VC_BASE(vcp) ((MPID_nem_mxm_vc_area *)((vcp)->ch.netmod_area.padding))
+#define VC_BASE(vcp) ((vcp) ? (MPID_nem_mxm_vc_area *)((vcp)->ch.netmod_area.padding) : NULL)
 
 /* The req provides a generic buffer in which network modules can store
    private fields This removes all dependencies from the req structure
@@ -172,7 +172,7 @@ typedef struct {
 } MPID_nem_mxm_req_area;
 
 /* macro for mxm private in REQ */
-#define REQ_BASE(reqp) ((MPID_nem_mxm_req_area *)((reqp)->ch.netmod_area.padding))
+#define REQ_BASE(reqp) ((reqp) ? (MPID_nem_mxm_req_area *)((reqp)->ch.netmod_area.padding) : NULL)
 
 typedef struct MPID_nem_mxm_module_t {
     char *runtime_version;
@@ -212,7 +212,7 @@ static inline void list_grow_mxm_req(list_head_t * list_head)
 
 static inline void _mxm_barrier(void)
 {
-    int pmi_errno;
+    int pmi_errno ATTRIBUTE((unused));
 
 #ifdef USE_PMI2_API
     pmi_errno = PMI2_KVS_Fence();
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c
index 87259f3..0defcbb 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c
@@ -162,18 +162,22 @@ int MPID_nem_mxm_recv(MPIDI_VC_t * vc, MPID_Request * rreq)
         int dt_contig;
         MPI_Aint dt_true_lb;
         MPID_Datatype *dt_ptr;
-        MPID_nem_mxm_vc_area *vc_area = VC_BASE(vc);
-        MPID_nem_mxm_req_area *req_area = REQ_BASE(rreq);
+        MPID_nem_mxm_vc_area *vc_area = NULL;
+        MPID_nem_mxm_req_area *req_area = NULL;
 
         MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz,
                                 dt_ptr, dt_true_lb);
         rreq->dev.OnDataAvail = NULL;
         rreq->dev.tmpbuf = NULL;
         rreq->ch.vc = vc;
+        rreq->ch.noncontig = FALSE;
 
         _dbg_mxm_output(5,
                         "Recv ========> Getting USER msg for req %p (context %d rank %d tag %d size %d) \n",
-                        rreq, context_id, source, tag, data_sz);
+                        rreq, context_id, rreq->dev.match.parts.rank, tag, data_sz);
+
+        vc_area = VC_BASE(vc);
+        req_area = REQ_BASE(rreq);
 
         req_area->ctx = rreq;
         req_area->iov_buf = req_area->tmp_buf;
@@ -187,6 +191,7 @@ int MPID_nem_mxm_recv(MPIDI_VC_t * vc, MPID_Request * rreq)
             req_area->iov_buf[0].length = data_sz;
         }
         else {
+            rreq->ch.noncontig = TRUE;
             mpi_errno = _mxm_process_rdtype(&rreq, rreq->dev.datatype, dt_ptr, data_sz,
                                             rreq->dev.user_buf, rreq->dev.user_count,
                                             &req_area->iov_buf, &req_area->iov_count);
@@ -194,7 +199,7 @@ int MPID_nem_mxm_recv(MPIDI_VC_t * vc, MPID_Request * rreq)
                 MPIU_ERR_POP(mpi_errno);
         }
 
-        mpi_errno = _mxm_irecv((vc ? vc_area->mxm_ep : NULL), req_area,
+        mpi_errno = _mxm_irecv((vc_area ? vc_area->mxm_ep : NULL), req_area,
                                tag,
                                (rreq->comm ? (mxm_mq_h) rreq->comm->dev.ch.netmod_priv : mxm_obj->
                                 mxm_mq), _mxm_tag_mpi2mxm(tag, context_id));
@@ -217,17 +222,18 @@ static int _mxm_handle_rreq(MPID_Request * req)
 {
     int complete = FALSE;
     int dt_contig;
-    MPI_Aint dt_true_lb;
+    MPI_Aint dt_true_lb ATTRIBUTE((unused));
     MPIDI_msg_sz_t userbuf_sz;
     MPID_Datatype *dt_ptr;
     MPIDI_msg_sz_t data_sz;
     MPIDI_VC_t *vc = NULL;
-    MPID_nem_mxm_vc_area *vc_area = NULL;
-    MPID_nem_mxm_req_area *req_area = NULL
+    MPID_nem_mxm_vc_area *vc_area ATTRIBUTE((unused)) = NULL;
+    MPID_nem_mxm_req_area *req_area = NULL;
+    void *tmp_buf = NULL;
 
     MPIU_THREAD_CS_ENTER(MSGQUEUE, req);
-    complete = MPIDI_CH3U_Recvq_DP(req)
-        MPIU_THREAD_CS_EXIT(MSGQUEUE, req);
+    complete = MPIDI_CH3U_Recvq_DP(req);
+    MPIU_THREAD_CS_EXIT(MSGQUEUE, req);
     if (!complete) {
         return TRUE;
     }
@@ -270,29 +276,60 @@ static int _mxm_handle_rreq(MPID_Request * req)
                                                      req->dev.recv_data_sz, userbuf_sz);
     }
 
-    if ((!dt_contig) && (req->dev.tmpbuf != NULL)) {
-        MPIDI_msg_sz_t last;
+    if (!dt_contig) {
+        MPIDI_msg_sz_t last = 0;
 
-        last = req->dev.recv_data_sz;
-        MPID_Segment_unpack(req->dev.segment_ptr, 0, &last, req->dev.tmpbuf);
-        MPIU_Free(req->dev.tmpbuf);
+        if (req->dev.tmpbuf != NULL) {
+            last = req->dev.recv_data_sz;
+            MPID_Segment_unpack(req->dev.segment_ptr, 0, &last, req->dev.tmpbuf);
+            tmp_buf = req->dev.tmpbuf;
+        }
+        else {
+            mxm_req_buffer_t * iov_buf;
+            MPID_IOV *iov;
+            int n_iov = 0;
+            int index;
+
+            last = req->dev.recv_data_sz;
+            n_iov = req_area->iov_count;
+            iov_buf = req_area->iov_buf;
+            if (last && n_iov > 0) {
+                iov = MPIU_Malloc(n_iov * sizeof(*iov));
+                MPIU_Assert(iov);
+
+                n_iov = req_area->iov_count;
+                iov_buf = req_area->iov_buf;
+                for (index = 0; index < n_iov; index++) {
+                    iov[index].MPID_IOV_BUF = iov_buf[index].ptr;
+                    iov[index].MPID_IOV_LEN = iov_buf[index].length;
+                }
+
+                MPID_Segment_unpack_vector(req->dev.segment_ptr, req->dev.segment_first, &last, iov, &n_iov);
+                MPIU_Free(iov);
+            }
+            if (req_area->iov_count > MXM_MPICH_MAX_IOV) {
+                tmp_buf = req_area->iov_buf;
+                req_area->iov_buf = req_area->tmp_buf;
+                req_area->iov_count = 0;
+            }
+        }
         if (last != data_sz) {
             MPIR_STATUS_SET_COUNT(req->status, last);
             if (req->dev.recv_data_sz <= userbuf_sz) {
+                /* If the data can't be unpacked, then we have a
+                 *  mismatch between the datatype and the amount of
+                 *  data received.  Throw away received data.
+                 */
                 MPIU_ERR_SETSIMPLE(req->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch");
             }
         }
     }
 
-    if (req_area->iov_count > MXM_MPICH_MAX_IOV) {
-        MPIU_Free(req_area->iov_buf);
-        req_area->iov_buf = req_area->tmp_buf;
-        req_area->iov_count = 0;
-    }
-
     MPIDI_CH3U_Handle_recv_req(vc, req, &complete);
     MPIU_Assert(complete == TRUE);
 
+    if (tmp_buf) MPIU_Free(tmp_buf);
+
     return complete;
 }
 
@@ -422,6 +459,14 @@ static int _mxm_process_rdtype(MPID_Request ** rreq_p, MPI_Datatype datatype,
     MPID_Segment_unpack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, iov, &n_iov);
     MPIU_Assert(last == rreq->dev.segment_size);
 
+#if defined(MXM_DEBUG) && (MXM_DEBUG > 0)
+    _dbg_mxm_output(7, "Recv Noncontiguous data vector %i entries (free slots : %i)\n", n_iov, MXM_REQ_DATA_MAX_IOV);
+    for(index = 0; index < n_iov; index++) {
+        _dbg_mxm_output(7, "======= Recv iov[%i] = ptr : %p, len : %i \n",
+                        index, iov[index].MPID_IOV_BUF, iov[index].MPID_IOV_LEN);
+    }
+#endif
+
     if (n_iov <= MXM_REQ_DATA_MAX_IOV) {
         if (n_iov > MXM_MPICH_MAX_IOV) {
             *iov_buf = (mxm_req_buffer_t *) MPIU_Malloc(n_iov * sizeof(**iov_buf));
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c
index 701ed35..f91edf3 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c
@@ -577,6 +577,7 @@ int MPID_nem_mxm_issend(MPIDI_VC_t * vc, const void *buf, int count, MPI_Datatyp
             MPIDI_msg_sz_t last;
             MPI_Aint packsize = 0;
 
+            sreq->ch.noncontig = TRUE;
             sreq->dev.segment_ptr = MPID_Segment_alloc();
             MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                                  "**nomem", "**nomem %s", "MPID_Segment_alloc");
@@ -593,7 +594,6 @@ int MPID_nem_mxm_issend(MPIDI_VC_t * vc, const void *buf, int count, MPI_Datatyp
                 req_area->iov_buf[0].ptr = sreq->dev.tmpbuf;
                 req_area->iov_buf[0].length = last;
             }
-            sreq->ch.noncontig = TRUE;
         }
     }
 
@@ -796,6 +796,14 @@ static int _mxm_process_sdtype(MPID_Request ** sreq_p, MPI_Datatype datatype,
     MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, iov, &n_iov);
     MPIU_Assert(last == sreq->dev.segment_size);
 
+#if defined(MXM_DEBUG) && (MXM_DEBUG > 0)
+    _dbg_mxm_output(7, "Send Noncontiguous data vector %i entries (free slots : %i)\n", n_iov, MXM_REQ_DATA_MAX_IOV);
+    for(index = 0; index < n_iov; index++) {
+        _dbg_mxm_output(7, "======= Recv iov[%i] = ptr : %p, len : %i \n",
+                        index, iov[index].MPID_IOV_BUF, iov[index].MPID_IOV_LEN);
+    }
+#endif
+
     if (n_iov > MXM_MPICH_MAX_IOV) {
         *iov_buf = (mxm_req_buffer_t *) MPIU_Malloc(n_iov * sizeof(**iov_buf));
         MPIU_Assert(*iov_buf);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/subconfigure.m4 b/src/mpid/ch3/channels/nemesis/netmod/mxm/subconfigure.m4
index 28dcdd2..29f829a 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/subconfigure.m4
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/subconfigure.m4
@@ -27,14 +27,14 @@ AM_COND_IF([BUILD_NEMESIS_NETMOD_MXM],[
 #error "MXM Version is less than 1.5, please upgrade"
 #endif
 #
-#if MXM_API < MXM_VERSION(3,0)
-#error "MXM Version is less than 3.0, please upgrade"
+#if MXM_API < MXM_VERSION(3,1)
+#error "MXM Version is less than 3.1, please upgrade"
 #endif],
      [int a=0;],
      mxm_api_version=yes,
      mxm_api_version=no)
      if test "$mxm_api_version" = no ; then
-        AC_MSG_ERROR(['MXM API version Problem.  Are you running a recent version of MXM (at least 3.0)?'])
+        AC_MSG_ERROR(['MXM API version Problem.  Are you running a recent version of MXM (at least 3.1)?'])
      fi;
      AC_DEFINE([ENABLE_COMM_OVERRIDES], 1, [define to add per-vc function pointers to override send and recv functions])
      PAC_APPEND_FLAG([-lmxm],[EXTERNAL_LIBS])

-----------------------------------------------------------------------

Summary of changes:
 .../ch3/channels/nemesis/netmod/mxm/mxm_cancel.c   |    2 +-
 .../ch3/channels/nemesis/netmod/mxm/mxm_impl.h     |    6 +-
 .../ch3/channels/nemesis/netmod/mxm/mxm_poll.c     |   85 +++++++++++++++-----
 .../ch3/channels/nemesis/netmod/mxm/mxm_send.c     |   10 ++-
 .../channels/nemesis/netmod/mxm/subconfigure.m4    |    6 +-
 5 files changed, 81 insertions(+), 28 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list