[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2b3-53-gc229b00

Service Account noreply at mpich.org
Thu Jun 11 09:11:36 CDT 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  c229b00e9f8a0e5acbc4cff8cac8957a46a9d6d5 (commit)
      from  125304f7bd5c1cf48d6df8e6a0a2a0d25196907e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/c229b00e9f8a0e5acbc4cff8cac8957a46a9d6d5

commit c229b00e9f8a0e5acbc4cff8cac8957a46a9d6d5
Author: Charles J Archer <charles.j.archer at intel.com>
Date:   Wed Jun 10 21:19:31 2015 -0700

    OFI Netmod: Improve *msg functionality and fix bugs
    
     * Fix 32-bit integer overflow
     * Add non-contig support for contig *Msg functions when OFI permits
     * Remove asserts for better error handling in out of memory condition
    
    Change-Id: I0eaa848c9919b7f4b3088b64b9fef79fd5ad2406
    Signed-off-by: Charles J Archer <charles.j.archer at intel.com>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c
index 7ccae8e..b951a5c 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c
@@ -166,10 +166,10 @@ static inline int MPID_nem_ofi_handle_packet(cq_tagged_entry_t * wc ATTRIBUTE((u
 
     BEGIN_FUNC(FCNAME);
     if (MPID_cc_get(rreq->cc) == 1) {
-        vc = REQ_OFI(rreq)->vc;
-        MPIU_Assert(vc);
-        MPI_RC(MPID_nem_handle_pkt(vc, REQ_OFI(rreq)->pack_buffer, REQ_OFI(rreq)->pack_buffer_size))
-            MPIU_Free(REQ_OFI(rreq)->pack_buffer);
+      vc = REQ_OFI(rreq)->vc;
+      MPIU_Assert(vc);
+      MPI_RC(MPID_nem_handle_pkt(vc, REQ_OFI(rreq)->pack_buffer, REQ_OFI(rreq)->pack_buffer_size));
+      MPIU_Free(REQ_OFI(rreq)->pack_buffer);
     }
     MPIDI_CH3U_Request_complete(rreq);
     END_FUNC_RC(FCNAME);
@@ -215,10 +215,13 @@ static inline int MPID_nem_ofi_preposted_callback(cq_tagged_entry_t * wc, MPID_R
     MPIU_Assert(vc);
     VC_READY_CHECK(vc);
 
-    pkt_len = rreq->dev.user_count;
+    pkt_len = REQ_OFI(rreq)->msg_bytes;
     pack_buffer = (char *) MPIU_Malloc(pkt_len);
-    MPIU_ERR_CHKANDJUMP1(pack_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
-                         "**nomem", "**nomem %s", "Pack Buffer alloc");
+    /* If the pack buffer is NULL, let OFI handle the truncation
+     * in the progress loop
+     */
+    if(pack_buffer == NULL)
+      pkt_len = 0;
     c = 1;
     MPID_nem_ofi_create_req(&new_rreq, 1);
     MPID_cc_incr(new_rreq->cc_ptr, &c);
@@ -248,15 +251,17 @@ static inline int MPID_nem_ofi_preposted_callback(cq_tagged_entry_t * wc, MPID_R
                      wc->tag | MPID_MSG_CTS, &(REQ_OFI(sreq)->ofi_context)), tsend);
     MPIU_Assert(gl_data.persistent_req == rreq);
 
-    rreq->dev.user_count = 0;
     FI_RC_RETRY(fi_trecv(gl_data.endpoint,
-                   &rreq->dev.user_count,
-                   sizeof rreq->dev.user_count,
+                   &REQ_OFI(rreq)->msg_bytes,
+                   sizeof REQ_OFI(rreq)->msg_bytes,
                    gl_data.mr,
                    FI_ADDR_UNSPEC,
                    MPID_MSG_RTS,
                    GET_RCD_IGNORE_MASK(),
                    &(REQ_OFI(rreq)->ofi_context)), trecv);
+    /* Return a proper error to MPI to indicate out of memory condition */
+    MPIU_ERR_CHKANDJUMP1(pack_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
+                         "**nomem", "**nomem %s", "Pack Buffer alloc");
     END_FUNC_RC(FCNAME);
 }
 
@@ -319,8 +324,8 @@ int MPID_nem_ofi_cm_init(MPIDI_PG_t * pg_p, int pg_rank ATTRIBUTE((unused)))
     REQ_OFI(persistent_req)->vc = NULL;
     REQ_OFI(persistent_req)->event_callback = MPID_nem_ofi_preposted_callback;
     FI_RC_RETRY(fi_trecv(gl_data.endpoint,
-                   &persistent_req->dev.user_count,
-                   sizeof persistent_req->dev.user_count,
+                   &REQ_OFI(persistent_req)->msg_bytes,
+                   sizeof REQ_OFI(persistent_req)->msg_bytes,
                    gl_data.mr,
                    FI_ADDR_UNSPEC,
                    MPID_MSG_RTS,
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
index 0aa9c4e..22560da 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
@@ -57,6 +57,7 @@ typedef struct {
     MPID_Request *persistent_req;       /* Unexpected request queue    */
     MPID_Request *conn_req;     /* Connection request          */
     MPIDI_Comm_ops_t comm_ops;
+    size_t iov_limit;           /* Max send iovec limit        */
     int rts_cts_in_flight;
     int api_set;
 } MPID_nem_ofi_global_t;
@@ -80,13 +81,17 @@ typedef struct {
 typedef struct {
     context_t ofi_context;      /* Context Object              */
     void *addr;                 /* OFI Address                 */
-    event_callback_fn event_callback;   /* Callback Event              */
-    char *pack_buffer;          /* MPI Pack Buffer             */
-    int pack_buffer_size;       /* Pack buffer size            */
+    event_callback_fn event_callback;   /* Callback Event      */
+    char  *pack_buffer;         /* MPI Pack Buffer             */
+    size_t pack_buffer_size;    /* Pack buffer size            */
+    size_t msg_bytes;           /* msg api bytes               */
+    int    iov_count;           /* Number of iovecs            */
+    void *real_hdr;             /* Extended header             */
     int match_state;            /* State of the match          */
     int req_started;            /* Request state               */
     MPIDI_VC_t *vc;             /* VC paired with this request */
     uint64_t tag;               /* 64 bit tag request          */
+    struct iovec iov[3];        /* scatter gather list         */
     MPID_Request *parent;       /* Parent request              */
 } MPID_nem_ofi_req_t;
 #define REQ_OFI(req) ((MPID_nem_ofi_req_t *)((req)->ch.netmod_area.padding))
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
index 40e6759..cc8eec8 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
@@ -126,8 +126,8 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
                     &gl_data.fabric,    /* Out:  Fabric descriptor */
                     NULL), openfabric); /* Context: fabric events  */
 
+    gl_data.iov_limit = prov_use->tx_attr->iov_limit;
     gl_data.api_set = API_SET_1;
-
     /* ------------------------------------------------------------------------ */
     /* Create the access domain, which is the physical or virtual network or    */
     /* hardware port/collection of ports.  Returns a domain object that can be  */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
index db64284..e7b042a 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
@@ -125,18 +125,36 @@ static int MPID_nem_ofi_data_callback(cq_tagged_entry_t * wc, MPID_Request * sre
     switch (wc->tag & MPID_PROTOCOL_MASK) {
     case MPID_MSG_CTS | MPID_MSG_RTS:
         vc = REQ_OFI(sreq)->vc;
-        FI_RC_RETRY(fi_tsend(gl_data.endpoint,
-                       REQ_OFI(sreq)->pack_buffer,
-                       REQ_OFI(sreq)->pack_buffer_size,
-                       gl_data.mr,
-                       VC_OFI(vc)->direct_addr,
-                       wc->tag | MPID_MSG_DATA, (void *) &(REQ_OFI(sreq)->ofi_context)), tsend);
+        if(REQ_OFI(sreq)->pack_buffer) {
+          FI_RC_RETRY(fi_tsend(gl_data.endpoint,
+                               REQ_OFI(sreq)->pack_buffer,
+                               REQ_OFI(sreq)->pack_buffer_size,
+                               gl_data.mr,
+                               VC_OFI(vc)->direct_addr,
+                               wc->tag | MPID_MSG_DATA,
+                               (void *) &(REQ_OFI(sreq)->ofi_context)), tsend);
+        } else {
+          struct  fi_msg_tagged msg;
+          void   *desc    = NULL;
+          msg.msg_iov     = REQ_OFI(sreq)->iov;
+          msg.desc        = &desc;
+          msg.iov_count   = REQ_OFI(sreq)->iov_count;
+          msg.addr        = VC_OFI(vc)->direct_addr;
+          msg.tag         = wc->tag | MPID_MSG_DATA,
+          msg.ignore      = 0ULL;
+          msg.context     = &(REQ_OFI(sreq)->ofi_context);
+          msg.data        = 0ULL;
+          FI_RC_RETRY(fi_tsendmsg(gl_data.endpoint,&msg,0ULL),tsend);
+        }
         MPIDI_CH3U_Request_complete(sreq);
         break;
     case MPID_MSG_CTS | MPID_MSG_RTS | MPID_MSG_DATA:
         if (REQ_OFI(sreq)->pack_buffer)
             MPIU_Free(REQ_OFI(sreq)->pack_buffer);
 
+        if (REQ_OFI(sreq)->real_hdr)
+            MPIU_Free(REQ_OFI(sreq)->real_hdr);
+
         reqFn = sreq->dev.OnDataAvail;
         if (!reqFn) {
             MPIDI_CH3U_Request_complete(sreq);
@@ -171,11 +189,7 @@ static int MPID_nem_ofi_cts_recv_callback(cq_tagged_entry_t * wc, MPID_Request *
 
 /* ------------------------------------------------------------------------ */
 /* The nemesis API implementations:                                         */
-/* These functions currently memory copy into a pack buffer before sending  */
-/* To improve performance, we can replace the memory copy with a non-contig */
-/* send (using tsendmsg)                                                    */
-/* For now, the memory copy is the simplest implementation of these         */
-/* functions over a tagged msg interface                                    */
+/* Use packing if iovecs are not supported by the OFI provider              */
 /* ------------------------------------------------------------------------ */
 #undef FCNAME
 #define FCNAME DECL_FUNC(MPID_nem_ofi_iSendContig)
@@ -183,25 +197,54 @@ int MPID_nem_ofi_iSendContig(MPIDI_VC_t * vc,
                              MPID_Request * sreq,
                              void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz)
 {
-    int pgid, c, pkt_len, mpi_errno = MPI_SUCCESS;
-    char *pack_buffer;
+    int pgid, c, mpi_errno = MPI_SUCCESS;
+    char *pack_buffer = NULL;
     uint64_t match_bits;
     MPID_Request *cts_req;
     MPIDI_msg_sz_t buf_offset = 0;
-
+    size_t         pkt_len;
     BEGIN_FUNC(FCNAME);
     MPIU_Assert(hdr_sz <= (MPIDI_msg_sz_t) sizeof(MPIDI_CH3_Pkt_t));
     MPID_nem_ofi_init_req(sreq);
     pkt_len = sizeof(MPIDI_CH3_Pkt_t) + sreq->dev.ext_hdr_sz + data_sz;
-    pack_buffer = MPIU_Malloc(pkt_len);
-    MPIU_Assert(pack_buffer);
-    MPIU_Memcpy(pack_buffer, hdr, hdr_sz);
-    buf_offset += sizeof(MPIDI_CH3_Pkt_t);
-    if (sreq->dev.ext_hdr_sz > 0) {
+    if (sreq->dev.ext_hdr_sz > 0 && gl_data.iov_limit > 2) {
+      REQ_OFI(sreq)->real_hdr        = MPIU_Malloc(sizeof(MPIDI_CH3_Pkt_t)+sreq->dev.ext_hdr_sz);
+      MPIU_ERR_CHKANDJUMP1(REQ_OFI(sreq)->real_hdr == NULL, mpi_errno, MPI_ERR_OTHER,
+                            "**nomem", "**nomem %s", "iSendContig extended header allocation");
+      REQ_OFI(sreq)->iov[0].iov_base = REQ_OFI(sreq)->real_hdr;
+      REQ_OFI(sreq)->iov[0].iov_len  = hdr_sz;
+      REQ_OFI(sreq)->iov[1].iov_base = REQ_OFI(sreq)->real_hdr+sizeof(MPIDI_CH3_Pkt_t);
+      REQ_OFI(sreq)->iov[1].iov_len  = sreq->dev.ext_hdr_sz;
+      REQ_OFI(sreq)->iov[2].iov_base = data;
+      REQ_OFI(sreq)->iov[2].iov_len  = data_sz;
+      REQ_OFI(sreq)->iov_count       = 3;
+      MPIU_Memcpy(REQ_OFI(sreq)->real_hdr, hdr, hdr_sz);
+      MPIU_Memcpy(REQ_OFI(sreq)->real_hdr + sizeof(MPIDI_CH3_Pkt_t),
+                  sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
+      }
+    else if(sreq->dev.ext_hdr_sz == 0 && gl_data.iov_limit > 1) {
+        REQ_OFI(sreq)->real_hdr = MPIU_Malloc(sizeof(MPIDI_CH3_Pkt_t));
+        MPIU_ERR_CHKANDJUMP1(REQ_OFI(sreq)->real_hdr == NULL, mpi_errno, MPI_ERR_OTHER,
+                             "**nomem", "**nomem %s", "iSendContig header allocation");
+        MPIU_Memcpy(REQ_OFI(sreq)->real_hdr, hdr, hdr_sz);
+        REQ_OFI(sreq)->iov[0].iov_base = REQ_OFI(sreq)->real_hdr;
+        REQ_OFI(sreq)->iov[0].iov_len  = sizeof(MPIDI_CH3_Pkt_t);
+        REQ_OFI(sreq)->iov[1].iov_base = data;
+        REQ_OFI(sreq)->iov[1].iov_len  = data_sz;
+        REQ_OFI(sreq)->iov_count       = 2;
+    }
+    else {
+      pack_buffer = MPIU_Malloc(pkt_len);
+      MPIU_ERR_CHKANDJUMP1(pack_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
+                           "**nomem", "**nomem %s", "iSendContig pack buffer allocation");
+      MPIU_Memcpy(pack_buffer, hdr, hdr_sz);
+      buf_offset += sizeof(MPIDI_CH3_Pkt_t);
+      if (sreq->dev.ext_hdr_sz > 0) {
         MPIU_Memcpy(pack_buffer + buf_offset, sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
         buf_offset += sreq->dev.ext_hdr_sz;
+      }
+      MPIU_Memcpy(pack_buffer + buf_offset, data, data_sz);
     }
-    MPIU_Memcpy(pack_buffer + buf_offset, data, data_sz);
     START_COMM();
     END_FUNC_RC(FCNAME);
 }
@@ -211,23 +254,25 @@ int MPID_nem_ofi_iSendContig(MPIDI_VC_t * vc,
 int MPID_nem_ofi_SendNoncontig(MPIDI_VC_t * vc,
                                MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz)
 {
-    int c, pgid, pkt_len, mpi_errno = MPI_SUCCESS;
+    int c, pgid, mpi_errno = MPI_SUCCESS;
     char *pack_buffer;
     MPI_Aint data_sz;
     uint64_t match_bits;
     MPID_Request *cts_req;
     MPIDI_msg_sz_t first, last;
     MPIDI_msg_sz_t buf_offset = 0;
-
+    void          *data       = NULL;
+    size_t         pkt_len;
     BEGIN_FUNC(FCNAME);
     MPIU_Assert(hdr_sz <= (MPIDI_msg_sz_t) sizeof(MPIDI_CH3_Pkt_t));
-
+    MPID_nem_ofi_init_req(sreq);
     first = sreq->dev.segment_first;
     last = sreq->dev.segment_size;
     data_sz = sreq->dev.segment_size - sreq->dev.segment_first;
     pkt_len = sizeof(MPIDI_CH3_Pkt_t) + sreq->dev.ext_hdr_sz + data_sz;
     pack_buffer = MPIU_Malloc(pkt_len);
-    MPIU_Assert(pack_buffer);
+    MPIU_ERR_CHKANDJUMP1(pack_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
+                         "**nomem", "**nomem %s", "SendNonContig pack buffer allocation");
     MPIU_Memcpy(pack_buffer, hdr, hdr_sz);
     buf_offset += sizeof(MPIDI_CH3_Pkt_t);
     if (sreq->dev.ext_hdr_sz > 0) {
@@ -247,11 +292,12 @@ int MPID_nem_ofi_iStartContigMsg(MPIDI_VC_t * vc,
                                  MPIDI_msg_sz_t hdr_sz,
                                  void *data, MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr)
 {
-    int pkt_len, c, pgid, mpi_errno = MPI_SUCCESS;
+    int c, pgid, mpi_errno = MPI_SUCCESS;
     MPID_Request *sreq;
     MPID_Request *cts_req;
-    char *pack_buffer;
+    char    *pack_buffer = NULL;
     uint64_t match_bits;
+    size_t   pkt_len;
     BEGIN_FUNC(FCNAME);
     MPIU_Assert(hdr_sz <= (MPIDI_msg_sz_t) sizeof(MPIDI_CH3_Pkt_t));
 
@@ -260,11 +306,23 @@ int MPID_nem_ofi_iStartContigMsg(MPIDI_VC_t * vc,
     sreq->dev.OnDataAvail = NULL;
     sreq->dev.next = NULL;
     pkt_len = sizeof(MPIDI_CH3_Pkt_t) + data_sz;
-    pack_buffer = MPIU_Malloc(pkt_len);
-    MPIU_Assert(pack_buffer);
-    MPIU_Memcpy((void *) pack_buffer, hdr, hdr_sz);
-    if (data_sz)
+    if(gl_data.iov_limit > 1) {
+      REQ_OFI(sreq)->real_hdr = MPIU_Malloc(sizeof(MPIDI_CH3_Pkt_t));
+      MPIU_Memcpy(REQ_OFI(sreq)->real_hdr, hdr, hdr_sz);
+      REQ_OFI(sreq)->iov[0].iov_base = REQ_OFI(sreq)->real_hdr;
+      REQ_OFI(sreq)->iov[0].iov_len  = sizeof(MPIDI_CH3_Pkt_t);
+      REQ_OFI(sreq)->iov[1].iov_base = data;
+      REQ_OFI(sreq)->iov[1].iov_len  = data_sz;
+      REQ_OFI(sreq)->iov_count       = 2;
+    }
+    else {
+      pack_buffer = MPIU_Malloc(pkt_len);
+      MPIU_ERR_CHKANDJUMP1(pack_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
+                           "**nomem", "**nomem %s", "iStartContig pack buffer allocation");
+      MPIU_Memcpy((void *) pack_buffer, hdr, hdr_sz);
+      if (data_sz)
         MPIU_Memcpy((void *) (pack_buffer + sizeof(MPIDI_CH3_Pkt_t)), data, data_sz);
+    }
     START_COMM();
     *sreq_ptr = sreq;
     END_FUNC_RC(FCNAME);

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c  |   29 +++--
 .../ch3/channels/nemesis/netmod/ofi/ofi_impl.h     |   11 ++-
 .../ch3/channels/nemesis/netmod/ofi/ofi_init.c     |    2 +-
 src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c |  118 +++++++++++++++-----
 4 files changed, 114 insertions(+), 46 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list