[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2-15-g8e6ce14

Service Account noreply at mpich.org
Tue Dec 8 12:36:07 CST 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  8e6ce14305048395df675d3f8636f8996c620627 (commit)
      from  0deaab3b9316ccba0aa64315cbd2e1b1ab3017b4 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/8e6ce14305048395df675d3f8636f8996c620627

commit 8e6ce14305048395df675d3f8636f8996c620627
Author: Kim McMahon <kmcmahon at cray.com>
Date:   Wed Dec 2 14:56:12 2015 -0600

    remove use of send-side CQ tag field in ofi netmod
    
    description:
    A libfabric provider is not required to supply a valid tag field for
    send-side events returned by fi_cq_read().  The ofi netmod was relying
    on this field, and all MPI tests hang during finalize if it isn't
    set.  The TCP libfabric provider originally supplied a valid tag for
    send events, but this has recently changed.  The gni provider does not
    supply a valid tag field for send events.
    
    author: Kim McMahon, Cray, Inc.
    
    Signed-off-by: Charles J Archer <charles.j.archer at intel.com>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
index 58491ce..46c6c64 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
@@ -112,6 +112,8 @@
 /* General handler for RTS-CTS-Data protocol.  Waits for the cc counter     */
 /* to hit two (send RTS and receive CTS decrementers) before kicking off the*/
 /* bulk data transfer.  On data send completion, the request can be freed   */
+/* Handles SEND-side events only.  We cannot rely on wc->tag field being    */
+/* set for these events, so we must use the TAG stored in the sreq.         */
 /* ------------------------------------------------------------------------ */
 #undef FCNAME
 #define FCNAME DECL_FUNC(MPID_nem_ofi_data_callback)
@@ -122,49 +124,32 @@ static int MPID_nem_ofi_data_callback(cq_tagged_entry_t * wc, MPID_Request * sre
     req_fn reqFn;
     uint64_t tag = 0;
     BEGIN_FUNC(FCNAME);
-    switch (wc->tag & MPID_PROTOCOL_MASK) {
-    case MPID_MSG_CTS | MPID_MSG_RTS:
-        vc = REQ_OFI(sreq)->vc;
-        if(REQ_OFI(sreq)->pack_buffer) {
-          FI_RC_RETRY(fi_tsend(gl_data.endpoint,
-                               REQ_OFI(sreq)->pack_buffer,
-                               REQ_OFI(sreq)->pack_buffer_size,
-                               gl_data.mr,
-                               VC_OFI(vc)->direct_addr,
-                               wc->tag | MPID_MSG_DATA,
-                               (void *) &(REQ_OFI(sreq)->ofi_context)), tsend);
-        } else {
-          struct  fi_msg_tagged msg;
-          void   *desc    = NULL;
-          msg.msg_iov     = REQ_OFI(sreq)->iov;
-          msg.desc        = &desc;
-          msg.iov_count   = REQ_OFI(sreq)->iov_count;
-          msg.addr        = VC_OFI(vc)->direct_addr;
-          msg.tag         = wc->tag | MPID_MSG_DATA,
-          msg.ignore      = 0ULL;
-          msg.context     = &(REQ_OFI(sreq)->ofi_context);
-          msg.data        = 0ULL;
-          FI_RC_RETRY(fi_tsendmsg(gl_data.endpoint,&msg,0ULL),tsend);
-        }
-        MPIDI_CH3I_NM_OFI_RC(MPID_Request_complete(sreq));
-
-        break;
+    switch (REQ_OFI(sreq)->tag & MPID_PROTOCOL_MASK) {
     case MPID_MSG_CTS | MPID_MSG_RTS | MPID_MSG_DATA:
-        if (REQ_OFI(sreq)->pack_buffer)
-            MPIU_Free(REQ_OFI(sreq)->pack_buffer);
+        /* Verify request is complete prior to freeing buffers.
+         * Multiple DATA events may arrive because we need
+         * to store updated TAG values in the sreq.
+         */
+        if (MPID_cc_get(sreq->cc) == 1) {
+            if (REQ_OFI(sreq)->pack_buffer)
+                MPIU_Free(REQ_OFI(sreq)->pack_buffer);
 
-        if (REQ_OFI(sreq)->real_hdr)
-            MPIU_Free(REQ_OFI(sreq)->real_hdr);
+            if (REQ_OFI(sreq)->real_hdr)
+                MPIU_Free(REQ_OFI(sreq)->real_hdr);
 
-        reqFn = sreq->dev.OnDataAvail;
-        if (!reqFn) {
+            reqFn = sreq->dev.OnDataAvail;
+            if (!reqFn) {
+                MPIDI_CH3I_NM_OFI_RC(MPID_Request_complete(sreq));
+            }
+            else {
+                vc = REQ_OFI(sreq)->vc;
+                MPIDI_CH3I_NM_OFI_RC(reqFn(vc, sreq, &complete));
+            }
+            gl_data.rts_cts_in_flight--;
+
+        } else {
             MPIDI_CH3I_NM_OFI_RC(MPID_Request_complete(sreq));
         }
-        else {
-            vc = REQ_OFI(sreq)->vc;
-            MPIDI_CH3I_NM_OFI_RC(reqFn(vc, sreq, &complete));
-        }
-        gl_data.rts_cts_in_flight--;
         break;
     case MPID_MSG_RTS:
         MPIDI_CH3I_NM_OFI_RC(MPID_Request_complete(sreq));
@@ -176,14 +161,47 @@ static int MPID_nem_ofi_data_callback(cq_tagged_entry_t * wc, MPID_Request * sre
 /* ------------------------------------------------------------------------ */
 /* Signals the CTS has been received.  Call MPID_nem_ofi_data_callback on   */
 /* the parent send request to kick off the bulk data transfer               */
+/* Handles RECV-side events only.  We rely on wc->tag field being set for   */
+/* these events.                                                            */
 /* ------------------------------------------------------------------------ */
 #undef FCNAME
 #define FCNAME DECL_FUNC(MPID_nem_ofi_cts_recv_callback)
 static int MPID_nem_ofi_cts_recv_callback(cq_tagged_entry_t * wc, MPID_Request * rreq)
 {
     int mpi_errno = MPI_SUCCESS;
+    MPID_Request *preq;
+    MPIDI_VC_t *vc;
     BEGIN_FUNC(FCNAME);
-    MPIDI_CH3I_NM_OFI_RC(MPID_nem_ofi_data_callback(wc, REQ_OFI(rreq)->parent));
+    preq = REQ_OFI(rreq)->parent;
+    switch (wc->tag & MPID_PROTOCOL_MASK) {
+    case MPID_MSG_CTS | MPID_MSG_RTS:
+        vc = REQ_OFI(preq)->vc;
+        /* store tag in the request for SEND-side event processing */
+        REQ_OFI(preq)->tag = wc->tag | MPID_MSG_DATA;
+        if(REQ_OFI(preq)->pack_buffer) {
+          FI_RC_RETRY(fi_tsend(gl_data.endpoint,
+                               REQ_OFI(preq)->pack_buffer,
+                               REQ_OFI(preq)->pack_buffer_size,
+                               gl_data.mr,
+                               VC_OFI(vc)->direct_addr,
+                               REQ_OFI(preq)->tag,
+                               (void *) &(REQ_OFI(preq)->ofi_context)), tsend);
+        } else {
+          struct  fi_msg_tagged msg;
+          void   *desc    = NULL;
+          msg.msg_iov     = REQ_OFI(preq)->iov;
+          msg.desc        = &desc;
+          msg.iov_count   = REQ_OFI(preq)->iov_count;
+          msg.addr        = VC_OFI(vc)->direct_addr;
+          msg.tag         = REQ_OFI(preq)->tag,
+          msg.ignore      = 0ULL;
+          msg.context     = &(REQ_OFI(preq)->ofi_context);
+          msg.data        = 0ULL;
+          FI_RC_RETRY(fi_tsendmsg(gl_data.endpoint,&msg,0ULL),tsend);
+        }
+        MPIDI_CH3I_NM_OFI_RC(MPID_Request_complete(preq));
+        break;
+    }
     MPIDI_CH3I_NM_OFI_RC(MPID_Request_complete(rreq));
 
     END_FUNC_RC(FCNAME);

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c |   96 ++++++++++++--------
 1 file changed, 57 insertions(+), 39 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list