[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.3-69-g72a1e6f

Service Account noreply at mpich.org
Sat Nov 1 22:27:41 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  72a1e6f871e16943e364dd580e10b392d1e904a4 (commit)
       via  5f78f0fa399747dd23201f38fae773dda9af3f4d (commit)
      from  c76aa786e784faf0302367f7fc46d62f399bcbba (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/72a1e6f871e16943e364dd580e10b392d1e904a4

commit 72a1e6f871e16943e364dd580e10b392d1e904a4
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Oct 30 15:57:02 2014 -0500

    Bug-fix: avoid free NULL pointer in RMA.
    
    req->dev.user_buf points to the data sent from origin process
    to target process, and for FOP sometimes it points to the IMMED
    area in packet header when data can be fit in packet header.
    In such case, we should not free req->dev.user_buf in final
    request handler since that data area will be freed by the
    runtime when packet header is freed.
    
    In this patch we initialize user_buf to NULL when creating the
    request, and set it to NULL when FOP is completed, and avoid free
    a NULL pointer in final request handler.
    
    Signed-off-by: Min Si <msi at il.is.s.u-tokyo.ac.jp>

diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index 8031d04..db6760c 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -322,7 +322,8 @@ int MPIDI_CH3_ReqHandler_GetAccumRespComplete( MPIDI_VC_t *vc,
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GETACCUMRESPCOMPLETE);
     
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GETACCUMRESPCOMPLETE);
-    MPIU_Free(rreq->dev.user_buf);
+    if (rreq->dev.user_buf != NULL)
+        MPIU_Free(rreq->dev.user_buf);
 
     MPID_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
 
@@ -616,6 +617,16 @@ int MPIDI_CH3_ReqHandler_FOPComplete( MPIDI_VC_t *vc,
     /* Free temporary buffer allocated in PktHandler_FOP */
     if (len > sizeof(int) * MPIDI_RMA_FOP_IMMED_INTS && rreq->dev.op != MPI_NO_OP) {
         MPIU_Free(rreq->dev.user_buf);
+        /* Assign user_buf to NULL so that reqHandler_GetAccumRespComplete()
+           will not try to free an empty buffer. */
+        rreq->dev.user_buf = NULL;
+    }
+    else {
+        /* FOP data fit in pkt header and user_buf just points to data area in pkt header
+           in pktHandler_FOP(), and it should be freed when pkt header is freed.
+           Here we assign user_buf to NULL so that reqHandler_GetAccumRespComplete()
+           will not try to free it. */
+        rreq->dev.user_buf = NULL;
     }
 
     *complete = 1;
diff --git a/src/mpid/ch3/src/ch3u_request.c b/src/mpid/ch3/src/ch3u_request.c
index c5ff48d..7caa636 100644
--- a/src/mpid/ch3/src/ch3u_request.c
+++ b/src/mpid/ch3/src/ch3u_request.c
@@ -88,6 +88,7 @@ MPID_Request * MPID_Request_create(void)
 	req->dev.iov_offset        = 0;
         req->dev.flags             = MPIDI_CH3_PKT_FLAG_NONE;
         req->dev.resp_request_handle = MPI_REQUEST_NULL;
+        req->dev.user_buf          = NULL;
         req->dev.OnDataAvail       = NULL;
         req->dev.OnFinal           = NULL;
 #ifdef MPIDI_CH3_REQUEST_INIT

http://git.mpich.org/mpich.git/commitdiff/5f78f0fa399747dd23201f38fae773dda9af3f4d

commit 5f78f0fa399747dd23201f38fae773dda9af3f4d
Author: Igor Ivanov <Igor.Ivanov at itseez.com>
Date:   Mon Oct 20 17:35:05 2014 +0200

    netmod/mxm: Avoid calling mxm send req handling from mxm send completion callback
    
    Signed-off-by: Devendar Bureddy <devendar at mellanox.com>
    Signed-off-by: Igor Ivanov <Igor.Ivanov at itseez.com>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
index 3b2bb12..43070da 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
@@ -69,6 +69,8 @@ void MPID_nem_mxm_get_adi_msg(mxm_conn_h conn, mxm_imm_t imm, void *data,
 void MPID_nem_mxm_anysource_posted(MPID_Request * req);
 int MPID_nem_mxm_anysource_matched(MPID_Request * req);
 
+int _mxm_handle_sreq(MPID_Request * req);
+
 /* List type as queue
  * Operations, initialization etc
  */
@@ -174,6 +176,25 @@ typedef struct {
 /* macro for mxm private in REQ */
 #define REQ_BASE(reqp) ((reqp) ? (MPID_nem_mxm_req_area *)((reqp)->ch.netmod_area.padding) : NULL)
 
+typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_mxm_reqq_t;
+#define MPID_nem_mxm_queue_empty(q) GENERIC_Q_EMPTY (q)
+#define MPID_nem_mxm_queue_head(q) GENERIC_Q_HEAD (q)
+#define MPID_nem_mxm_queue_enqueue(qp, ep) do {                                           \
+        /* add refcount so req doesn't get freed before it's dequeued */                \
+        MPIR_Request_add_ref(ep);                                                       \
+        MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST,                         \
+                          "MPID_nem_mxm_queue_enqueue req=%p (handle=%#x), queue=%p",     \
+                          ep, (ep)->handle, qp));                                       \
+        GENERIC_Q_ENQUEUE (qp, ep, dev.next);                                           \
+    } while (0)
+#define MPID_nem_mxm_queue_dequeue(qp, ep)  do {                                          \
+        GENERIC_Q_DEQUEUE (qp, ep, dev.next);                                           \
+        MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST,                         \
+                          "MPID_nem_mxm_queue_dequeuereq=%p (handle=%#x), queue=%p",      \
+                          *(ep), *(ep) ? (*(ep))->handle : -1, qp));                    \
+        MPID_Request_release(*(ep));                                                    \
+    } while (0)
+
 typedef struct MPID_nem_mxm_module_t {
     char *runtime_version;
     const char *compiletime_version;
@@ -188,6 +209,7 @@ typedef struct MPID_nem_mxm_module_t {
     int mxm_np;
     MPID_nem_mxm_ep_t *endpoint;
     list_head_t free_queue;
+    MPID_nem_mxm_reqq_t sreq_queue;
     struct {
         int bulk_connect;       /* use bulk connect */
         int bulk_disconnect;    /* use bulk disconnect */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c
index 949efc7..4d78565 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c
@@ -482,6 +482,8 @@ static int _mxm_init(int rank, int size)
     list_grow_mxm_req(&_mxm_obj.free_queue);
     MPIU_Assert(list_length(&_mxm_obj.free_queue) == MXM_MPICH_MAX_REQ);
 
+    _mxm_obj.sreq_queue.head = _mxm_obj.sreq_queue.tail = NULL;
+
     mxm_obj = &_mxm_obj;
 
   fn_exit:
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c
index ba7686e..e8bddc3 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c
@@ -24,10 +24,16 @@ static int _mxm_process_rdtype(MPID_Request ** rreq_p, MPI_Datatype datatype,
 int MPID_nem_mxm_poll(int in_blocking_progress)
 {
     int mpi_errno = MPI_SUCCESS;
+    MPID_Request *req = NULL;
 
     MPIDI_STATE_DECL(MPID_STATE_MXM_POLL);
     MPIDI_FUNC_ENTER(MPID_STATE_MXM_POLL);
 
+    while (!MPID_nem_mxm_queue_empty(mxm_obj->sreq_queue)) {
+        MPID_nem_mxm_queue_dequeue(&mxm_obj->sreq_queue, &req);
+        _mxm_handle_sreq(req);
+    }
+
     mpi_errno = _mxm_poll();
     if (mpi_errno)
         MPIU_ERR_POP(mpi_errno);
@@ -72,6 +78,7 @@ void MPID_nem_mxm_get_adi_msg(mxm_conn_h conn, mxm_imm_t imm, void *data,
     vc = mxm_conn_ctx_get(conn);
 
     _dbg_mxm_output(5, "========> Getting ADI msg (from=%d data_size %d) \n", vc->pg_rank, length);
+    _dbg_mxm_out_buf(data, (length > 16 ? 16 : length));
 
     MPID_nem_handle_pkt(vc, data, (MPIDI_msg_sz_t) (length));
 }
@@ -144,6 +151,10 @@ int MPID_nem_mxm_anysource_matched(MPID_Request * req)
 int MPID_nem_mxm_recv(MPIDI_VC_t * vc, MPID_Request * rreq)
 {
     int mpi_errno = MPI_SUCCESS;
+    MPIDI_msg_sz_t data_sz;
+    int dt_contig;
+    MPI_Aint dt_true_lb;
+    MPID_Datatype *dt_ptr;
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_RECV);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_RECV);
@@ -152,18 +163,15 @@ int MPID_nem_mxm_recv(MPIDI_VC_t * vc, MPID_Request * rreq)
     MPIU_Assert(((rreq->dev.match.parts.rank == MPI_ANY_SOURCE) && (vc == NULL)) ||
                 (vc && !vc->ch.is_local));
 
+    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz,
+                            dt_ptr, dt_true_lb);
+
     {
         MPIR_Context_id_t context_id = rreq->dev.match.parts.context_id;
         int tag = rreq->dev.match.parts.tag;
-        MPIDI_msg_sz_t data_sz;
-        int dt_contig;
-        MPI_Aint dt_true_lb;
-        MPID_Datatype *dt_ptr;
         MPID_nem_mxm_vc_area *vc_area = NULL;
         MPID_nem_mxm_req_area *req_area = NULL;
 
-        MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz,
-                                dt_ptr, dt_true_lb);
         rreq->dev.OnDataAvail = NULL;
         rreq->dev.tmpbuf = NULL;
         rreq->ch.vc = vc;
@@ -223,7 +231,6 @@ static int _mxm_handle_rreq(MPID_Request * req)
     MPIDI_msg_sz_t userbuf_sz;
     MPID_Datatype *dt_ptr;
     MPIDI_msg_sz_t data_sz;
-    MPIDI_VC_t *vc = NULL;
     MPID_nem_mxm_vc_area *vc_area ATTRIBUTE((unused)) = NULL;
     MPID_nem_mxm_req_area *req_area = NULL;
     void *tmp_buf = NULL;
@@ -319,7 +326,7 @@ static int _mxm_handle_rreq(MPID_Request * req)
         }
     }
 
-    MPIDI_CH3U_Handle_recv_req(vc, req, &complete);
+    MPIDI_CH3U_Handle_recv_req(req->ch.vc, req, &complete);
     MPIU_Assert(complete == TRUE);
 
     if (tmp_buf) MPIU_Free(tmp_buf);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c
index 75003bf..69f3adc 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c
@@ -15,7 +15,6 @@ enum {
 };
 
 
-static int _mxm_handle_sreq(MPID_Request * req);
 static void _mxm_send_completion_cb(void *context);
 static int _mxm_isend(MPID_nem_mxm_ep_t * ep, MPID_nem_mxm_req_area * req,
                       int type, mxm_mq_h mxm_mq, int mxm_rank, int id, mxm_tag_t tag, int block);
@@ -235,6 +234,7 @@ int MPID_nem_mxm_send(MPIDI_VC_t * vc, const void *buf, int count, MPI_Datatype
     MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
     MPIU_Assert(sreq != NULL);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
+
     MPIDI_VC_FAI_send_seqnum(vc, seqnum);
     MPIDI_Request_set_seqnum(sreq, seqnum);
     if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
@@ -336,7 +336,8 @@ int MPID_nem_mxm_ssend(MPIDI_VC_t * vc, const void *buf, int count, MPI_Datatype
     /* create a request */
     MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
     MPIU_Assert(sreq != NULL);
-    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
+    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);
+
     MPIDI_VC_FAI_send_seqnum(vc, seqnum);
     MPIDI_Request_set_seqnum(sreq, seqnum);
     if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
@@ -439,6 +440,7 @@ int MPID_nem_mxm_isend(MPIDI_VC_t * vc, const void *buf, int count, MPI_Datatype
     MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
     MPIU_Assert(sreq != NULL);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
+
     MPIDI_VC_FAI_send_seqnum(vc, seqnum);
     MPIDI_Request_set_seqnum(sreq, seqnum);
     if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
@@ -541,7 +543,8 @@ int MPID_nem_mxm_issend(MPIDI_VC_t * vc, const void *buf, int count, MPI_Datatyp
     /* create a request */
     MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
     MPIU_Assert(sreq != NULL);
-    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
+    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);
+
     MPIDI_VC_FAI_send_seqnum(vc, seqnum);
     MPIDI_Request_set_seqnum(sreq, seqnum);
     if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
@@ -619,10 +622,9 @@ int MPID_nem_mxm_issend(MPIDI_VC_t * vc, const void *buf, int count, MPI_Datatyp
 }
 
 
-static int _mxm_handle_sreq(MPID_Request * req)
+int _mxm_handle_sreq(MPID_Request * req)
 {
     int complete = FALSE;
-    int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
     MPID_nem_mxm_vc_area *vc_area = NULL;
     MPID_nem_mxm_req_area *req_area = NULL;
 
@@ -634,8 +636,10 @@ static int _mxm_handle_sreq(MPID_Request * req)
                       16 ? 16 : req_area->iov_buf[0].length));
 
     vc_area->pending_sends -= 1;
-    if (((req->dev.datatype_ptr != NULL) && (req->dev.tmpbuf != NULL))) {
-        MPIU_Free(req->dev.tmpbuf);
+    if (req->dev.tmpbuf) {
+        if (req->dev.datatype_ptr || req->ch.noncontig) {
+            MPIU_Free(req->dev.tmpbuf);
+        }
     }
 
     if (req_area->iov_count > MXM_MPICH_MAX_IOV) {
@@ -644,19 +648,8 @@ static int _mxm_handle_sreq(MPID_Request * req)
         req_area->iov_count = 0;
     }
 
-    reqFn = req->dev.OnDataAvail;
-    if (!reqFn) {
-        MPIDI_CH3U_Request_complete(req);
-        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-    }
-    else {
-        MPIDI_VC_t *vc = req->ch.vc;
-
-        reqFn(vc, req, &complete);
-        if (!complete) {
-            MPIU_Assert(complete == TRUE);
-        }
-    }
+    MPIDI_CH3U_Handle_send_req(req->ch.vc, req, &complete);
+    MPIU_Assert(complete == TRUE);
 
     return complete;
 }
@@ -683,7 +676,7 @@ static void _mxm_send_completion_cb(void *context)
                     req, req->status.MPI_ERROR);
 
     if (likely(!MPIR_STATUS_GET_CANCEL_BIT(req->status))) {
-        _mxm_handle_sreq(req);
+        MPID_nem_mxm_queue_enqueue(&mxm_obj->sreq_queue, req);
     }
 }
 

-----------------------------------------------------------------------

Summary of changes:
 .../ch3/channels/nemesis/netmod/mxm/mxm_impl.h     |   22 ++++++++++++
 .../ch3/channels/nemesis/netmod/mxm/mxm_init.c     |    2 +
 .../ch3/channels/nemesis/netmod/mxm/mxm_poll.c     |   23 ++++++++----
 .../ch3/channels/nemesis/netmod/mxm/mxm_send.c     |   35 ++++++++------------
 src/mpid/ch3/src/ch3u_handle_recv_req.c            |   13 +++++++-
 src/mpid/ch3/src/ch3u_request.c                    |    1 +
 6 files changed, 66 insertions(+), 30 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list