[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.2-79-g206af6d

Service Account noreply at mpich.org
Thu Aug 7 11:21:29 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  206af6d56ce9e1f965a26c5f82b8c484f8bb7596 (commit)
       via  d1df5c9c792397dde7b4ddcea859c89aaa9e3955 (commit)
       via  5c328ccb3d1752224a30740e5e1b5c3ed4e129f8 (commit)
       via  dfce63a02289fbaebd776c76d2c90a7a21e96450 (commit)
       via  88f05f80a9e5b4c41a5a670484503fc413d7868f (commit)
      from  43bbd02bab428e1f6dab359ba77d49f5be9efd15 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/206af6d56ce9e1f965a26c5f82b8c484f8bb7596

commit 206af6d56ce9e1f965a26c5f82b8c484f8bb7596
Author: Antonio Pena Monferrer <apenya at mcs.anl.gov>
Date:   Mon Aug 4 18:54:52 2014 -0500

    Fixed MPI_Probe/MPI/Mprobe/MPI_Mrecv (+I variants)
    
    First working version. See #2152.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/Makefile.mk b/src/mpid/ch3/channels/nemesis/netmod/portals4/Makefile.mk
index 40b1539..3901503 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/Makefile.mk
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/Makefile.mk
@@ -14,7 +14,8 @@ mpi_core_sources +=					\
     src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_probe.c		\
     src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c		\
     src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c	        \
-    src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c 
+    src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c            \
+    src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_lmt.c
 
 noinst_HEADERS +=                                                \
     src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
index 9a2d3cf..9681bae 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
@@ -181,6 +181,13 @@ void MPID_nem_ptl_anysource_posted(MPID_Request *rreq);
 int MPID_nem_ptl_anysource_matched(MPID_Request *rreq);
 int MPID_nem_ptl_init_id(MPIDI_VC_t *vc);
 
+int MPID_nem_ptl_lmt_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *rts_pkt, MPID_Request *req);
+int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPID_Request *rreq, MPID_IOV s_cookie);
+int MPID_nem_ptl_lmt_start_send(MPIDI_VC_t *vc, MPID_Request *sreq, MPID_IOV r_cookie);
+int MPID_nem_ptl_lmt_handle_cookie(MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV s_cookie);
+int MPID_nem_ptl_lmt_done_send(MPIDI_VC_t *vc, MPID_Request *req);
+int MPID_nem_ptl_lmt_done_recv(MPIDI_VC_t *vc, MPID_Request *req);
+
 
 /* debugging */
 const char *MPID_nem_ptl_strerror(int ret);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
index 9496048..f5161db 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
@@ -97,7 +97,9 @@ static int ptl_init(MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max
 
     mpi_errno = MPIDI_CH3I_Register_anysource_notification(MPID_nem_ptl_anysource_posted, MPID_nem_ptl_anysource_matched);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    
+
+    MPIDI_Anysource_improbe_fn = MPID_nem_ptl_anysource_improbe;
+
     /* init portals */
     ret = PtlInit();
     MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlinit", "**ptlinit %s", MPID_nem_ptl_strerror(ret));
@@ -281,6 +283,13 @@ static int vc_init(MPIDI_VC_t *vc)
     vc_ch->iStartContigMsg = MPID_nem_ptl_iStartContigMsg;
     vc_ch->iSendContig     = MPID_nem_ptl_iSendContig;
 
+    vc_ch->lmt_initiate_lmt  = MPID_nem_ptl_lmt_initiate_lmt;
+    vc_ch->lmt_start_recv    = MPID_nem_ptl_lmt_start_recv;
+    vc_ch->lmt_start_send    = MPID_nem_ptl_lmt_start_send;
+    vc_ch->lmt_handle_cookie = MPID_nem_ptl_lmt_handle_cookie;
+    vc_ch->lmt_done_send     = MPID_nem_ptl_lmt_done_send;
+    vc_ch->lmt_done_recv     = MPID_nem_ptl_lmt_done_recv;
+
     vc->comm_ops = &comm_ops;
 
     vc_ch->next = NULL;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_lmt.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_lmt.c
new file mode 100644
index 0000000..7979db8
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_lmt.c
@@ -0,0 +1,70 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2014 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "ptl_impl.h"
+
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_ptl_lmt_initiate_lmt
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_ptl_lmt_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *rts_pkt, MPID_Request *req)
+{
+    /* Nothing to do here, but has to be defined for CH3 to follow the right path to
+       MPID_nem_ptl_lmt_start_recv */
+    return MPI_SUCCESS;
+}
+
+
+
+/* The following function is implemented in ptl_recv.c to make use of the handlers defined there */
+/* int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPID_Request *rreq, MPID_IOV s_cookie) */
+
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_ptl_lmt_start_send
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_ptl_lmt_start_send(MPIDI_VC_t *vc, MPID_Request *sreq, MPID_IOV r_cookie)
+{
+    MPIU_Assertp(0 && "This function shouldn't be called.");
+    return MPI_ERR_INTERN;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_ptl_lmt_handle_cookie
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_ptl_lmt_handle_cookie(MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV s_cookie)
+{
+    MPIU_Assertp(0 && "This function shouldn't be called.");
+    return MPI_ERR_INTERN;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_ptl_lmt_done_send
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_ptl_lmt_done_send(MPIDI_VC_t *vc, MPID_Request *req)
+{
+    MPIU_Assertp(0 && "This function shouldn't be called.");
+    return MPI_ERR_INTERN;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_ptl_lmt_done_recv
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_ptl_lmt_done_recv(MPIDI_VC_t *vc, MPID_Request *req)
+{
+    MPIU_Assertp(0 && "This function shouldn't be called.");
+    return MPI_ERR_INTERN;
+}
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_probe.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_probe.c
index 0dbfc7e..b7c7dcc 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_probe.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_probe.c
@@ -26,17 +26,66 @@ static int handle_probe(const ptl_event_t *e)
     REQ_PTL(req)->found = TRUE;
     req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
     req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);
-    MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->match_bits));
+    MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data));
 
+  fn_exit:
     MPIDI_CH3U_Request_complete(req);
-    
- fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
 }
 
+static int handle_mprobe(const ptl_event_t *e)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPID_Request *const req = e->user_ptr;
+    MPIU_CHKPMEM_DECL(1);
+    MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE);
+
+    if (e->ni_fail_type == PTL_NI_NO_MATCH) {
+        REQ_PTL(req)->found = FALSE;
+        goto fn_exit;
+    }
+
+    REQ_PTL(req)->found = TRUE;
+    req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
+    req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);
+    MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data));
+    MPIDI_Request_set_sync_send_flag(req, e->hdr_data & NPTL_SSEND);
+
+    MPIU_CHKPMEM_MALLOC(req->dev.tmpbuf, void *, e->mlength, mpi_errno, "tmpbuf");
+    MPIU_Memcpy((char *)req->dev.tmpbuf, e->start, e->mlength);
+    req->dev.recv_data_sz = e->mlength;
+
+    if (!(e->hdr_data & NPTL_LARGE)) {
+        MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_EAGER_MSG);
+    }
+    else {
+        MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD);
+        req->dev.match.parts.tag = req->status.MPI_TAG;
+        req->dev.match.parts.context_id = NPTL_MATCH_GET_CTX(e->match_bits);
+        req->dev.match.parts.rank = req->status.MPI_SOURCE;
+        MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_RNDV_MSG);
+    }
+
+    /* At this point we know the ME is unlinked. Invalidate the handle to
+       prevent further accesses, e.g. an attempted cancel. */
+    REQ_PTL(req)->me = PTL_INVALID_HANDLE;
+    req->dev.recv_pending_count = 1;
+
+  fn_exit:
+    MPIU_CHKPMEM_COMMIT();
+    MPIDI_CH3U_Request_complete(req);
+    MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
+    return mpi_errno;
+ fn_fail:
+    MPIU_CHKPMEM_REAP();
+    goto fn_exit;
+}
+
 
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_ptl_probe
@@ -84,8 +133,13 @@ int MPID_nem_ptl_iprobe(MPIDI_VC_t *vc, int source, int tag, MPID_Comm *comm, in
 
     if (source == MPI_ANY_SOURCE)
         me.match_id = id_any;
-    else
+    else {
+        if (!vc_ptl->id_initialized) {
+            mpi_errno = MPID_nem_ptl_init_id(vc);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        }
         me.match_id = vc_ptl->id;
+    }
 
     if (tag == MPI_ANY_TAG)
         me.ignore_bits = NPTL_MATCH_IGNORE_ANY_TAG;
@@ -95,8 +149,8 @@ int MPID_nem_ptl_iprobe(MPIDI_VC_t *vc, int source, int tag, MPID_Comm *comm, in
     /* submit a search request */
     ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_ONLY, req);
     MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
-    DBG_MSG_MESearch("REG", vc->pg_rank, me, req);
-    
+    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : MPI_ANY_SOURCE, me, req);
+
     /* wait for search request to complete */
     do {
         mpi_errno = MPID_nem_ptl_poll(FALSE);
@@ -123,11 +177,72 @@ int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPID_Comm *comm, i
                          MPID_Request **message, MPI_Status *status)
 {
     int mpi_errno = MPI_SUCCESS;
+    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
+    int ret;
+    ptl_process_t id_any;
+    ptl_me_t me;
+    MPID_Request *req;
+
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IMPROBE);
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_IMPROBE);
 
+    id_any.phys.nid = PTL_NID_ANY;
+    id_any.phys.pid = PTL_PID_ANY;
+
+    /* create a request */
+    req = MPID_Request_create();
+    MPID_nem_ptl_init_req(req);
+    MPIU_ERR_CHKANDJUMP1(!req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
+    MPIU_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */
+    REQ_PTL(req)->event_handler = handle_mprobe;
+    req->kind = MPID_REQUEST_MPROBE;
+
+    /* create a dummy ME to use for searching the list */
+    me.start = NULL;
+    me.length = 0;
+    me.ct_handle = PTL_CT_NONE;
+    me.uid = PTL_UID_ANY;
+    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
+    me.min_free = 0;
+    me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, source);
+
+    if (source == MPI_ANY_SOURCE)
+        me.match_id = id_any;
+    else {
+        if (!vc_ptl->id_initialized) {
+            mpi_errno = MPID_nem_ptl_init_id(vc);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        }
+        me.match_id = vc_ptl->id;
+    }
+
+    if (tag == MPI_ANY_TAG)
+        me.ignore_bits = NPTL_MATCH_IGNORE_ANY_TAG;
+    else
+        me.ignore_bits = NPTL_MATCH_IGNORE;
+    /* submit a search request */
+    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, req);
+    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
+    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, req);
 
+    /* wait for search request to complete */
+    do {
+        mpi_errno = MPID_nem_ptl_poll(FALSE);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    } while (!MPID_Request_is_complete(req));
+
+    *flag = REQ_PTL(req)->found;
+    if (*flag) {
+        req->comm = comm;
+        MPIR_Comm_add_ref(comm);
+        MPIR_Request_extract_status(req, status);
+        *message = req;
+    }
+    else {
+        MPID_Request_release(req);
+        *message = MPI_MESSAGE_NULL;
+    }
 
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_IMPROBE);
@@ -147,6 +262,7 @@ int MPID_nem_ptl_anysource_iprobe(int tag, MPID_Comm * comm, int context_offset,
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IPROBE);
 
+    return MPID_nem_ptl_iprobe(NULL, MPI_ANY_SOURCE, tag, comm, context_offset, flag, status);
 
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IPROBE);
@@ -167,6 +283,7 @@ int MPID_nem_ptl_anysource_improbe(int tag, MPID_Comm * comm, int context_offset
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IMPROBE);
 
+    return MPID_nem_ptl_improbe(NULL, MPI_ANY_SOURCE, tag, comm, context_offset, flag, message, status);
 
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IMPROBE);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
index 5e323f6..4e97a8d 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
@@ -598,3 +598,117 @@ int MPID_nem_ptl_cancel_recv(MPIDI_VC_t *vc,  MPID_Request *rreq)
  fn_fail:
     goto fn_exit;
 }
+
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_ptl_lmt_start_recv
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPID_Request *rreq, MPID_IOV s_cookie)
+{
+    /* This function should only be called as a result of an Mrecv because of the CH3 protocol for
+       Rendezvous Mrecvs. The regular CH3 protocol is not optimal for portals, since we don't need
+       to exchange CTS/RTS. We need this code here because at the time of the Mprobe we don't know
+       the target buffer, but we dequeue (and lose) the portals entry. This doesn't happen on
+       regular large transfers because we handle them directly on the netmod. */
+    int mpi_errno = MPI_SUCCESS;
+    int dt_contig;
+    MPIDI_msg_sz_t data_sz;
+    MPID_Datatype *dt_ptr;
+    MPI_Aint dt_true_lb;
+    ptl_match_bits_t match_bits;
+    int was_incomplete;
+    int ret;
+    MPID_nem_ptl_vc_area *vc_ptl = VC_PTL(vc);
+    MPIU_CHKPMEM_DECL(1);
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_LMT_START_RECV);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_LMT_START_RECV);
+
+    /* This Rendezvous protocol does not do RTS-CTS. Since we have all the data, we limit to get it */
+    /* The following code is inspired on handler_recv_dqueue_large */
+
+    match_bits = NPTL_MATCH(rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id,
+                            rreq->dev.match.parts.rank);
+    MPIDI_CH3U_Request_increment_cc(rreq, &was_incomplete);
+    MPIU_Assert(was_incomplete == 0);
+    MPIU_Object_add_ref(rreq);
+
+    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz, dt_ptr,
+                            dt_true_lb);
+    if (dt_contig) {
+        void * real_user_buf = (char *)rreq->dev.user_buf + dt_true_lb;
+
+        REQ_PTL(rreq)->event_handler = handler_recv_complete;
+        ret = PtlGet(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)real_user_buf + PTL_LARGE_THRESHOLD),
+                     data_sz - PTL_LARGE_THRESHOLD, vc_ptl->id, vc_ptl->ptg, match_bits, 0, rreq);
+        DBG_MSG_GET("global", data_sz - PTL_LARGE_THRESHOLD, vc->pg_rank, match_bits);
+        MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "   buf=%p", (char *)real_user_buf + PTL_LARGE_THRESHOLD);
+        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlget", "**ptlget %s",
+                             MPID_nem_ptl_strerror(ret));
+        /* The memcpy is done after the get purposely for overlapping */
+        MPIU_Memcpy(real_user_buf, rreq->dev.tmpbuf, PTL_LARGE_THRESHOLD);
+    }
+    else {
+        MPI_Aint last;
+
+        rreq->dev.segment_ptr = MPID_Segment_alloc();
+        MPIU_ERR_CHKANDJUMP1(rreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem",
+                             "**nomem %s", "MPID_Segment_alloc");
+        MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype,
+                          rreq->dev.segment_ptr, 0);
+        rreq->dev.segment_first = 0;
+        rreq->dev.segment_size = data_sz - PTL_LARGE_THRESHOLD;
+        last = PTL_LARGE_THRESHOLD;
+        MPID_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, rreq->dev.tmpbuf);
+        MPIU_Assert(last == PTL_LARGE_THRESHOLD);
+        rreq->dev.segment_first = PTL_LARGE_THRESHOLD;
+        last = data_sz - PTL_LARGE_THRESHOLD;
+        rreq->dev.iov_count = MPID_IOV_LIMIT;
+        MPID_Segment_pack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, rreq->dev.iov,
+                                 &rreq->dev.iov_count);
+        if (last == rreq->dev.segment_size) {
+            /* Rest of message fits in one IOV */
+            ptl_md_t md;
+
+            md.start = rreq->dev.iov;
+            md.length = rreq->dev.iov_count;
+            md.options = PTL_IOVEC;
+            md.eq_handle = MPIDI_nem_ptl_eq;
+            md.ct_handle = PTL_CT_NONE;
+            ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(rreq)->md);
+            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s",
+                                 MPID_nem_ptl_strerror(ret));
+
+            REQ_PTL(rreq)->event_handler = handler_recv_complete;
+            ret = PtlGet(REQ_PTL(rreq)->md, 0, rreq->dev.segment_size, vc_ptl->id, vc_ptl->ptg,
+                         match_bits, PTL_LARGE_THRESHOLD, rreq);
+            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlget", "**ptlget %s",
+                                 MPID_nem_ptl_strerror(ret));
+        }
+        else {
+            /* message won't fit in a single IOV, allocate buffer and unpack when received */
+            /* FIXME: For now, allocate a single large buffer to hold entire message */
+            MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, rreq->dev.segment_size,
+                                mpi_errno, "chunk_buffer");
+            REQ_PTL(rreq)->event_handler = handler_recv_unpack_complete;
+            ret = PtlGet(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(rreq)->chunk_buffer[0],
+                         rreq->dev.segment_size, vc_ptl->id, vc_ptl->ptg, match_bits,
+                         PTL_LARGE_THRESHOLD, rreq);
+            MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlget", "**ptlget %s",
+                                 MPID_nem_ptl_strerror(ret));
+        }
+    }
+    MPIU_Free(rreq->dev.tmpbuf);
+    rreq->ch.lmt_tmp_cookie.MPID_IOV_LEN = 0;  /* Required for do_cts in mpid_nem_lmt.c */
+
+ fn_exit:
+    MPIU_CHKPMEM_COMMIT();
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_LMT_START_RECV);
+    return mpi_errno;
+ fn_fail:
+    MPIU_CHKPMEM_REAP();
+    goto fn_exit;
+}

http://git.mpich.org/mpich.git/commitdiff/d1df5c9c792397dde7b4ddcea859c89aaa9e3955

commit d1df5c9c792397dde7b4ddcea859c89aaa9e3955
Author: Antonio Pena Monferrer <apenya at mcs.anl.gov>
Date:   Mon Aug 4 18:35:25 2014 -0500

    Added 4 more tests to mprobe
    
    The following tests are added:
      - MPI_Ssend
      - Large transfers (Rendezvous)
      - Small noncontiguous datatypes
      - Large noncontiguous datatypes
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/test/mpi/pt2pt/mprobe.c b/test/mpi/pt2pt/mprobe.c
index b45834e..07edde1 100644
--- a/test/mpi/pt2pt/mprobe.c
+++ b/test/mpi/pt2pt/mprobe.c
@@ -28,18 +28,22 @@
         }                                                                         \
     } while (0)
 
+#define LARGE_DIM 512
+#define LARGE_SZ (LARGE_DIM * LARGE_DIM)
+
 int main(int argc, char **argv)
 {
     int errs = 0;
     int found, completed;
     int rank, size;
-    int sendbuf[8], recvbuf[8];
-    int count;
+    int sendbuf[LARGE_SZ], recvbuf[LARGE_SZ];
+    int count, i;
 #ifdef TEST_MPROBE_ROUTINES
     MPI_Message msg;
 #endif
     MPI_Request rreq;
     MPI_Status s1, s2;
+    MPI_Datatype vectype;
 
     MPI_Init(&argc, &argv);
 
@@ -355,6 +359,162 @@ int main(int argc, char **argv)
         check(count == 0);
     }
 
+    /* test 8: simple ssend & mprobe+mrecv */
+    if (rank == 0) {
+        sendbuf[0] = 0xdeadbeef;
+        sendbuf[1] = 0xfeedface;
+        MPI_Ssend(sendbuf, 2, MPI_INT, 1, 5, MPI_COMM_WORLD);
+    }
+    else {
+        memset(&s1, 0xab, sizeof(MPI_Status));
+        memset(&s2, 0xab, sizeof(MPI_Status));
+        /* the error field should remain unmodified */
+        s1.MPI_ERROR = MPI_ERR_DIMS;
+        s2.MPI_ERROR = MPI_ERR_TOPOLOGY;
+
+        msg = MPI_MESSAGE_NULL;
+        MPI_Mprobe(0, 5, MPI_COMM_WORLD, &msg, &s1);
+        check(s1.MPI_SOURCE == 0);
+        check(s1.MPI_TAG == 5);
+        check(s1.MPI_ERROR == MPI_ERR_DIMS);
+        check(msg != MPI_MESSAGE_NULL);
+
+        count = -1;
+        MPI_Get_count(&s1, MPI_INT, &count);
+        check(count == 2);
+
+        recvbuf[0] = 0x01234567;
+        recvbuf[1] = 0x89abcdef;
+        MPI_Mrecv(recvbuf, count, MPI_INT, &msg, &s2);
+        check(recvbuf[0] == 0xdeadbeef);
+        check(recvbuf[1] == 0xfeedface);
+        check(s2.MPI_SOURCE == 0);
+        check(s2.MPI_TAG == 5);
+        check(s2.MPI_ERROR == MPI_ERR_TOPOLOGY);
+        check(msg == MPI_MESSAGE_NULL);
+    }
+
+    /* test 9: mprobe+mrecv LARGE */
+    if (rank == 0) {
+        for (i = 0; i < LARGE_SZ; i++)
+            sendbuf[i] = i;
+        MPI_Send(sendbuf, LARGE_SZ, MPI_INT, 1, 5, MPI_COMM_WORLD);
+    }
+    else {
+        memset(&s1, 0xab, sizeof(MPI_Status));
+        memset(&s2, 0xab, sizeof(MPI_Status));
+        /* the error field should remain unmodified */
+        s1.MPI_ERROR = MPI_ERR_DIMS;
+        s2.MPI_ERROR = MPI_ERR_TOPOLOGY;
+
+        msg = MPI_MESSAGE_NULL;
+        MPI_Mprobe(0, 5, MPI_COMM_WORLD, &msg, &s1);
+        check(s1.MPI_SOURCE == 0);
+        check(s1.MPI_TAG == 5);
+        check(s1.MPI_ERROR == MPI_ERR_DIMS);
+        check(msg != MPI_MESSAGE_NULL);
+
+        count = -1;
+        MPI_Get_count(&s1, MPI_INT, &count);
+        check(count == LARGE_SZ);
+
+        memset(recvbuf, 0xFF, LARGE_SZ * sizeof(int));
+        MPI_Mrecv(recvbuf, count, MPI_INT, &msg, &s2);
+        for (i = 0; i < LARGE_SZ; i++)
+            check(recvbuf[i] == i);
+        check(s2.MPI_SOURCE == 0);
+        check(s2.MPI_TAG == 5);
+        check(s2.MPI_ERROR == MPI_ERR_TOPOLOGY);
+        check(msg == MPI_MESSAGE_NULL);
+    }
+
+    /* test 10: mprobe+mrecv noncontiguous datatype */
+    MPI_Type_vector(2, 1, 4, MPI_INT, &vectype);
+    MPI_Type_commit(&vectype);
+    if (rank == 0) {
+        memset(sendbuf, 0, 8 * sizeof(int));
+        sendbuf[0] = 0xdeadbeef;
+        sendbuf[4] = 0xfeedface;
+        MPI_Send(sendbuf, 1, vectype, 1, 5, MPI_COMM_WORLD);
+    }
+    else {
+        memset(&s1, 0xab, sizeof(MPI_Status));
+        memset(&s2, 0xab, sizeof(MPI_Status));
+        /* the error field should remain unmodified */
+        s1.MPI_ERROR = MPI_ERR_DIMS;
+        s2.MPI_ERROR = MPI_ERR_TOPOLOGY;
+
+        msg = MPI_MESSAGE_NULL;
+        MPI_Mprobe(0, 5, MPI_COMM_WORLD, &msg, &s1);
+        check(s1.MPI_SOURCE == 0);
+        check(s1.MPI_TAG == 5);
+        check(s1.MPI_ERROR == MPI_ERR_DIMS);
+        check(msg != MPI_MESSAGE_NULL);
+
+        count = -1;
+        MPI_Get_count(&s1, vectype, &count);
+        check(count == 1);
+
+        memset(recvbuf, 0, 8 * sizeof(int));
+        MPI_Mrecv(recvbuf, 1, vectype, &msg, &s2);
+        check(recvbuf[0] == 0xdeadbeef);
+        for (i = 1; i < 4; i++)
+            check(recvbuf[i] == 0);
+        check(recvbuf[4] = 0xfeedface);
+        for (i = 5; i < 8; i++)
+            check(recvbuf[i] == 0);
+        check(s2.MPI_SOURCE == 0);
+        check(s2.MPI_TAG == 5);
+        check(s2.MPI_ERROR == MPI_ERR_TOPOLOGY);
+        check(msg == MPI_MESSAGE_NULL);
+    }
+    MPI_Type_free(&vectype);
+
+    /* test 11: mprobe+mrecv noncontiguous datatype LARGE */
+    MPI_Type_vector(LARGE_DIM, LARGE_DIM - 1, LARGE_DIM, MPI_INT, &vectype);
+    MPI_Type_commit(&vectype);
+    if (rank == 0) {
+        for (i = 0; i < LARGE_SZ; i++)
+            sendbuf[i] = i;
+        MPI_Send(sendbuf, 1, vectype, 1, 5, MPI_COMM_WORLD);
+    }
+    else {
+        int idx = 0;
+
+        memset(&s1, 0xab, sizeof(MPI_Status));
+        memset(&s2, 0xab, sizeof(MPI_Status));
+        /* the error field should remain unmodified */
+        s1.MPI_ERROR = MPI_ERR_DIMS;
+        s2.MPI_ERROR = MPI_ERR_TOPOLOGY;
+
+        msg = MPI_MESSAGE_NULL;
+        MPI_Mprobe(0, 5, MPI_COMM_WORLD, &msg, &s1);
+        check(s1.MPI_SOURCE == 0);
+        check(s1.MPI_TAG == 5);
+        check(s1.MPI_ERROR == MPI_ERR_DIMS);
+        check(msg != MPI_MESSAGE_NULL);
+
+        count = -1;
+        MPI_Get_count(&s1, vectype, &count);
+        check(count == 1);
+
+        memset(recvbuf, 0, LARGE_SZ * sizeof(int));
+        MPI_Mrecv(recvbuf, 1, vectype, &msg, &s2);
+        for (i = 0; i < LARGE_DIM; i++) {
+            int j;
+            for (j = 0; j < LARGE_DIM - 1; j++) {
+                check(recvbuf[idx] == idx);
+                ++idx;
+            }
+            check(recvbuf[idx++] == 0);
+        }
+        check(s2.MPI_SOURCE == 0);
+        check(s2.MPI_TAG == 5);
+        check(s2.MPI_ERROR == MPI_ERR_TOPOLOGY);
+        check(msg == MPI_MESSAGE_NULL);
+    }
+    MPI_Type_free(&vectype);
+
     /* TODO MPI_ANY_SOURCE and MPI_ANY_TAG should be tested as well */
     /* TODO a full range of message sizes should be tested too */
     /* TODO threaded tests are also needed, but they should go in a separate

http://git.mpich.org/mpich.git/commitdiff/5c328ccb3d1752224a30740e5e1b5c3ed4e129f8

commit 5c328ccb3d1752224a30740e5e1b5c3ed4e129f8
Author: Antonio Pena Monferrer <apenya at mcs.anl.gov>
Date:   Mon Aug 4 09:42:57 2014 -0500

    portals4: Avoid setting error on cancelling a request
    
    Sometimes the ch3 layer asks to cancel a request which is not in the netmod
    queues any more. If portals reports anything but PTL_OK or PTL_IN_USE, just
    return and don't set an error. We can consider the request no longer active.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
index 2db6691..5e323f6 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
@@ -533,8 +533,10 @@ static int cancel_recv(MPID_Request *rreq, int *cancelled)
         ptl_err = PtlMEUnlink(REQ_PTL(rreq)->me);
         if (ptl_err == PTL_OK)
             *cancelled = TRUE;
-        else if (ptl_err != PTL_IN_USE)
-            mpi_errno = MPI_ERR_INTERN;
+        /* FIXME: if we properly invalidate matching list entry handles, we should be
+           able to ensure an unlink operation results in either PTL_OK or PTL_IN_USE.
+           Anything else would be an error. For now, though, we assume anything but PTL_OK
+           is uncancelable and return. */
     }
 
  fn_exit:
@@ -560,12 +562,12 @@ int MPID_nem_ptl_anysource_matched(MPID_Request *rreq)
     mpi_errno = cancel_recv(rreq, &cancelled);
     /* FIXME: This function is does not return an error because the queue
        functions (where the posted_recv hooks are called) return no error
-       code. */
+       code. See also comment on cancel_recv. */
     MPIU_Assertp(mpi_errno == MPI_SUCCESS);
 
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
-    return !cancelled;
+    return MPI_SUCCESS;
  fn_fail:
     goto fn_exit;
 }

http://git.mpich.org/mpich.git/commitdiff/dfce63a02289fbaebd776c76d2c90a7a21e96450

commit dfce63a02289fbaebd776c76d2c90a7a21e96450
Author: Antonio Pena Monferrer <apenya at mcs.anl.gov>
Date:   Mon Aug 4 09:29:18 2014 -0500

    Fixed accessing to NULL struct in portals receive
    
    This happened in anysource with debug logging enabled.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
index 57e5dc3..2db6691 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
@@ -477,7 +477,7 @@ int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPID_Request *rreq)
 
     ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_PRIORITY_LIST, rreq, &REQ_PTL(rreq)->me);
     MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
-    DBG_MSG_MEAPPEND("REG", vc->pg_rank, me, rreq);
+    DBG_MSG_MEAPPEND("REG", vc ? vc->pg_rank : MPI_ANY_SOURCE, me, rreq);
     MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "    buf=%p", me.start);
     MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "MPIDI_nem_ptl_pt = %d", MPIDI_nem_ptl_pt);
 

http://git.mpich.org/mpich.git/commitdiff/88f05f80a9e5b4c41a5a670484503fc413d7868f

commit 88f05f80a9e5b4c41a5a670484503fc413d7868f
Author: Antonio Pena Monferrer <apenya at mcs.anl.gov>
Date:   Mon Aug 4 09:21:39 2014 -0500

    Fixed Portals matching masking
    
    The out-of-interest bits must be zeroed-out to avoid them colliding with their neighbor bits.
    This is relevant In cases of special values, i.e., negative values such as MPI_*_ANY.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
index 43c1c1d..9a2d3cf 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
@@ -129,9 +129,9 @@ typedef struct {
 #define NPTL_MATCH_RANK_MASK (((ptl_match_bits_t)(1) << 16) - 1)
 #define NPTL_MATCH_CTX_MASK ((((ptl_match_bits_t)(1) << 16) - 1) << NPTL_MATCH_CTX_OFFSET)
 #define NPTL_MATCH_TAG_MASK ((((ptl_match_bits_t)(1) << 32) - 1) << NPTL_MATCH_TAG_OFFSET)
-#define NPTL_MATCH(tag_, ctx_, rank_) (((ptl_match_bits_t)(tag_) << NPTL_MATCH_TAG_OFFSET) |     \
-                                       ((ptl_match_bits_t)(ctx_) << NPTL_MATCH_CTX_OFFSET) |     \
-                                       ((ptl_match_bits_t)(rank_)))
+#define NPTL_MATCH(tag_, ctx_, rank_) ((((ptl_match_bits_t)(tag_) << NPTL_MATCH_TAG_OFFSET) & NPTL_MATCH_TAG_MASK) | \
+                                       (((ptl_match_bits_t)(ctx_) << NPTL_MATCH_CTX_OFFSET) & NPTL_MATCH_CTX_MASK) | \
+                                       ((ptl_match_bits_t)(rank_) & NPTL_MATCH_RANK_MASK))
 #define NPTL_MATCH_IGNORE NPTL_MATCH_RANK_MASK
 #define NPTL_MATCH_IGNORE_ANY_TAG (NPTL_MATCH_IGNORE | NPTL_MATCH_TAG_MASK)
 

-----------------------------------------------------------------------

Summary of changes:
 .../channels/nemesis/netmod/portals4/Makefile.mk   |    3 +-
 .../channels/nemesis/netmod/portals4/ptl_impl.h    |   13 ++-
 .../channels/nemesis/netmod/portals4/ptl_init.c    |   11 ++-
 .../ch3/channels/nemesis/netmod/portals4/ptl_lmt.c |   70 +++++++++
 .../channels/nemesis/netmod/portals4/ptl_probe.c   |  129 +++++++++++++++-
 .../channels/nemesis/netmod/portals4/ptl_recv.c    |  126 ++++++++++++++-
 test/mpi/pt2pt/mprobe.c                            |  164 +++++++++++++++++++-
 7 files changed, 498 insertions(+), 18 deletions(-)
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_lmt.c


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list