[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.2-56-g0631718

Service Account noreply at mpich.org
Thu Jul 31 09:45:20 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  0631718f8dc25304c62f0ac755e46f38256eb880 (commit)
       via  b52300e549e56e148b8c1abefd2bd2f7122958ed (commit)
       via  9db2bb04dab3864143848c66b94ef358fcc27eee (commit)
      from  29d4c54f7a0adba04464f3cfc2ba917a3dc3f8b5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/0631718f8dc25304c62f0ac755e46f38256eb880

commit 0631718f8dc25304c62f0ac755e46f38256eb880
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Wed Jul 30 20:38:36 2014 -0500

    portals4: implement cancel_recv and anysource_matched
    
    These functions share similar functionality. The end goal is being
    able to remove a posted matching list entry from portals4. E.g. if
    a recv request is cancelled, or if an anysource recv was matched in
    shared memory and the netmod portion should be removed.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
index 712e006..57e5dc3 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
@@ -23,6 +23,10 @@ static void dequeue_req(const ptl_event_t *e)
     rreq->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
     rreq->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);
 
+    /* At this point we know the ME is unlinked. Invalidate the handle to
+       prevent further accesses, e.g. an attempted cancel. */
+    REQ_PTL(rreq)->me = PTL_INVALID_HANDLE;
+
     MPID_Datatype_get_size_macro(rreq->dev.datatype, r_len);
     r_len *= rreq->dev.user_count;
 
@@ -515,11 +519,23 @@ void MPID_nem_ptl_anysource_posted(MPID_Request *rreq)
 static int cancel_recv(MPID_Request *rreq, int *cancelled)
 {
     int mpi_errno = MPI_SUCCESS;
+    int ptl_err   = PTL_OK;
     MPIDI_STATE_DECL(MPID_STATE_CANCEL_RECV);
 
     MPIDI_FUNC_ENTER(MPID_STATE_CANCEL_RECV);
 
-    MPIU_Assert(0 && "FIXME: Need to implement cancel_recv");
+    *cancelled = FALSE;
+
+    /* An invalid handle indicates the operation has been completed
+       and the matching list entry unlinked. At that point, the operation
+       cannot be cancelled. */
+    if (REQ_PTL(rreq)->me != PTL_INVALID_HANDLE) {
+        ptl_err = PtlMEUnlink(REQ_PTL(rreq)->me);
+        if (ptl_err == PTL_OK)
+            *cancelled = TRUE;
+        else if (ptl_err != PTL_IN_USE)
+            mpi_errno = MPI_ERR_INTERN;
+    }
 
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_CANCEL_RECV);
@@ -535,8 +551,8 @@ static int cancel_recv(MPID_Request *rreq, int *cancelled)
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPID_nem_ptl_anysource_matched(MPID_Request *rreq)
 {
-    int mpi_errno = MPI_SUCCESS;
-    int cancelled = 0;
+    int mpi_errno, cancelled;
+
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
@@ -547,9 +563,11 @@ int MPID_nem_ptl_anysource_matched(MPID_Request *rreq)
        code. */
     MPIU_Assertp(mpi_errno == MPI_SUCCESS);
 
-    return !cancelled;
-
+ fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
+    return !cancelled;
+ fn_fail:
+    goto fn_exit;
 }
 
 
@@ -560,16 +578,21 @@ int MPID_nem_ptl_anysource_matched(MPID_Request *rreq)
 #define FCNAME MPIU_QUOTE(FUNCNAME)
 int MPID_nem_ptl_cancel_recv(MPIDI_VC_t *vc,  MPID_Request *rreq)
 {
-    int mpi_errno = MPI_SUCCESS;
+    int mpi_errno, cancelled;
+
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_CANCEL_RECV);
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_CANCEL_RECV);
 
-    MPIU_Assert(0 && "implement me");
+    mpi_errno = cancel_recv(rreq, &cancelled);
+    /* FIXME: This function does not return an error because the queue
+       functions (where the posted_recv hooks are called) return no error
+       code. */
+    MPIU_Assertp(mpi_errno == MPI_SUCCESS);
 
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_CANCEL_RECV);
-    return mpi_errno;
+    return !cancelled;
  fn_fail:
     goto fn_exit;
 }

http://git.mpich.org/mpich.git/commitdiff/b52300e549e56e148b8c1abefd2bd2f7122958ed

commit b52300e549e56e148b8c1abefd2bd2f7122958ed
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Fri Jul 25 16:44:15 2014 -0500

    portals4: set destination in send request object
    
    Portals4 encodes the destination rank into its own matching information,
    but the upper layers do not have access to it. In the case of a cancel
    operation, it is still necessary to have this information available.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c
index a642fbd..88ff54e 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c
@@ -192,6 +192,7 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
     MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG);
 
     MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm);
+    sreq->dev.match.parts.rank = dest;
 
     if (!vc_ptl->id_initialized) {
         mpi_errno = MPID_nem_ptl_init_id(vc);

http://git.mpich.org/mpich.git/commitdiff/9db2bb04dab3864143848c66b94ef358fcc27eee

commit 9db2bb04dab3864143848c66b94ef358fcc27eee
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Wed Jul 30 19:31:09 2014 -0500

    ch3: Call netmod cancel functions
    
    Previously, the netmod cancel_recv and cancel_send functions were
    not called. Calling them ensures that the request cancelled status
    is accurate and that any necessary cleanup has been done.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/src/mpid_cancel_recv.c b/src/mpid/ch3/src/mpid_cancel_recv.c
index 24a04f6..d1d3902 100644
--- a/src/mpid/ch3/src/mpid_cancel_recv.c
+++ b/src/mpid/ch3/src/mpid_cancel_recv.c
@@ -12,13 +12,28 @@
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPID_Cancel_recv(MPID_Request * rreq)
 {
+    int netmod_cancelled = TRUE;
+
     MPIDI_STATE_DECL(MPID_STATE_MPID_CANCEL_RECV);
     
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_CANCEL_RECV);
     
     MPIU_Assert(rreq->kind == MPID_REQUEST_RECV);
     
-    if (MPIDI_CH3U_Recvq_DP(rreq))
+    /* If the netmod has its own cancel_recv function, we need to call
+       it here. ANYSOURCE cancels (netmod and otherwise) are handled by
+       MPIDI_CH3U_Recvq_DP below. */
+#ifdef ENABLE_COMM_OVERRIDES
+    if (rreq->dev.match.parts.rank != MPI_ANY_SOURCE)
+    {
+        MPIDI_VC_t *vc;
+        MPIDI_Comm_get_vc_set_active(rreq->comm, rreq->dev.match.parts.rank, &vc);
+        if (vc->comm_ops && vc->comm_ops->cancel_recv)
+            netmod_cancelled = !vc->comm_ops->cancel_recv(NULL, rreq);
+    }
+#endif
+
+    if (netmod_cancelled && MPIDI_CH3U_Recvq_DP(rreq))
     {
 	MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
 		       "request 0x%08x cancelled", rreq->handle);
diff --git a/src/mpid/ch3/src/mpid_cancel_send.c b/src/mpid/ch3/src/mpid_cancel_send.c
index 4772c8c..302d793 100644
--- a/src/mpid/ch3/src/mpid_cancel_send.c
+++ b/src/mpid/ch3/src/mpid_cancel_send.c
@@ -90,6 +90,16 @@ int MPID_Cancel_send(MPID_Request * sreq)
 	goto fn_exit;
     }
 
+    /* If the message went over a netmod and it provides a cancel_send
+       function, call it here. */
+#ifdef ENABLE_COMM_OVERRIDES
+    if (vc->comm_ops && vc->comm_ops->cancel_send)
+    {
+        mpi_errno = vc->comm_ops->cancel_send(vc, sreq);
+        goto fn_exit;
+    }
+#endif
+
     /* Check to see if the send is still in the send queue.  If so, remove it, 
        mark the request and cancelled and complete, and
        release the device's reference to the request object.  

-----------------------------------------------------------------------

Summary of changes:
 .../channels/nemesis/netmod/portals4/ptl_recv.c    |   39 ++++++++++++++++----
 .../channels/nemesis/netmod/portals4/ptl_send.c    |    1 +
 src/mpid/ch3/src/mpid_cancel_recv.c                |   17 ++++++++-
 src/mpid/ch3/src/mpid_cancel_send.c                |   10 +++++
 4 files changed, 58 insertions(+), 9 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list