[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.3-155-g848a0f3

Service Account noreply at mpich.org
Fri Nov 7 09:21:56 CST 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  848a0f312ef9418aa2bca4d63464b71441c59beb (commit)
       via  0f9a4417e46cae4c8aec4f741b92cf3de2f54b9c (commit)
       via  25f3c2de82a25adde125c1f63ae067887996f593 (commit)
      from  35de3d16d75207b1a7270a469c954e1226f25a8c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/848a0f312ef9418aa2bca4d63464b71441c59beb

commit 848a0f312ef9418aa2bca4d63464b71441c59beb
Author: Wesley Bland <wbland at anl.gov>
Date:   Thu Nov 6 14:46:36 2014 -0600

    Enable revoke_nofail in test
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/test/mpi/ft/testlist b/test/mpi/ft/testlist
index d2706ce..503dfaf 100644
--- a/test/mpi/ft/testlist
+++ b/test/mpi/ft/testlist
@@ -12,7 +12,7 @@ gather 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=T
 reduce 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
 bcast 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
 scatter 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
-anysource 3 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
-revoke_nofail 2 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
+anysource 3 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
+revoke_nofail 2 env=MPIR_CVAR_ENABLE_FT=1 strict=false timeLimit=10
 shrink 8 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
 agree 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945

http://git.mpich.org/mpich.git/commitdiff/0f9a4417e46cae4c8aec4f741b92cf3de2f54b9c

commit 0f9a4417e46cae4c8aec4f741b92cf3de2f54b9c
Author: Wesley Bland <wbland at anl.gov>
Date:   Thu Nov 6 13:17:42 2014 -0600

    Correctly match requests when revoking
    
    Some of the code to do the matching for requests in the posted queue was
    missing. This caused local collectives to hang if the communicator had
    been revoked.
    
    See #1945
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_recvq.c b/src/mpid/ch3/src/ch3u_recvq.c
index 9290470..bf59978 100644
--- a/src/mpid/ch3/src/ch3u_recvq.c
+++ b/src/mpid/ch3/src/ch3u_recvq.c
@@ -1082,6 +1082,7 @@ int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr)
             }
 
             offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ?  MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
+            match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTRANODE_OFFSET + offset;
 
             if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
                 if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
@@ -1109,6 +1110,7 @@ int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr)
             }
 
             offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ?  MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
+            match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTERNODE_OFFSET + offset;
 
             if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
                 if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&

http://git.mpich.org/mpich.git/commitdiff/25f3c2de82a25adde125c1f63ae067887996f593

commit 25f3c2de82a25adde125c1f63ae067887996f593
Author: Wesley Bland <wbland at anl.gov>
Date:   Thu Nov 6 12:49:24 2014 -0600

    Clean up revoke function
    
    Set the counter for processes to be revoked before sending out the
    revoke notifications.
    
    Clean up some unused code.
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/mpid_comm_revoke.c b/src/mpid/ch3/src/mpid_comm_revoke.c
index ab26cf2..1fae5da 100644
--- a/src/mpid/ch3/src/mpid_comm_revoke.c
+++ b/src/mpid/ch3/src/mpid_comm_revoke.c
@@ -24,7 +24,7 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
     MPIDI_VC_t *vc;
     MPID_IOV iov[MPID_IOV_LIMIT];
     int mpi_errno = MPI_SUCCESS;
-    int i, size, my_rank, failed=0;
+    int i, size, my_rank;
     MPID_Request *request;
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_revoke_t *revoke_pkt = &upkt.revoke;
@@ -38,6 +38,11 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
         if (comm_ptr->node_comm) comm_ptr->node_comm->revoked = 1;
         if (comm_ptr->node_roots_comm) comm_ptr->node_roots_comm->revoked = 1;
 
+        /* Start a counter to track how many revoke messages we've received from
+         * other ranks */
+        comm_ptr->dev.waiting_for_revoke = comm_ptr->local_size - 1 - is_remote; /* Subtract the processes who already know about the revoke */
+        MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE, (MPIU_DBG_FDEST, "Comm %08x waiting_for_revoke: %d", comm_ptr->handle, comm_ptr->dev.waiting_for_revoke));
+
         /* Keep a reference to this comm so it doesn't get destroyed while
          * it's being revoked */
         MPIR_Comm_add_ref(comm_ptr);
@@ -60,7 +65,7 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
             MPIU_THREAD_CS_ENTER(CH3COMM, vc);
             mpi_errno = MPIDI_CH3_iStartMsgv(vc, iov, 1, &request);
             MPIU_THREAD_CS_EXIT(CH3COMM, vc);
-            if (mpi_errno) failed++;
+            if (mpi_errno) comm_ptr->dev.waiting_for_revoke--;
             if (NULL != request)
                 /* We don't need to keep a reference to this request. The
                  * progress engine will keep a reference until it completes
@@ -68,11 +73,6 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
                 MPID_Request_release(request);
         }
 
-        /* Start a counter to track how many revoke messages we've received from
-         * other ranks */
-        comm_ptr->dev.waiting_for_revoke = comm_ptr->local_size - 1 - is_remote - failed; /* Subtract the processes who already know about the revoke */
-        MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE, (MPIU_DBG_FDEST, "Comm %08x waiting_for_revoke: %d", comm_ptr->handle, comm_ptr->dev.waiting_for_revoke));
-
         /* Check to see if we are done revoking */
         if (comm_ptr->dev.waiting_for_revoke == 0) {
             MPIR_Comm_release(comm_ptr, 0);
@@ -98,13 +98,6 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
         }
     }
 
-fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_REVOKE);
     return MPI_SUCCESS;
-fn_fail:
-    if (request) {
-        MPIU_Object_set_ref(request, 0);
-        MPIDI_CH3_Request_destroy(request);
-    }
-    goto fn_exit;
 }

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/src/ch3u_recvq.c       |    2 ++
 src/mpid/ch3/src/mpid_comm_revoke.c |   21 +++++++--------------
 test/mpi/ft/testlist                |    4 ++--
 3 files changed, 11 insertions(+), 16 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list