[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.3-155-g848a0f3
Service Account
noreply at mpich.org
Fri Nov 7 09:21:56 CST 2014
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master has been updated
via 848a0f312ef9418aa2bca4d63464b71441c59beb (commit)
via 0f9a4417e46cae4c8aec4f741b92cf3de2f54b9c (commit)
via 25f3c2de82a25adde125c1f63ae067887996f593 (commit)
from 35de3d16d75207b1a7270a469c954e1226f25a8c (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/848a0f312ef9418aa2bca4d63464b71441c59beb
commit 848a0f312ef9418aa2bca4d63464b71441c59beb
Author: Wesley Bland <wbland at anl.gov>
Date: Thu Nov 6 14:46:36 2014 -0600
Enable revoke_nofail in test
Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>
diff --git a/test/mpi/ft/testlist b/test/mpi/ft/testlist
index d2706ce..503dfaf 100644
--- a/test/mpi/ft/testlist
+++ b/test/mpi/ft/testlist
@@ -12,7 +12,7 @@ gather 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=T
reduce 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
bcast 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
scatter 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
-anysource 3 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
-revoke_nofail 2 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
+anysource 3 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
+revoke_nofail 2 env=MPIR_CVAR_ENABLE_FT=1 strict=false timeLimit=10
shrink 8 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
agree 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
http://git.mpich.org/mpich.git/commitdiff/0f9a4417e46cae4c8aec4f741b92cf3de2f54b9c
commit 0f9a4417e46cae4c8aec4f741b92cf3de2f54b9c
Author: Wesley Bland <wbland at anl.gov>
Date: Thu Nov 6 13:17:42 2014 -0600
Correctly match requests when revoking
Some of the code to do the matching for requests in the posted queue was
missing. This caused local collectives to hang if the communicator had
been revoked.
See #1945
Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>
diff --git a/src/mpid/ch3/src/ch3u_recvq.c b/src/mpid/ch3/src/ch3u_recvq.c
index 9290470..bf59978 100644
--- a/src/mpid/ch3/src/ch3u_recvq.c
+++ b/src/mpid/ch3/src/ch3u_recvq.c
@@ -1082,6 +1082,7 @@ int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr)
}
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
+ match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTRANODE_OFFSET + offset;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
@@ -1109,6 +1110,7 @@ int MPIDI_CH3U_Clean_recvq(MPID_Comm *comm_ptr)
}
offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ? MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
+ match.parts.context_id = comm_ptr->recvcontext_id + MPID_CONTEXT_INTERNODE_OFFSET + offset;
if (MATCH_WITH_LEFT_RIGHT_MASK(rreq->dev.match, match, mask)) {
if (MPIR_TAG_MASK_ERROR_BIT(rreq->dev.match.parts.tag) != MPIR_AGREE_TAG &&
http://git.mpich.org/mpich.git/commitdiff/25f3c2de82a25adde125c1f63ae067887996f593
commit 25f3c2de82a25adde125c1f63ae067887996f593
Author: Wesley Bland <wbland at anl.gov>
Date: Thu Nov 6 12:49:24 2014 -0600
Clean up revoke function
Set the counter for processes to be revoked before sending out
the revoke notifications.
Clean up some unused code.
Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>
diff --git a/src/mpid/ch3/src/mpid_comm_revoke.c b/src/mpid/ch3/src/mpid_comm_revoke.c
index ab26cf2..1fae5da 100644
--- a/src/mpid/ch3/src/mpid_comm_revoke.c
+++ b/src/mpid/ch3/src/mpid_comm_revoke.c
@@ -24,7 +24,7 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
MPIDI_VC_t *vc;
MPID_IOV iov[MPID_IOV_LIMIT];
int mpi_errno = MPI_SUCCESS;
- int i, size, my_rank, failed=0;
+ int i, size, my_rank;
MPID_Request *request;
MPIDI_CH3_Pkt_t upkt;
MPIDI_CH3_Pkt_revoke_t *revoke_pkt = &upkt.revoke;
@@ -38,6 +38,11 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
if (comm_ptr->node_comm) comm_ptr->node_comm->revoked = 1;
if (comm_ptr->node_roots_comm) comm_ptr->node_roots_comm->revoked = 1;
+ /* Start a counter to track how many revoke messages we've received from
+ * other ranks */
+ comm_ptr->dev.waiting_for_revoke = comm_ptr->local_size - 1 - is_remote; /* Subtract the processes who already know about the revoke */
+ MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE, (MPIU_DBG_FDEST, "Comm %08x waiting_for_revoke: %d", comm_ptr->handle, comm_ptr->dev.waiting_for_revoke));
+
/* Keep a reference to this comm so it doesn't get destroyed while
* it's being revoked */
MPIR_Comm_add_ref(comm_ptr);
@@ -60,7 +65,7 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
MPIU_THREAD_CS_ENTER(CH3COMM, vc);
mpi_errno = MPIDI_CH3_iStartMsgv(vc, iov, 1, &request);
MPIU_THREAD_CS_EXIT(CH3COMM, vc);
- if (mpi_errno) failed++;
+ if (mpi_errno) comm_ptr->dev.waiting_for_revoke--;
if (NULL != request)
/* We don't need to keep a reference to this request. The
* progress engine will keep a reference until it completes
@@ -68,11 +73,6 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
MPID_Request_release(request);
}
- /* Start a counter to track how many revoke messages we've received from
- * other ranks */
- comm_ptr->dev.waiting_for_revoke = comm_ptr->local_size - 1 - is_remote - failed; /* Subtract the processes who already know about the revoke */
- MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE, (MPIU_DBG_FDEST, "Comm %08x waiting_for_revoke: %d", comm_ptr->handle, comm_ptr->dev.waiting_for_revoke));
-
/* Check to see if we are done revoking */
if (comm_ptr->dev.waiting_for_revoke == 0) {
MPIR_Comm_release(comm_ptr, 0);
@@ -98,13 +98,6 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
}
}
-fn_exit:
MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_REVOKE);
return MPI_SUCCESS;
-fn_fail:
- if (request) {
- MPIU_Object_set_ref(request, 0);
- MPIDI_CH3_Request_destroy(request);
- }
- goto fn_exit;
}
-----------------------------------------------------------------------
Summary of changes:
src/mpid/ch3/src/ch3u_recvq.c | 2 ++
src/mpid/ch3/src/mpid_comm_revoke.c | 21 +++++++--------------
test/mpi/ft/testlist | 4 ++--
3 files changed, 11 insertions(+), 16 deletions(-)
hooks/post-receive
--
MPICH primary repository
More information about the commits
mailing list