[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2b2-51-gc3184ef
Service Account
noreply at mpich.org
Thu May 28 10:46:50 CDT 2015
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master has been updated
via c3184ef2f6fff86503d2892142add19060bf94a1 (commit)
from 914f461b1a171c3a29043b548af842fb6159379b (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/c3184ef2f6fff86503d2892142add19060bf94a1
commit c3184ef2f6fff86503d2892142add19060bf94a1
Author: Huiwei Lu <huiweilu at mcs.anl.gov>
Date: Wed May 27 12:08:16 2015 -0500
Fix threaded MPI_Comm_idup
Removes unnecessary thread yielding in threaded nonblocking context id
allocation algorithm. The error was introduced by "copy-pasting" from
the blocking context id allocation algorithm
(MPIR_Get_contextid_sparse_group) when implementing the nonblocking
algorithm. Note the subtle difference on thread handling between the
two. In the blocking algorithm, yield is needed to allow another thread
to make progress. In nonblocking algorithm, there is no need to yield to
another thread because this thread will not block the progress. On the
contrary, unnecessary yield will allow other threads to execute and
insert wrong order of entries to the nonblocking schedule and cause
errors.
Fixes #2183
Signed-off-by: Junchao Zhang <jczhang at mcs.anl.gov>
diff --git a/src/mpi/comm/commutil.c b/src/mpi/comm/commutil.c
index cbe659e..4628a41 100644
--- a/src/mpi/comm/commutil.c
+++ b/src/mpi/comm/commutil.c
@@ -1277,6 +1277,19 @@ fn_fail:
return mpi_errno;
}
+/* Try to find a valid context id.
+ *
+ * If the context id is found, then broadcast it; if not, then retry the
+ * nonblocking context id allocation algorithm again.
+ *
+ * Note the subtle difference on thread handling between the nonblocking
+ * algorithm (sched_cb_gcn_allocate_cid) and the blocking algorithm
+ * (MPIR_Get_contextid_sparse_group). In nonblocking algorithm, there is no
+ * need to yield to another thread because this thread will not block the
+ * progress. On the contrary, unnecessary yield will allow other threads to
+ * execute and insert wrong order of entries to the nonblocking schedule and
+ * cause errors.
+ */
#undef FUNCNAME
#define FUNCNAME sched_cb_gcn_allocate_cid
#undef FCNAME
@@ -1287,7 +1300,6 @@ static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state)
struct gcn_state *st = state;
MPIR_Context_id_t newctxid;
- MPIU_THREAD_CS_ENTER(CONTEXTID,);
if (st->own_eager_mask) {
newctxid = MPIR_Find_and_allocate_context_id(st->local_mask);
if (st->ctx0)
@@ -1297,14 +1309,6 @@ static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state)
st->own_eager_mask = 0;
eager_in_use = 0;
-
- if (newctxid <= 0) {
- /* else we did not find a context id. Give up the mask in case
- * there is another thread (with a lower input context id)
- * waiting for it. We need to ensure that any other threads
- * have the opportunity to run, hence yielding */
- MPIU_THREAD_CS_YIELD(CONTEXTID,);
- }
} else if (st->own_mask) {
newctxid = MPIR_Find_and_allocate_context_id(st->local_mask);
@@ -1319,16 +1323,7 @@ static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state)
if (newctxid > 0) {
if (lowestContextId == st->comm_ptr->context_id)
lowestContextId = MPIR_MAXID;
- } else {
- /* else we did not find a context id. Give up the mask in case
- * there is another thread (with a lower input context id)
- * waiting for it. We need to ensure that any other threads
- * have the opportunity to run, hence yielding */
- MPIU_THREAD_CS_YIELD(CONTEXTID,);
}
- } else {
- /* As above, force this thread to yield */
- MPIU_THREAD_CS_YIELD(CONTEXTID,);
}
if (*st->ctx0 == 0) {
@@ -1343,8 +1338,6 @@ static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state)
MPID_SCHED_BARRIER(st->s);
}
- MPIU_THREAD_CS_EXIT(CONTEXTID,);
-
/* --BEGIN ERROR HANDLING-- */
/* --END ERROR HANDLING-- */
fn_fail:
@@ -1360,7 +1353,6 @@ static int sched_cb_gcn_copy_mask(MPID_Comm *comm, int tag, void *state)
int mpi_errno = MPI_SUCCESS;
struct gcn_state *st = state;
- MPIU_THREAD_CS_ENTER(CONTEXTID,);
if (st->first_iter) {
memset(st->local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
st->own_eager_mask = 0;
@@ -1394,7 +1386,6 @@ static int sched_cb_gcn_copy_mask(MPID_Comm *comm, int tag, void *state)
st->own_mask = 1;
}
}
- MPIU_THREAD_CS_EXIT(CONTEXTID,);
mpi_errno = st->comm_ptr->coll_fns->Iallreduce_sched(MPI_IN_PLACE, st->local_mask, MPIR_MAX_CONTEXT_MASK,
MPI_UINT32_T, MPI_BAND, st->comm_ptr, st->s);
@@ -1457,7 +1448,6 @@ static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0,
struct gcn_state *st = NULL;
MPIU_CHKPMEM_DECL(1);
- MPIU_THREAD_CS_ENTER(CONTEXTID,);
if (initialize_context_mask) {
MPIR_Init_contextid();
}
@@ -1483,7 +1473,6 @@ static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0,
MPIU_Assert( MPIR_CVAR_CTXID_EAGER_SIZE >= 0 && MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK-1 );
eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
}
- MPIU_THREAD_CS_EXIT(CONTEXTID,);
mpi_errno = MPID_Sched_cb(&sched_cb_gcn_copy_mask, st, s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-----------------------------------------------------------------------
Summary of changes:
src/mpi/comm/commutil.c | 37 +++++++++++++------------------------
1 files changed, 13 insertions(+), 24 deletions(-)
hooks/post-receive
--
MPICH primary repository
More information about the commits
mailing list