[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2b2-51-gc3184ef

Service Account noreply at mpich.org
Thu May 28 10:46:50 CDT 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  c3184ef2f6fff86503d2892142add19060bf94a1 (commit)
      from  914f461b1a171c3a29043b548af842fb6159379b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/c3184ef2f6fff86503d2892142add19060bf94a1

commit c3184ef2f6fff86503d2892142add19060bf94a1
Author: Huiwei Lu <huiweilu at mcs.anl.gov>
Date:   Wed May 27 12:08:16 2015 -0500

    Fix threaded MPI_Comm_idup
    
    Removes unnecessary thread yielding in the threaded nonblocking context id
    allocation algorithm. The error was introduced by "copy-pasting" from
    the blocking context id allocation algorithm
    (MPIR_Get_contextid_sparse_group) when implementing the nonblocking
    algorithm. Note the subtle difference in thread handling between the
    two. In the blocking algorithm, a yield is needed to allow another thread
    to make progress. In the nonblocking algorithm, there is no need to yield
    to another thread because this thread will not block progress. On the
    contrary, an unnecessary yield allows other threads to execute and
    insert entries into the nonblocking schedule in the wrong order, causing
    errors.
    
    Fixes #2183
    
    Signed-off-by: Junchao Zhang <jczhang at mcs.anl.gov>

diff --git a/src/mpi/comm/commutil.c b/src/mpi/comm/commutil.c
index cbe659e..4628a41 100644
--- a/src/mpi/comm/commutil.c
+++ b/src/mpi/comm/commutil.c
@@ -1277,6 +1277,19 @@ fn_fail:
     return mpi_errno;
 }
 
+/* Try to find a valid context id.
+ *
+ * If the context id is found, then broadcast it; if not, then retry the
+ * nonblocking context id allocation algorithm again.
+ *
+ * Note the subtle difference in thread handling between the nonblocking
+ * algorithm (sched_cb_gcn_allocate_cid) and the blocking algorithm
+ * (MPIR_Get_contextid_sparse_group). In the nonblocking algorithm, there is
+ * no need to yield to another thread because this thread will not block
+ * progress. On the contrary, an unnecessary yield would let other threads
+ * run and insert entries into the nonblocking schedule in the wrong order,
+ * causing errors.
+ */
 #undef FUNCNAME
 #define FUNCNAME sched_cb_gcn_allocate_cid
 #undef FCNAME
@@ -1287,7 +1300,6 @@ static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state)
     struct gcn_state *st = state;
     MPIR_Context_id_t newctxid;
 
-    MPIU_THREAD_CS_ENTER(CONTEXTID,);
     if (st->own_eager_mask) {
         newctxid = MPIR_Find_and_allocate_context_id(st->local_mask);
         if (st->ctx0)
@@ -1297,14 +1309,6 @@ static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state)
 
         st->own_eager_mask = 0;
         eager_in_use = 0;
-
-        if (newctxid <= 0) {
-            /* else we did not find a context id. Give up the mask in case
-             * there is another thread (with a lower input context id)
-             * waiting for it.  We need to ensure that any other threads
-             * have the opportunity to run, hence yielding */
-            MPIU_THREAD_CS_YIELD(CONTEXTID,);
-        }
     } else if (st->own_mask) {
         newctxid = MPIR_Find_and_allocate_context_id(st->local_mask);
 
@@ -1319,16 +1323,7 @@ static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state)
         if (newctxid > 0) {
             if (lowestContextId == st->comm_ptr->context_id)
                 lowestContextId = MPIR_MAXID;
-        } else {
-            /* else we did not find a context id. Give up the mask in case
-             * there is another thread (with a lower input context id)
-             * waiting for it.  We need to ensure that any other threads
-             * have the opportunity to run, hence yielding */
-            MPIU_THREAD_CS_YIELD(CONTEXTID,);
         }
-    } else {
-        /* As above, force this thread to yield */
-        MPIU_THREAD_CS_YIELD(CONTEXTID,);
     }
 
     if (*st->ctx0 == 0) {
@@ -1343,8 +1338,6 @@ static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state)
         MPID_SCHED_BARRIER(st->s);
     }
 
-    MPIU_THREAD_CS_EXIT(CONTEXTID,);
-
     /* --BEGIN ERROR HANDLING-- */
     /* --END ERROR HANDLING-- */
 fn_fail:
@@ -1360,7 +1353,6 @@ static int sched_cb_gcn_copy_mask(MPID_Comm *comm, int tag, void *state)
     int mpi_errno = MPI_SUCCESS;
     struct gcn_state *st = state;
 
-    MPIU_THREAD_CS_ENTER(CONTEXTID,);
     if (st->first_iter) {
         memset(st->local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
         st->own_eager_mask = 0;
@@ -1394,7 +1386,6 @@ static int sched_cb_gcn_copy_mask(MPID_Comm *comm, int tag, void *state)
             st->own_mask = 1;
         }
     }
-    MPIU_THREAD_CS_EXIT(CONTEXTID,);
 
     mpi_errno = st->comm_ptr->coll_fns->Iallreduce_sched(MPI_IN_PLACE, st->local_mask, MPIR_MAX_CONTEXT_MASK,
                                                MPI_UINT32_T, MPI_BAND, st->comm_ptr, st->s);
@@ -1457,7 +1448,6 @@ static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0,
     struct gcn_state *st = NULL;
     MPIU_CHKPMEM_DECL(1);
 
-    MPIU_THREAD_CS_ENTER(CONTEXTID,);
     if (initialize_context_mask) {
         MPIR_Init_contextid();
     }
@@ -1483,7 +1473,6 @@ static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0,
         MPIU_Assert( MPIR_CVAR_CTXID_EAGER_SIZE >= 0 && MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK-1 );
         eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
     }
-    MPIU_THREAD_CS_EXIT(CONTEXTID,);
 
     mpi_errno = MPID_Sched_cb(&sched_cb_gcn_copy_mask, st, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);

-----------------------------------------------------------------------

Summary of changes:
 src/mpi/comm/commutil.c |   37 +++++++++++++------------------------
 1 files changed, 13 insertions(+), 24 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list