[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2b4-31-gf48ce34

Service Account noreply at mpich.org
Thu Jul 30 13:11:37 CDT 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  f48ce34a5bc2c7eb1b0ba3e13efec72a5d674f49 (commit)
      from  ee6451ab763c7cc7baa3a4ffe3f8a9225bc12d6a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/f48ce34a5bc2c7eb1b0ba3e13efec72a5d674f49

commit f48ce34a5bc2c7eb1b0ba3e13efec72a5d674f49
Author: Lena Oden <loden at anl.gov>
Date:   Fri Jul 24 13:16:16 2015 -0500

    Move commit_comm to the end of the comm_idup
    
    The function commit_comm was called for comm_idup before the
    new context_id was allocated. As a result, the network hook
    (mxm) was called with a wrong/fake context_id. The function is
    now scheduled to run at the end, after all processes have found
    the correct context_id and recvcontext_id.
    
    Fixes #2283
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpi/comm/commutil.c b/src/mpi/comm/commutil.c
index 58b0522..4bc328d 100644
--- a/src/mpi/comm/commutil.c
+++ b/src/mpi/comm/commutil.c
@@ -1366,6 +1366,7 @@ struct gcn_state {
     MPID_Comm *comm_ptr;
     MPID_Comm *comm_ptr_inter;
     MPID_Sched_t s;
+    MPID_Comm *new_comm;
     MPID_Comm_kind_t gcn_cid_kind;
     uint32_t local_mask[MPIR_MAX_CONTEXT_MASK];
 };
@@ -1373,6 +1374,22 @@ struct gcn_state {
 static int sched_cb_gcn_copy_mask(MPID_Comm *comm, int tag, void *state);
 static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state);
 static int sched_cb_gcn_bcast(MPID_Comm *comm, int tag, void *state);
+#undef FUNCNAME
+#define FUNCNAME sched_cb_commit_comm
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+static int sched_cb_commit_comm(MPID_Comm *comm, int tag, void *state)
+{
+    int mpi_errno = MPI_SUCCESS;
+    struct gcn_state *st = state;
+
+    mpi_errno = MPIR_Comm_commit(st->new_comm);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+fn_fail:
+    return mpi_errno;
+
+}
 
 #undef FUNCNAME
 #define FUNCNAME sched_cb_gcn_bcast
@@ -1398,6 +1415,8 @@ static int sched_cb_gcn_bcast(MPID_Comm *comm, int tag, void *state)
         MPID_SCHED_BARRIER(st->s);
     }
 
+    mpi_errno = MPID_Sched_cb(&sched_cb_commit_comm, st, st->s);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_cb(&MPIR_Sched_cb_free_buf, st, st->s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -1585,7 +1604,7 @@ fn_fail:
 #define FUNCNAME sched_get_cid_nonblock
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0,
+static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcomm,  MPIR_Context_id_t *ctx0,
         MPIR_Context_id_t *ctx1, MPID_Sched_t s, MPID_Comm_kind_t gcn_cid_kind)
 {
     int mpi_errno = MPI_SUCCESS;
@@ -1611,6 +1630,7 @@ static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0,
     *(st->ctx0) = 0;
     st->own_eager_mask = 0;
     st->first_iter = 1;
+    st->new_comm = newcomm;
     /* idup_count > 1 means there are multiple communicators duplicating
      * from the current communicator at the same time. And
      * idup_curr_seqnum gives each duplication operation a priority */
@@ -1659,7 +1679,7 @@ int MPIR_Get_contextid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcommp, MPID_R
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     /* add some entries to it */
-    mpi_errno = sched_get_cid_nonblock(comm_ptr, &newcommp->context_id, &newcommp->recvcontext_id, s, MPID_INTRACOMM);
+    mpi_errno = sched_get_cid_nonblock(comm_ptr, newcommp,  &newcommp->context_id, &newcommp->recvcontext_id, s, MPID_INTRACOMM);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     /* finally, kick off the schedule and give the caller a request */
@@ -1703,7 +1723,7 @@ int MPIR_Get_intercomm_contextid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcom
     /* add some entries to it */
 
     /* first get a context ID over the local comm */
-    mpi_errno = sched_get_cid_nonblock(comm_ptr, &newcommp->recvcontext_id, &newcommp->context_id, s, MPID_INTERCOMM);
+    mpi_errno = sched_get_cid_nonblock(comm_ptr, newcommp, &newcommp->recvcontext_id, &newcommp->context_id, s, MPID_INTERCOMM);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     /* finally, kick off the schedule and give the caller a request */
@@ -2077,9 +2097,6 @@ int MPIR_Comm_copy_data(MPID_Comm *comm_ptr, MPID_Comm **outcomm_ptr)
 
     /* FIXME do we want to copy coll_fns here? */
 
-    mpi_errno = MPIR_Comm_commit(newcomm_ptr);
-    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
     /* Start with no attributes on this communicator */
     newcomm_ptr->attributes = 0;
     *outcomm_ptr = newcomm_ptr;

-----------------------------------------------------------------------

Summary of changes:
 src/mpi/comm/commutil.c |   29 +++++++++++++++++++++++------
 1 files changed, 23 insertions(+), 6 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list