[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2b4-31-gf48ce34
Service Account
noreply at mpich.org
Thu Jul 30 13:11:37 CDT 2015
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master has been updated
via f48ce34a5bc2c7eb1b0ba3e13efec72a5d674f49 (commit)
from ee6451ab763c7cc7baa3a4ffe3f8a9225bc12d6a (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/f48ce34a5bc2c7eb1b0ba3e13efec72a5d674f49
commit f48ce34a5bc2c7eb1b0ba3e13efec72a5d674f49
Author: Lena Oden <loden at anl.gov>
Date: Fri Jul 24 13:16:16 2015 -0500
Move commit_comm to the end of the comm_idup
Previously, the commit function (MPIR_Comm_commit) was called for
comm_idup before the new context_id had been allocated. As a result,
the network hook (mxm) was invoked with a wrong/fake context_id.
The commit is now scheduled at the end, once all processes have
found the correct context_id and recvcontext_id.
Fixes #2283
Signed-off-by: Pavan Balaji <balaji at anl.gov>
diff --git a/src/mpi/comm/commutil.c b/src/mpi/comm/commutil.c
index 58b0522..4bc328d 100644
--- a/src/mpi/comm/commutil.c
+++ b/src/mpi/comm/commutil.c
@@ -1366,6 +1366,7 @@ struct gcn_state {
MPID_Comm *comm_ptr;
MPID_Comm *comm_ptr_inter;
MPID_Sched_t s;
+ MPID_Comm *new_comm;
MPID_Comm_kind_t gcn_cid_kind;
uint32_t local_mask[MPIR_MAX_CONTEXT_MASK];
};
@@ -1373,6 +1374,22 @@ struct gcn_state {
static int sched_cb_gcn_copy_mask(MPID_Comm *comm, int tag, void *state);
static int sched_cb_gcn_allocate_cid(MPID_Comm *comm, int tag, void *state);
static int sched_cb_gcn_bcast(MPID_Comm *comm, int tag, void *state);
+#undef FUNCNAME
+#define FUNCNAME sched_cb_commit_comm
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+static int sched_cb_commit_comm(MPID_Comm *comm, int tag, void *state)
+{
+ int mpi_errno = MPI_SUCCESS;
+ struct gcn_state *st = state;
+
+ mpi_errno = MPIR_Comm_commit(st->new_comm);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+fn_fail:
+ return mpi_errno;
+
+}
#undef FUNCNAME
#define FUNCNAME sched_cb_gcn_bcast
@@ -1398,6 +1415,8 @@ static int sched_cb_gcn_bcast(MPID_Comm *comm, int tag, void *state)
MPID_SCHED_BARRIER(st->s);
}
+ mpi_errno = MPID_Sched_cb(&sched_cb_commit_comm, st, st->s);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
mpi_errno = MPID_Sched_cb(&MPIR_Sched_cb_free_buf, st, st->s);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -1585,7 +1604,7 @@ fn_fail:
#define FUNCNAME sched_get_cid_nonblock
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
-static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0,
+static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcomm, MPIR_Context_id_t *ctx0,
MPIR_Context_id_t *ctx1, MPID_Sched_t s, MPID_Comm_kind_t gcn_cid_kind)
{
int mpi_errno = MPI_SUCCESS;
@@ -1611,6 +1630,7 @@ static int sched_get_cid_nonblock(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0,
*(st->ctx0) = 0;
st->own_eager_mask = 0;
st->first_iter = 1;
+ st->new_comm = newcomm;
/* idup_count > 1 means there are multiple communicators duplicating
* from the current communicator at the same time. And
* idup_curr_seqnum gives each duplication operation a priority */
@@ -1659,7 +1679,7 @@ int MPIR_Get_contextid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcommp, MPID_R
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* add some entries to it */
- mpi_errno = sched_get_cid_nonblock(comm_ptr, &newcommp->context_id, &newcommp->recvcontext_id, s, MPID_INTRACOMM);
+ mpi_errno = sched_get_cid_nonblock(comm_ptr, newcommp, &newcommp->context_id, &newcommp->recvcontext_id, s, MPID_INTRACOMM);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* finally, kick off the schedule and give the caller a request */
@@ -1703,7 +1723,7 @@ int MPIR_Get_intercomm_contextid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcom
/* add some entries to it */
/* first get a context ID over the local comm */
- mpi_errno = sched_get_cid_nonblock(comm_ptr, &newcommp->recvcontext_id, &newcommp->context_id, s, MPID_INTERCOMM);
+ mpi_errno = sched_get_cid_nonblock(comm_ptr, newcommp, &newcommp->recvcontext_id, &newcommp->context_id, s, MPID_INTERCOMM);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
/* finally, kick off the schedule and give the caller a request */
@@ -2077,9 +2097,6 @@ int MPIR_Comm_copy_data(MPID_Comm *comm_ptr, MPID_Comm **outcomm_ptr)
/* FIXME do we want to copy coll_fns here? */
- mpi_errno = MPIR_Comm_commit(newcomm_ptr);
- if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
/* Start with no attributes on this communicator */
newcomm_ptr->attributes = 0;
*outcomm_ptr = newcomm_ptr;
-----------------------------------------------------------------------
Summary of changes:
src/mpi/comm/commutil.c | 29 +++++++++++++++++++++++------
1 files changed, 23 insertions(+), 6 deletions(-)
hooks/post-receive
--
MPICH primary repository
More information about the commits mailing list