[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.0.4-182-g4325f45
mysql vizuser
noreply at mpich.org
Fri May 10 22:49:08 CDT 2013
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master, has been updated
via 4325f45932a64cf6b170192f7d4b3ca35924537c (commit)
via 788dca4a79230a822af8610fe055ea9cc2511b47 (commit)
from a811897209cc3087b4a4379438be31884a637b21 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/4325f45932a64cf6b170192f7d4b3ca35924537c
commit 4325f45932a64cf6b170192f7d4b3ca35924537c
Author: Haizhu Liu <haizhu at us.ibm.com>
Date: Thu Apr 25 11:11:05 2013 -0400
Dynamic tasking MPI_Comm_disconnect HANG
(ibm) D190081
(ibm) 25939d9ffd5267261f70e714aaec434d84e07093
Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>
diff --git a/src/mpid/pamid/src/dyntask/mpidi_port.c b/src/mpid/pamid/src/dyntask/mpidi_port.c
index dffc327..7bd2806 100644
--- a/src/mpid/pamid/src/dyntask/mpidi_port.c
+++ b/src/mpid/pamid/src/dyntask/mpidi_port.c
@@ -35,7 +35,7 @@ typedef struct {
conn_info *_conn_info_list = NULL;
extern int mpidi_dynamic_tasking;
-long long _global_world_intercomm_cntr;
+long long _global_world_intercomm_cntr = 0;
typedef struct MPIDI_Acceptq
{
@@ -571,7 +571,7 @@ int MPIDI_Comm_connect(const char *port_name, MPID_Info *info, int root,
TRACE_ERR("MPIC_Sendrecv returned with mpi_errno=%d\n", mpi_errno);
}
- mpi_errno = MPIC_Sendrecv_replace(&comm_cntr, 1, MPI_INT, 0,
+ mpi_errno = MPIC_Sendrecv_replace(&comm_cntr, 1, MPI_LONG_LONG_INT, 0,
sendtag++, 0, recvtag++, tmp_comm->handle,
MPI_STATUS_IGNORE);
if (mpi_errno != MPI_SUCCESS) {
@@ -1226,7 +1226,7 @@ int MPIDI_Comm_accept(const char *port_name, MPID_Info *info, int root,
TRACE_ERR("MPIC_Sendrecv returned with mpi_errno=%d\n", mpi_errno);
}
#endif
- mpi_errno = MPIC_Sendrecv_replace(&comm_cntr, 1, MPI_INT, 0,
+ mpi_errno = MPIC_Sendrecv_replace(&comm_cntr, 1, MPI_LONG_LONG_INT, 0,
sendtag++, 0, recvtag++, tmp_comm->handle,
MPI_STATUS_IGNORE);
if (mpi_errno != MPI_SUCCESS) {
http://git.mpich.org/mpich.git/commitdiff/788dca4a79230a822af8610fe055ea9cc2511b47
commit 788dca4a79230a822af8610fe055ea9cc2511b47
Author: Haizhu Liu <haizhu at us.ibm.com>
Date: Mon Apr 22 23:31:55 2013 -0400
MPI_Comm_disconnect hang
Also remove MPIR_Comm_group_ext() function declaration and reference.
(ibm) D189966
(ibm) 5681895bcb45798680b249dbf993b9a634954b80
Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>
diff --git a/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c b/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c
index 3e5378e..1b84f53 100644
--- a/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c
+++ b/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c
@@ -207,13 +207,16 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
pami_task_t *leader_tids;
int expected_firstAM=0, expected_secondAM=0, expected_lastAM=0;
MPID_Comm *commworld_ptr;
+ MPID_Group *group_ptr = NULL, *new_group_ptr = NULL;
MPID_VCR *glist;
MPID_Comm *lcomm;
+ int *ranks;
int local_tasks=0, localtasks_in_remglist=0;
int jobIdSize=64;
char jobId[jobIdSize];
int MY_TASKID = PAMIX_Client_query(MPIDI_Client, PAMI_CLIENT_TASK_ID ).value.intval;
+ /*if( (comm_ptr->comm_kind == MPID_INTERCOMM) && (comm_ptr->mpid.world_ids != NULL)) { */
if(comm_ptr->mpid.world_ids != NULL) {
rc = MPID_Iprobe(comm_ptr->rank, MPI_ANY_TAG, comm_ptr, MPID_CONTEXT_INTER_PT2PT, &probe_flag, &status);
if(rc || probe_flag) {
@@ -223,7 +226,15 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
/* make commSubWorld */
{
+ /* MPID_Comm_get_ptr( MPI_COMM_WORLD, commworld_ptr ); */
commworld_ptr = MPIR_Process.comm_world;
+ mpi_errno = MPIR_Comm_group_impl(commworld_ptr, &group_ptr);
+ if (mpi_errno)
+ {
+ TRACE_ERR("Error while creating group_ptr from MPI_COMM_WORLD in MPIDI_Comm_create_from_pami_geom\n");
+ return PAMI_ERROR;
+ }
+
glist = commworld_ptr->vcr;
gsize = commworld_ptr->local_size;
@@ -250,25 +261,48 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
}
}
k=0;
- local_list = MPIU_Malloc(local_tasks*sizeof(pami_task_t));
+ /* local_list = MPIU_Malloc(local_tasks*sizeof(pami_task_t)); */
+ ranks = MPIU_Malloc(local_tasks*sizeof(int));
for(i=0;i<comm_ptr->local_size;i++) {
for(j=0;j<gsize;j++) {
if(comm_ptr->local_vcr[i]->taskid == glist[j]->taskid)
- local_list[k++] = glist[j]->taskid;
+ /* local_list[k] = glist[j]->taskid; */
+ ranks[k++] = j;
}
}
if((comm_ptr->comm_kind == MPID_INTERCOMM) && localtasks_in_remglist) {
for(i=0;i<comm_ptr->remote_size;i++) {
for(j=0;j<gsize;j++) {
if(comm_ptr->vcr[i]->taskid == glist[j]->taskid)
- local_list[k++] = glist[j]->taskid;
+ /* local_list[k] = glist[j]->taskid; */
+ ranks[k++] = j;
}
}
/* Sort the local_list when there are localtasks_in_remglist */
- _qsort_dyntask(local_list, 0, local_tasks-1);
+/* _qsort_dyntask(local_list, 0, local_tasks-1); */
+ _qsort_dyntask(ranks, 0, local_tasks-1);
}
+
+ /* Now we have all we need to create the new group. Create it */
+ /* mpi_errno = MPIR_Group_incl_impl(group_ptr, local_tasks, ranks, &new_group_ptr); */
+ mpi_errno = MPIR_Group_incl_impl(group_ptr, local_tasks, ranks, &new_group_ptr);
+ if (mpi_errno)
+ {
+ TRACE_ERR("Error while creating new_group_ptr from group_ptr in MPIDI_Comm_create_from_pami_geom\n");
+ return PAMI_ERROR;
+ }
+ /* Now create the communicator using the new_group_ptr */
+ mpi_errno = MPIR_Comm_create_group(commworld_ptr, new_group_ptr, 0, &lcomm);
+ /* mpi_errno = MPIR_Comm_create_intra(commworld_ptr, new_group_ptr, &lcomm); */
+ if (mpi_errno)
+ {
+ TRACE_ERR("Error while creating new_comm_ptr from group_ptr in MPIDI_Comm_create_from_pami_geom\n");
+ return PAMI_ERROR;
+ }
+
+#if 0
mpi_errno = MPIR_Comm_create(&lcomm);
if (mpi_errno != MPI_SUCCESS) {
TRACE_ERR("MPIR_Comm_create returned with mpi_errno=%d\n", mpi_errno);
@@ -310,6 +344,8 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
if(MY_TASKID == local_list[i]) lcomm->rank = i;
lcomm->vcr[i]->taskid = local_list[i];
}
+#endif
+
}
TRACE_ERR("subcomm for disconnect is established local_tasks=%d calling MPIR_Barrier_intra\n", local_tasks);
@@ -403,7 +439,8 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
MPIDI_free_tranid_node(comm_ptr->mpid.world_intercomm_cntr);
mpi_errno = MPIR_Comm_release(comm_ptr,1);
if (mpi_errno) TRACE_ERR("MPIR_Comm_release returned with mpi_errno=%d\n", mpi_errno);
- MPIU_Free(local_list);
+ /* MPIU_Free(local_list); */
+ MPIU_Free(ranks);
}
return mpi_errno;
}
diff --git a/src/mpid/pamid/src/mpid_init.c b/src/mpid/pamid/src/mpid_init.c
index 44e0137..8925c48 100644
--- a/src/mpid/pamid/src/mpid_init.c
+++ b/src/mpid/pamid/src/mpid_init.c
@@ -1003,7 +1003,9 @@ MPIDI_VCRT_init(int rank, int size, char *world_tasks, MPIDI_PG_t *pg)
TRACE_ERR("Adding ref for comm=%x vcr=%x pg=%x\n", comm, comm->vcr[0], comm->vcr[0]->pg);
MPIDI_PG_add_ref(comm->vcr[0]->pg);
}
+ comm->local_vcr = comm->vcr;
}
+
#endif
/* -------------------------------- */
@@ -1051,8 +1053,7 @@ MPIDI_VCRT_init(int rank, int size, char *world_tasks, MPIDI_PG_t *pg)
TRACE_ERR("TASKID -- comm->vcr[%d]=%d\n", p, comm->vcr[p]->taskid);
}
- i = 0;
-
+ comm->local_vcr = comm->vcr;
}else {
for (i=0; i<size; i++) {
comm->vcr[i]->taskid = i;
-----------------------------------------------------------------------
Summary of changes:
src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c | 47 ++++++++++++++++++--
src/mpid/pamid/src/dyntask/mpidi_port.c | 6 +-
src/mpid/pamid/src/mpid_init.c | 5 +-
3 files changed, 48 insertions(+), 10 deletions(-)
hooks/post-receive
--
MPICH primary repository
More information about the commits mailing list