[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.0.4-182-g4325f45

mysql vizuser noreply at mpich.org
Fri May 10 22:49:08 CDT 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  4325f45932a64cf6b170192f7d4b3ca35924537c (commit)
       via  788dca4a79230a822af8610fe055ea9cc2511b47 (commit)
      from  a811897209cc3087b4a4379438be31884a637b21 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/4325f45932a64cf6b170192f7d4b3ca35924537c

commit 4325f45932a64cf6b170192f7d4b3ca35924537c
Author: Haizhu Liu <haizhu at us.ibm.com>
Date:   Thu Apr 25 11:11:05 2013 -0400

    Dynamic tasking MPI_Comm_disconnect HANG
    
    (ibm) D190081
    (ibm) 25939d9ffd5267261f70e714aaec434d84e07093
    
    Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>

diff --git a/src/mpid/pamid/src/dyntask/mpidi_port.c b/src/mpid/pamid/src/dyntask/mpidi_port.c
index dffc327..7bd2806 100644
--- a/src/mpid/pamid/src/dyntask/mpidi_port.c
+++ b/src/mpid/pamid/src/dyntask/mpidi_port.c
@@ -35,7 +35,7 @@ typedef struct {
 
 conn_info  *_conn_info_list = NULL;
 extern int mpidi_dynamic_tasking;
-long long _global_world_intercomm_cntr;
+long long _global_world_intercomm_cntr = 0;
 
 typedef struct MPIDI_Acceptq
 {
@@ -571,7 +571,7 @@ int MPIDI_Comm_connect(const char *port_name, MPID_Info *info, int root,
 	    TRACE_ERR("MPIC_Sendrecv returned with mpi_errno=%d\n", mpi_errno);
 	}
 
-        mpi_errno = MPIC_Sendrecv_replace(&comm_cntr, 1, MPI_INT, 0,
+        mpi_errno = MPIC_Sendrecv_replace(&comm_cntr, 1, MPI_LONG_LONG_INT, 0,
                                   sendtag++, 0, recvtag++, tmp_comm->handle,
                                   MPI_STATUS_IGNORE);
         if (mpi_errno != MPI_SUCCESS) {
@@ -1226,7 +1226,7 @@ int MPIDI_Comm_accept(const char *port_name, MPID_Info *info, int root,
 	    TRACE_ERR("MPIC_Sendrecv returned with mpi_errno=%d\n", mpi_errno);
 	}
 #endif
-        mpi_errno = MPIC_Sendrecv_replace(&comm_cntr, 1, MPI_INT, 0,
+        mpi_errno = MPIC_Sendrecv_replace(&comm_cntr, 1, MPI_LONG_LONG_INT, 0,
                                   sendtag++, 0, recvtag++, tmp_comm->handle,
                                   MPI_STATUS_IGNORE);
         if (mpi_errno != MPI_SUCCESS) {

http://git.mpich.org/mpich.git/commitdiff/788dca4a79230a822af8610fe055ea9cc2511b47

commit 788dca4a79230a822af8610fe055ea9cc2511b47
Author: Haizhu Liu <haizhu at us.ibm.com>
Date:   Mon Apr 22 23:31:55 2013 -0400

    MPI_Comm_disconnect hang
    
    Also remove MPIR_Comm_group_ext() function declaration and reference.
    
    (ibm) D189966
    (ibm) 5681895bcb45798680b249dbf993b9a634954b80
    
    Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>

diff --git a/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c b/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c
index 3e5378e..1b84f53 100644
--- a/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c
+++ b/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c
@@ -207,13 +207,16 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
     pami_task_t *leader_tids;
     int expected_firstAM=0, expected_secondAM=0, expected_lastAM=0;
     MPID_Comm *commworld_ptr;
+    MPID_Group *group_ptr = NULL,  *new_group_ptr = NULL;
     MPID_VCR *glist;
     MPID_Comm *lcomm;
+    int *ranks;
     int local_tasks=0, localtasks_in_remglist=0;
     int jobIdSize=64;
     char jobId[jobIdSize];
     int MY_TASKID = PAMIX_Client_query(MPIDI_Client, PAMI_CLIENT_TASK_ID  ).value.intval;
 
+    /*if( (comm_ptr->comm_kind == MPID_INTERCOMM) && (comm_ptr->mpid.world_ids != NULL)) { */
     if(comm_ptr->mpid.world_ids != NULL) {
 	rc = MPID_Iprobe(comm_ptr->rank, MPI_ANY_TAG, comm_ptr, MPID_CONTEXT_INTER_PT2PT, &probe_flag, &status);
         if(rc || probe_flag) {
@@ -223,7 +226,15 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
 
 	/* make commSubWorld */
 	{
+	  /*           MPID_Comm_get_ptr( MPI_COMM_WORLD, commworld_ptr ); */
 	  commworld_ptr = MPIR_Process.comm_world;
+	  mpi_errno = MPIR_Comm_group_impl(commworld_ptr, &group_ptr);
+	  if (mpi_errno)
+	    {
+	      TRACE_ERR("Error while creating group_ptr from MPI_COMM_WORLD in MPIDI_Comm_create_from_pami_geom\n");
+	      return PAMI_ERROR;
+	    }
+
 	  
 	  glist = commworld_ptr->vcr;
 	  gsize = commworld_ptr->local_size;
@@ -250,25 +261,48 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
 	    }
 	  }
 	  k=0;
-	  local_list = MPIU_Malloc(local_tasks*sizeof(pami_task_t));
+	  /*	  local_list = MPIU_Malloc(local_tasks*sizeof(pami_task_t)); */
+	  ranks = MPIU_Malloc(local_tasks*sizeof(int));
 
 	  for(i=0;i<comm_ptr->local_size;i++) {
 	    for(j=0;j<gsize;j++) {
 	      if(comm_ptr->local_vcr[i]->taskid == glist[j]->taskid)
-		local_list[k++] = glist[j]->taskid;
+		/*	local_list[k] = glist[j]->taskid; */
+		ranks[k++] = j;
 	    }
 	  }
 	  if((comm_ptr->comm_kind == MPID_INTERCOMM) && localtasks_in_remglist) {
 	    for(i=0;i<comm_ptr->remote_size;i++) {
 	      for(j=0;j<gsize;j++) {
 		if(comm_ptr->vcr[i]->taskid == glist[j]->taskid)
-		  local_list[k++] = glist[j]->taskid;
+		  /*	  local_list[k] = glist[j]->taskid; */
+		  ranks[k++] = j;
 	      }
 	    }
 	    /* Sort the local_list when there are localtasks_in_remglist */
-	    _qsort_dyntask(local_list, 0, local_tasks-1);
+/*	    _qsort_dyntask(local_list, 0, local_tasks-1); */
+	    _qsort_dyntask(ranks, 0, local_tasks-1);
 	  }
+
+	  /* Now we have all we need to create the new group. Create it */
+	  /*  mpi_errno = MPIR_Group_incl_impl(group_ptr, local_tasks, ranks, &new_group_ptr); */
+	  mpi_errno = MPIR_Group_incl_impl(group_ptr, local_tasks, ranks, &new_group_ptr);
+	  if (mpi_errno)
+	    {
+	      TRACE_ERR("Error while creating new_group_ptr from group_ptr in MPIDI_Comm_create_from_pami_geom\n");
+	      return PAMI_ERROR;
+	    }
 	  
+	  /* Now create the communicator using the new_group_ptr */
+	  mpi_errno = MPIR_Comm_create_group(commworld_ptr, new_group_ptr, 0, &lcomm);
+	  /*  mpi_errno = MPIR_Comm_create_intra(commworld_ptr, new_group_ptr, &lcomm); */
+	  if (mpi_errno)
+	    {
+	      TRACE_ERR("Error while creating new_comm_ptr from group_ptr in MPIDI_Comm_create_from_pami_geom\n");
+	      return PAMI_ERROR;
+	    }
+
+#if 0
 	  mpi_errno = MPIR_Comm_create(&lcomm);
 	  if (mpi_errno != MPI_SUCCESS) {
 	    TRACE_ERR("MPIR_Comm_create returned with mpi_errno=%d\n", mpi_errno);
@@ -310,6 +344,8 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
 	    if(MY_TASKID == local_list[i]) lcomm->rank = i;
 	    lcomm->vcr[i]->taskid = local_list[i];
 	  }
+#endif
+
 	}
 
 	TRACE_ERR("subcomm for disconnect is established local_tasks=%d calling MPIR_Barrier_intra\n", local_tasks);
@@ -403,7 +439,8 @@ int MPID_Comm_disconnect(MPID_Comm *comm_ptr)
 	MPIDI_free_tranid_node(comm_ptr->mpid.world_intercomm_cntr);
         mpi_errno = MPIR_Comm_release(comm_ptr,1);
         if (mpi_errno) TRACE_ERR("MPIR_Comm_release returned with mpi_errno=%d\n", mpi_errno);
-	MPIU_Free(local_list);
+	/*	MPIU_Free(local_list); */
+	MPIU_Free(ranks);
     }
     return mpi_errno;
 }
diff --git a/src/mpid/pamid/src/mpid_init.c b/src/mpid/pamid/src/mpid_init.c
index 44e0137..8925c48 100644
--- a/src/mpid/pamid/src/mpid_init.c
+++ b/src/mpid/pamid/src/mpid_init.c
@@ -1003,7 +1003,9 @@ MPIDI_VCRT_init(int rank, int size, char *world_tasks, MPIDI_PG_t *pg)
       TRACE_ERR("Adding ref for comm=%x vcr=%x pg=%x\n", comm, comm->vcr[0], comm->vcr[0]->pg);
       MPIDI_PG_add_ref(comm->vcr[0]->pg);
     }
+    comm->local_vcr = comm->vcr;
   }
+ 
 #endif
 
   /* -------------------------------- */
@@ -1051,8 +1053,7 @@ MPIDI_VCRT_init(int rank, int size, char *world_tasks, MPIDI_PG_t *pg)
 	  TRACE_ERR("TASKID -- comm->vcr[%d]=%d\n", p, comm->vcr[p]->taskid);
     }
 
-  i = 0;
-
+   comm->local_vcr = comm->vcr;
   }else {
 	for (i=0; i<size; i++) {
 	  comm->vcr[i]->taskid = i;

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c |   47 ++++++++++++++++++--
 src/mpid/pamid/src/dyntask/mpidi_port.c           |    6 +-
 src/mpid/pamid/src/mpid_init.c                    |    5 +-
 3 files changed, 48 insertions(+), 10 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list