[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2a2-24-g4ad367d

Service Account noreply at mpich.org
Wed Nov 26 14:57:53 CST 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  4ad367d0d17f6cecf5d0a9ce6def7d92779f7be2 (commit)
       via  131828204ccab2e9d4f5b4620c3a5727bc60e80a (commit)
      from  cff52d18350ea48bf03fd5badc6b800866028be7 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/4ad367d0d17f6cecf5d0a9ce6def7d92779f7be2

commit 4ad367d0d17f6cecf5d0a9ce6def7d92779f7be2
Author: Wesley Bland <wbland at anl.gov>
Date:   Wed Nov 26 15:58:15 2014 -0500

    Add agree_shrink to testlist
    
    This test was left out of the testlist for some reason
    
    No reviewer

diff --git a/test/mpi/ft/testlist b/test/mpi/ft/testlist
index 67676f3..52dde9a 100644
--- a/test/mpi/ft/testlist
+++ b/test/mpi/ft/testlist
@@ -16,4 +16,5 @@ anysource 3 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTes
 revoke_nofail 2 env=MPIR_CVAR_ENABLE_FT=1 strict=false timeLimit=10
 shrink 8 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
 agree 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
+agree_shrink 4 env=MPIR_CVAR_ENABLE_FT=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
 revoke_shrink 4 env=MPIR_CVAR_ENABLE=1 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=2198

http://git.mpich.org/mpich.git/commitdiff/131828204ccab2e9d4f5b4620c3a5727bc60e80a

commit 131828204ccab2e9d4f5b4620c3a5727bc60e80a
Author: Wesley Bland <wbland at anl.gov>
Date:   Wed Nov 26 10:33:35 2014 -0500

    Fix bug in shrink when serializing failed procs
    
    The function to convert the group of failed procs to a bitarray was
    incorrectly quitting early if one of the globally known failed processes
    was not in the communicator being dealt with.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpi/comm/comm_shrink.c b/src/mpi/comm/comm_shrink.c
index 6bdd987..631edb5 100644
--- a/src/mpi/comm/comm_shrink.c
+++ b/src/mpi/comm/comm_shrink.c
@@ -50,7 +50,7 @@ int MPIR_Comm_shrink(MPID_Comm *comm_ptr, MPID_Comm **newcomm_ptr)
     int mpi_errno = MPI_SUCCESS;
     MPID_Group *global_failed, *comm_grp, *new_group_ptr;
     int attempts = 0;
-    mpir_errflag_t errflag = MPIR_ERR_NONE, tmp_errflag = MPIR_ERR_NONE;
+    mpir_errflag_t errflag = MPIR_ERR_NONE;
 
     MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);
     MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_SHRINK);
@@ -59,7 +59,9 @@ int MPIR_Comm_shrink(MPID_Comm *comm_ptr, MPID_Comm **newcomm_ptr)
     MPIR_Comm_group_impl(comm_ptr, &comm_grp);
 
     do {
-        mpi_errno = MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG);
+        errflag = MPIR_ERR_NONE;
+
+        MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG);
         /* Ignore the mpi_errno value here as it will definitely communicate
          * with failed procs */
 
@@ -68,10 +70,15 @@ int MPIR_Comm_shrink(MPID_Comm *comm_ptr, MPID_Comm **newcomm_ptr)
         if (MPID_Group_empty != global_failed) MPIR_Group_release(global_failed);
 
         mpi_errno = MPIR_Comm_create_group(comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, newcomm_ptr);
-        errflag = mpi_errno || *newcomm_ptr == NULL;
+        if (*newcomm_ptr == NULL) {
+            errflag = MPIR_ERR_PROC_FAILED;
+        } else if (mpi_errno) {
+            errflag = MPIR_ERR_GET_CLASS(mpi_errno);
+            MPIR_Comm_release(*newcomm_ptr, 0);
+        }
 
         mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, &errflag, 1, MPI_INT, MPI_MAX, comm_ptr,
-            new_group_ptr, MPIR_SHRINK_TAG, &tmp_errflag);
+            new_group_ptr, MPIR_SHRINK_TAG, &errflag);
         MPIR_Group_release(new_group_ptr);
 
         if (errflag) MPIU_Object_set_ref(new_group_ptr, 0);
diff --git a/src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c b/src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c
index 4cacc76..43a2faf 100644
--- a/src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c
+++ b/src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c
@@ -33,7 +33,8 @@ static int *group_to_bitarray(MPID_Group *group, MPID_Comm *orig_comm) {
     MPIR_Group_translate_ranks_impl(group, group->size, group_ranks,
                                     orig_comm->local_group, comm_ranks);
 
-    for (i = 0; i < group->size && comm_ranks[i] != MPI_UNDEFINED; i++) {
+    for (i = 0; i < group->size ; i++) {
+        if (comm_ranks[i] == MPI_UNDEFINED) continue;
         index = comm_ranks[i] / 32;
         mask = 0x80000000 >> comm_ranks[i] % 32;
         bitarray[index] |= mask;
@@ -128,7 +129,7 @@ int MPID_Comm_get_all_failed_procs(MPID_Comm *comm_ptr, MPID_Group **failed_grou
             /* Send the list to each rank to be processed locally */
             mpi_errno = MPIC_Send(bitarray, bitarray_size, MPI_UINT32_T, i,
                 tag, comm_ptr->handle, &errflag);
-            if (mpi_errno) errflag = 1;
+            if (mpi_errno) errflag = MPIR_ERR_PROC_FAILED;
         }
 
         /* Convert the bitarray into a group */
@@ -137,12 +138,10 @@ int MPID_Comm_get_all_failed_procs(MPID_Comm *comm_ptr, MPID_Group **failed_grou
         /* Send my bitarray to rank 0 */
         mpi_errno = MPIC_Send(bitarray, bitarray_size, MPI_UINT32_T, 0,
             tag, comm_ptr->handle, &errflag);
-        if (mpi_errno) errflag = 1;
 
         /* Get the resulting bitarray back from rank 0 */
         mpi_errno = MPIC_Recv(remote_bitarray, bitarray_size, MPI_UINT32_T, 0,
             tag, comm_ptr->handle, MPI_STATUS_IGNORE, &errflag);
-        if (mpi_errno) errflag = 1;
 
         /* Convert the bitarray into a group */
         *failed_group = bitarray_to_group(comm_ptr, remote_bitarray);

-----------------------------------------------------------------------

Summary of changes:
 src/mpi/comm/comm_shrink.c                        |   15 +++++++++++----
 src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c |    7 +++----
 test/mpi/ft/testlist                              |    1 +
 3 files changed, 15 insertions(+), 8 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list