[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.1-32-gc8435ec

Service Account noreply at mpich.org
Thu Jun 26 19:27:37 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  c8435ec4b080add9c4ec3c39ecabbc64d8321ecd (commit)
      from  bce35b9f5993450c70913b6f58e24fcbb5bc90d0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/c8435ec4b080add9c4ec3c39ecabbc64d8321ecd

commit c8435ec4b080add9c4ec3c39ecabbc64d8321ecd
Author: Lisandro Dalcin <dalcinl at gmail.com>
Date:   Thu Jun 26 19:23:27 2014 -0500

    Better implementation of MPI_Allreduce for intercommunicator.
    
    The patch provides a better implementation where each group does a
    intra-reduce concurrently, exchanges the reduce result, and broadcasts
    in the group. The current implementation had a problem of serializing
    intra-reduces of each group.
    
    Fixes ticket #2074.
    
    Signed-off-by: Sangmin Seo <sseo at anl.gov>

diff --git a/src/mpi/coll/allreduce.c b/src/mpi/coll/allreduce.c
index 74758f0..4087df6 100644
--- a/src/mpi/coll/allreduce.c
+++ b/src/mpi/coll/allreduce.c
@@ -625,60 +625,55 @@ int MPIR_Allreduce_inter (
     int *errflag )
 {
 /* Intercommunicator Allreduce.
-   We first do an intercommunicator reduce to rank 0 on left group,
-   then an intercommunicator reduce to rank 0 on right group, followed
-   by local intracommunicator broadcasts in each group.
-
-   We don't do local reduces first and then intercommunicator
-   broadcasts because it would require allocation of a temporary buffer. 
+   We first do intracommunicator reduces to rank 0 on left and right
+   groups, then an exchange between left and right rank 0, and finally
+   intracommunicator broadcasts from rank 0 on left and right group.
 */
-    int rank, mpi_errno, root;
+    int mpi_errno;
     int mpi_errno_ret = MPI_SUCCESS;
+    MPI_Aint true_extent, true_lb, extent;
+    void *tmp_buf=NULL;
     MPID_Comm *newcomm_ptr = NULL;
-    
-    rank = comm_ptr->rank;
-
-    /* first do a reduce from right group to rank 0 in left group,
-       then from left group to rank 0 in right group*/
-    if (comm_ptr->is_low_group) {
-        /* reduce from right group to rank 0*/
-        root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
-				      root, comm_ptr, errflag);
-        if (mpi_errno) {
-            /* for communication errors, just record the error but continue */
-            *errflag = TRUE;
-            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
-            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
-        }
+    MPI_Comm comm;
+    MPIU_CHKLMEM_DECL(1);
 
-        /* reduce to rank 0 of right group */
-        root = 0;
-        mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
-				      root, comm_ptr, errflag);
-        if (mpi_errno) {
-            /* for communication errors, just record the error but continue */
-            *errflag = TRUE;
-            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
-            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
-        }
+    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
+
+    if (comm_ptr->rank == 0) {
+        MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
+        MPID_Datatype_get_extent_macro(datatype, extent);
+        /* I think this is the worse case, so we can avoid an assert()
+         * inside the for loop */
+        /* Should MPIU_CHKLMEM_MALLOC do this? */
+        MPID_Ensure_Aint_fits_in_pointer(count * MPIR_MAX(extent, true_extent));
+        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
+        /* adjust for potential negative lower bound in datatype */
+        tmp_buf = (void *)((char*)tmp_buf - true_lb);
+    }
+
+    comm = comm_ptr->handle;
+
+    /* Get the local intracommunicator */
+    if (!comm_ptr->local_comm)
+        MPIR_Setup_intercomm_localcomm( comm_ptr );
+
+    newcomm_ptr = comm_ptr->local_comm;
+
+    /* Do a local reduce on this intracommunicator */
+    mpi_errno = MPIR_Reduce_intra(sendbuf, tmp_buf, count, datatype,
+                                  op, 0, newcomm_ptr, errflag);
+    if (mpi_errno) {
+        /* for communication errors, just record the error but continue */
+        *errflag = TRUE;
+        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
+        MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
     }
-    else {
-        /* reduce to rank 0 of left group */
-        root = 0;
-        mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
-				      root, comm_ptr, errflag);
-        if (mpi_errno) {
-            /* for communication errors, just record the error but continue */
-            *errflag = TRUE;
-            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
-            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
-        }
 
-        /* reduce from right group to rank 0 */
-        root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = MPIR_Reduce_inter(sendbuf, recvbuf, count, datatype, op,
-				      root, comm_ptr, errflag);
+    /* Do a exchange between local and remote rank 0 on this intercommunicator */
+    if (comm_ptr->rank == 0) {
+        mpi_errno = MPIC_Sendrecv(tmp_buf, count, datatype, 0, MPIR_REDUCE_TAG,
+                                  recvbuf, count, datatype, 0, MPIR_REDUCE_TAG,
+                                  comm, MPI_STATUS_IGNORE, errflag);
         if (mpi_errno) {
             /* for communication errors, just record the error but continue */
             *errflag = TRUE;
@@ -687,13 +682,9 @@ int MPIR_Allreduce_inter (
         }
     }
 
-    /* Get the local intracommunicator */
-    if (!comm_ptr->local_comm)
-	MPIR_Setup_intercomm_localcomm( comm_ptr );
-
-    newcomm_ptr = comm_ptr->local_comm;
-
-    mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype, 0, newcomm_ptr, errflag);
+    /* Do a local broadcast on this intracommunicator */
+    mpi_errno = MPIR_Bcast_impl(recvbuf, count, datatype,
+                                0, newcomm_ptr, errflag);
     if (mpi_errno) {
         /* for communication errors, just record the error but continue */
         *errflag = TRUE;
@@ -702,6 +693,8 @@ int MPIR_Allreduce_inter (
     }
 
   fn_exit:
+    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
+    MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag)

-----------------------------------------------------------------------

Summary of changes:
 src/mpi/coll/allreduce.c |  103 +++++++++++++++++++++------------------------
 1 files changed, 48 insertions(+), 55 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list