[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2-19-gda5a5bb

Service Account noreply at mpich.org
Tue Dec 8 17:47:14 CST 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  da5a5bba60896c202e8a0ae2e331738654ae2c42 (commit)
       via  0681d6976055185960b0ceee91f2554c3355c97e (commit)
      from  b7b118dab7cd8c7d21a5fb8e16f9dfdcee29c2a6 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/da5a5bba60896c202e8a0ae2e331738654ae2c42

commit da5a5bba60896c202e8a0ae2e331738654ae2c42
Author: Lena Oden <loden at anl.gov>
Date:   Sun Sep 20 21:08:15 2015 -0500

    mpi/coll/ibcast: handle mismatched length
    
    Instead of a regular receive, we now use a "receive with status" call
    internally in MPI_Ibcast. After receiving, a function is scheduled which
    uses the status to determine the actual received bytes and compare them
    to the expected data. Fixes #2300
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpi/coll/ibcast.c b/src/mpi/coll/ibcast.c
index 819bb93..a4223e3 100644
--- a/src/mpi/coll/ibcast.c
+++ b/src/mpi/coll/ibcast.c
@@ -20,6 +20,65 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Com
 #endif
 /* -- End Profiling Symbol Block */
 
+struct MPIR_Ibcast_status{
+    int curr_bytes;
+    int n_bytes;
+    MPI_Status status;
+};
+/* Add some functions for asynchronous error detection */
+
+#undef FUNCNAME
+#define FUNCNAME sched_test_length
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+
+static sched_test_length(MPID_Comm * comm, int tag, void *state)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int recv_size;
+    struct  MPIR_Ibcast_status *status = (struct MPIR_Ibcast_status*) state;
+    MPIR_Get_count_impl(&status->status, MPI_BYTE, &recv_size);
+    if(status->n_bytes != recv_size || status->status.MPI_ERROR != MPI_SUCCESS) {
+         mpi_errno = MPIR_Err_create_code( mpi_errno, MPIR_ERR_RECOVERABLE,
+               FCNAME, __LINE__, MPI_ERR_OTHER,
+                     "**collective_size_mismatch",
+		          "**collective_size_mismatch %d %d", status->n_bytes, recv_size);
+    }
+   return mpi_errno;
+}
+
+#undef FUNCNAME
+#define FUNCNAME sched_test_curr_length
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+
+static sched_test_curr_length(MPID_Comm * comm, int tag, void *state)
+{
+    int mpi_errno = MPI_SUCCESS;
+    struct  MPIR_Ibcast_status *status = (struct MPIR_Ibcast_status*) state;
+    if(status->n_bytes != status->curr_bytes) {
+        mpi_errno = MPIR_Err_create_code( mpi_errno, MPIR_ERR_RECOVERABLE,
+               FCNAME, __LINE__, MPI_ERR_OTHER,
+                     "**collective_size_mismatch",
+		          "**collective_size_mismatch %d %d", status->n_bytes, status->curr_bytes);
+    }
+   return mpi_errno;
+}
+
+#undef FUNCNAME
+#define FUNCNAME sched_add_length
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+
+static sched_add_length(MPID_Comm * comm, int tag, void *state)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int recv_size;
+    struct  MPIR_Ibcast_status *status = (struct MPIR_Ibcast_status*) state;
+    MPIR_Get_count_impl(&status->status, MPI_BYTE, &recv_size);
+    status->curr_bytes+= recv_size;
+    return mpi_errno;
+}
 /* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
    the MPI routines */
 #ifndef MPICH_MPI_FROM_PMPI
@@ -47,8 +106,9 @@ int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int roo
     MPI_Aint nbytes, type_size;
     int relative_rank;
     int src, dst;
+    struct MPIR_Ibcast_status *status;
     void *tmp_buf = NULL;
-    MPIR_SCHED_CHKPMEM_DECL(1);
+    MPIR_SCHED_CHKPMEM_DECL(2);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -65,6 +125,9 @@ int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int roo
     if (comm_ptr->is_hetero)
         is_homogeneous = 0;
 #endif
+    MPIR_SCHED_CHKPMEM_MALLOC(status, struct MPIR_Ibcast_status *,
+                              sizeof(struct MPIR_Ibcast_status), mpi_errno, "MPI_Stauts");
+
 
     /* MPI_Type_size() might not give the accurate size of the packed
      * datatype for heterogeneous systems (because of padding, encoding,
@@ -80,6 +143,8 @@ int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int roo
 
     nbytes = type_size * count;
 
+    status->n_bytes = nbytes;
+
     if (!is_contig || !is_homogeneous)
     {
         MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf");
@@ -125,13 +190,19 @@ int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int roo
             src = rank - mask; 
             if (src < 0) src += comm_size;
             if (!is_contig || !is_homogeneous)
-                mpi_errno = MPID_Sched_recv(tmp_buf, nbytes, MPI_BYTE, src, comm_ptr, s);
+                mpi_errno = MPID_Sched_recv_status(tmp_buf, nbytes, MPI_BYTE, src,
+                                                    comm_ptr, &status->status, s);
             else
-                mpi_errno = MPID_Sched_recv(buffer, count, datatype, src, comm_ptr, s);
+                mpi_errno = MPID_Sched_recv_status(buffer, count, datatype, src,
+                                                   comm_ptr, &status->status, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
             MPID_SCHED_BARRIER(s);
-
+            if(is_homogeneous){
+                mpi_errno = MPID_Sched_cb(&sched_test_length, status, s);
+                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
+                MPID_SCHED_BARRIER(s);
+            }
             break;
         }
         mask <<= 1;
@@ -326,7 +397,8 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
     MPID_Datatype *dtp;
     MPI_Aint true_extent, true_lb;
     void *tmp_buf;
-    MPIR_SCHED_CHKPMEM_DECL(1);
+    struct MPIR_Ibcast_status *status;
+    MPIR_SCHED_CHKPMEM_DECL(2);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -344,6 +416,8 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
         is_contig = dtp->is_contig;
     }
 
+    MPIR_SCHED_CHKPMEM_MALLOC(status, struct MPIR_Ibcast_status*,
+                              sizeof(struct MPIR_Ibcast_status), mpi_errno, "MPI_Status");
     is_homogeneous = 1;
 #ifdef MPID_HAS_HETERO
     if (comm_ptr->is_hetero)
@@ -354,7 +428,8 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
     MPID_Datatype_get_size_macro(datatype, type_size);
 
     nbytes = type_size * count;
-
+    status->n_bytes = nbytes;
+    status->curr_bytes = 0;
     if (is_contig) {
         /* contiguous and homogeneous. no need to pack. */
         MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
@@ -423,9 +498,12 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
                                         curr_size, MPI_BYTE, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* sendrecv, no barrier */
-            mpi_errno = MPID_Sched_recv(((char *)tmp_buf + recv_offset),
+            mpi_errno = MPID_Sched_recv_status(((char *)tmp_buf + recv_offset),
                                         incoming_count,
-                                        MPI_BYTE, dst, comm_ptr, s);
+                                        MPI_BYTE, dst, comm_ptr,&status->status, s);
+            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
+            MPID_SCHED_BARRIER(s);
+            mpi_errno = MPID_Sched_cb(&sched_add_length, status, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             MPID_SCHED_BARRIER(s);
 
@@ -502,8 +580,12 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
 
                     /* nprocs_completed is also equal to the no. of processes
                        whose data we don't have */
-                    mpi_errno = MPID_Sched_recv(((char *)tmp_buf + offset),
-                                                incoming_count, MPI_BYTE, dst, comm_ptr, s);
+                    mpi_errno = MPID_Sched_recv_status(((char *)tmp_buf + offset),
+                                                incoming_count, MPI_BYTE, dst, comm_ptr,
+                                                &status->status, s);
+                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
+                    MPID_SCHED_BARRIER(s);
+                    mpi_errno = MPID_Sched_cb(&sched_add_length, status, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                     MPID_SCHED_BARRIER(s);
 
@@ -518,7 +600,10 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
         mask <<= 1;
         i++;
     }
-
+    if(is_homogeneous){
+        mpi_errno = MPID_Sched_cb(&sched_test_curr_length, status, s);
+        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
+    }
     if (!is_contig) {
         if (rank != root) {
             mpi_errno = MPID_Sched_copy(tmp_buf, nbytes, MPI_BYTE,
@@ -565,7 +650,9 @@ int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype dat
     MPI_Aint true_extent, true_lb;
     void *tmp_buf = NULL;
     MPID_Datatype *dtp = NULL;
-    MPIR_SCHED_CHKPMEM_DECL(3);
+
+    struct MPIR_Ibcast_status *status;
+    MPIR_SCHED_CHKPMEM_DECL(4);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -587,10 +674,12 @@ int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype dat
         is_homogeneous = 0;
 #endif
     MPIU_Assert(is_homogeneous); /* we don't handle the hetero case yet */
-
+    MPIR_SCHED_CHKPMEM_MALLOC(status, struct MPIR_Ibcast_status*,
+                              sizeof(struct MPIR_Ibcast_status), mpi_errno, "MPI_Status");
     MPID_Datatype_get_size_macro(datatype, type_size);
     nbytes = type_size * count;
-
+    status->n_bytes = nbytes;
+    status->curr_bytes = 0;
     if (is_contig) {
         /* contiguous, no need to pack. */
         MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
@@ -639,14 +728,19 @@ int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype dat
                                     right_count, MPI_BYTE, right, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         /* sendrecv, no barrier here */
-        mpi_errno = MPID_Sched_recv(((char *)tmp_buf + left_disp),
-                                    left_count, MPI_BYTE, left, comm_ptr, s);
+        mpi_errno = MPID_Sched_recv_status(((char *)tmp_buf + left_disp),
+                                    left_count, MPI_BYTE, left, comm_ptr, &status->status, s);
+        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
+        MPID_SCHED_BARRIER(s);
+        mpi_errno = MPID_Sched_cb(&sched_add_length, status, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         MPID_SCHED_BARRIER(s);
 
         j     = jnext;
         jnext = (comm_size + jnext - 1) % comm_size;
     }
+    mpi_errno = MPID_Sched_cb(&sched_test_curr_length, status, s);
+    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     if (!is_contig && rank != root) {
         mpi_errno = MPID_Sched_copy(tmp_buf, nbytes, MPI_BYTE, buffer, count, datatype, s);
@@ -674,10 +768,14 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
     int mpi_errno = MPI_SUCCESS;
     int is_homogeneous;
     MPI_Aint type_size;
+    struct MPIR_Ibcast_status *status;
+    MPIR_SCHED_CHKPMEM_DECL(1);
 
     if (!MPIR_CVAR_ENABLE_SMP_COLLECTIVES || !MPIR_CVAR_ENABLE_SMP_BCAST)
         MPID_Abort(comm_ptr, MPI_ERR_OTHER, 1, "SMP collectives are disabled!");
     MPIU_Assert(MPIR_Comm_is_node_aware(comm_ptr));
+    MPIR_SCHED_CHKPMEM_MALLOC(status, struct MPIR_Ibcast_status*,
+                              sizeof(struct MPIR_Ibcast_status), mpi_errno, "MPI_Status");
 
     is_homogeneous = 1;
 #ifdef MPID_HAS_HETERO
@@ -707,6 +805,7 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
     else
         MPIR_Pack_size_impl(1, datatype, &type_size);
 
+     status->n_bytes = type_size * count;
     /* TODO insert packing here */
 
     /* send to intranode-rank 0 on the root's node */
@@ -717,11 +816,15 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
             mpi_errno = MPID_Sched_send(buffer, count, datatype, 0, comm_ptr->node_comm, s);
         }
         else if (0 == comm_ptr->node_comm->rank) {
-            mpi_errno = MPID_Sched_recv(buffer, count, datatype, MPIU_Get_intranode_rank(comm_ptr, root),
-                                        comm_ptr->node_comm, s);
+            mpi_errno = MPID_Sched_recv_status(buffer, count, datatype, MPIU_Get_intranode_rank(comm_ptr, root),
+                                        comm_ptr->node_comm, &status->status, s);
         }
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         MPID_SCHED_BARRIER(s);
+        MPID_SCHED_BARRIER(s);
+        mpi_errno = MPID_Sched_cb(&sched_test_length, status, s);
+        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
+        MPID_SCHED_BARRIER(s);
     }
 
     /* perform the internode broadcast */
@@ -742,10 +845,11 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
+    MPIR_SCHED_CHKPMEM_COMMIT(s);
 fn_exit:
     return mpi_errno;
 fn_fail:
-    goto fn_exit;
+    MPIR_SCHED_CHKPMEM_REAP(s);
 }
 
 

http://git.mpich.org/mpich.git/commitdiff/0681d6976055185960b0ceee91f2554c3355c97e

commit 0681d6976055185960b0ceee91f2554c3355c97e
Author: Lena Oden <loden at anl.gov>
Date:   Mon Sep 21 17:58:20 2015 -0500

    mpid/common: fix scheduling of non-blocking recvs
    
    This patches fixes the handling of irecv calls which are added to a scheduler.
    The following things are fixed:
    1.) If the MPIC_Irecv call fails, we still have to wait for the completion of
        the request, otherwise there will be a memory leak.
    2.) If receive with status is used, the "received bytes count" must be set.
        This count is used by some non-blocking collectives (e.g. iallreduce).
    3.) If "irecv with status" is used, now the MPI_ERROR field is set.
        This was not done by the previously used MPIR_Request_extract_status.
    
    Refs #2300
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpid/common/sched/mpid_sched.c b/src/mpid/common/sched/mpid_sched.c
index 6f873d8..e669b22 100644
--- a/src/mpid/common/sched/mpid_sched.c
+++ b/src/mpid/common/sched/mpid_sched.c
@@ -183,8 +183,10 @@ static int MPIDU_Sched_start_entry(struct MPIDU_Sched *s, size_t idx, struct MPI
                         r->errflag = MPIR_ERR_OTHER;
                     }
                 }
-                e->status = MPIDU_SCHED_ENTRY_STATUS_FAILED;
-                MPIU_DBG_MSG_D(COMM, VERBOSE, "Sched SEND failed. Errflag: %d\n", (int) r->errflag);
+               /* We should set the status to failed here - since the request is not freed. this
+                * will be handled later in MPIDU_Sched_progress_state, so set to started here */
+                e->status = MPIDU_SCHED_ENTRY_STATUS_STARTED;
+                MPIU_DBG_MSG_D(COMM, VERBOSE, "Sched RECV failed. Errflag: %d\n", (int) r->errflag);
             } else {
                 e->status = MPIDU_SCHED_ENTRY_STATUS_STARTED;
             }
@@ -231,7 +233,7 @@ static int MPIDU_Sched_start_entry(struct MPIDU_Sched *s, size_t idx, struct MPI
                     }
                     e->status = MPIDU_SCHED_ENTRY_STATUS_FAILED;
                 } else {
-                    e->status = MPIDU_SCHED_ENTRY_STATUS_STARTED;
+                    e->status = MPIDU_SCHED_ENTRY_STATUS_COMPLETE;
                 }
             }
             else if (e->u.cb.cb_type == MPIDU_SCHED_CB_TYPE_2) {
@@ -248,13 +250,14 @@ static int MPIDU_Sched_start_entry(struct MPIDU_Sched *s, size_t idx, struct MPI
                     }
                     e->status = MPIDU_SCHED_ENTRY_STATUS_FAILED;
                 } else {
-                    e->status = MPIDU_SCHED_ENTRY_STATUS_STARTED;
+                    e->status = MPIDU_SCHED_ENTRY_STATUS_COMPLETE;
                 }
             }
             else {
                 MPIU_DBG_MSG_D(COMM, TYPICAL, "unknown callback type, e->u.cb.cb_type=%d", e->u.cb.cb_type);
+                e->status = MPIDU_SCHED_ENTRY_STATUS_COMPLETE;
             }
-            e->status = MPIDU_SCHED_ENTRY_STATUS_COMPLETE;
+
             break;
         default:
             MPIU_DBG_MSG_D(COMM, TYPICAL, "unknown entry type, e->type=%d", e->type);
@@ -608,7 +611,7 @@ int MPID_Sched_recv_status(void *buf, MPI_Aint count, MPI_Datatype datatype, int
     e->u.recv.rreq = NULL; /* will be populated by _start_entry */
     e->u.recv.comm = comm;
     e->u.recv.status = status;
-
+    status->MPI_ERROR = MPI_SUCCESS;
     MPIR_Comm_add_ref(comm);
     dtype_add_ref_if_not_builtin(datatype);
 
@@ -837,7 +840,6 @@ static int MPIDU_Sched_progress_state(struct MPIDU_Sched_state *state, int *made
     size_t i;
     struct MPIDU_Sched *s;
     struct MPIDU_Sched *tmp;
-
     if (made_progress)
         *made_progress = FALSE;
 
@@ -865,7 +867,12 @@ static int MPIDU_Sched_progress_state(struct MPIDU_Sched_state *state, int *made
                     if (e->u.recv.rreq != NULL && MPID_Request_is_complete(e->u.recv.rreq)) {
                         MPIU_DBG_MSG_FMT(COMM, VERBOSE, (MPIU_DBG_FDEST, "completed RECV entry %d, rreq=%p\n", (int) i, e->u.recv.rreq));
                         MPIR_Process_status(&e->u.recv.rreq->status, &s->req->errflag);
-                        MPIR_Request_extract_status(e->u.recv.rreq, e->u.recv.status);
+                        if (e->u.recv.status != MPI_STATUS_IGNORE) {
+                            int recvd;
+                            e->u.recv.status->MPI_ERROR = e->u.recv.rreq->status.MPI_ERROR;
+                            MPIR_Get_count_impl(&e->u.recv.rreq->status, MPI_BYTE, &recvd);
+                            MPIR_STATUS_SET_COUNT(*(e->u.recv.status), recvd);
+                        }
                         if (s->req->errflag != MPIR_ERR_NONE)
                             e->status = MPIDU_SCHED_ENTRY_STATUS_FAILED;
                         else

-----------------------------------------------------------------------

Summary of changes:
 src/mpi/coll/ibcast.c              |  142 +++++++++++++++++++++++++++++++-----
 src/mpid/common/sched/mpid_sched.c |   23 ++++--
 2 files changed, 138 insertions(+), 27 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list