[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.3-146-gdcd7ee6

Service Account noreply at mpich.org
Thu Nov 6 10:23:41 CST 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  dcd7ee6cd5fd0933651f1dc5a26637a33aff1112 (commit)
       via  9b6eea16e1302800f418451c6095f8eab4f5e36f (commit)
       via  5b0cfb3be4aa050709ec633c5cfa2860174cf51f (commit)
       via  c2be640e9d787337bf5260d1081b898668541183 (commit)
       via  e89e6c66ceeb93725f1024927347c2d2787d3934 (commit)
       via  d71154aa3a886f44c51e33ac045a2cf09301ff41 (commit)
      from  42ebe24073b4c68573327a646b3bea0a8b161d08 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/dcd7ee6cd5fd0933651f1dc5a26637a33aff1112

commit dcd7ee6cd5fd0933651f1dc5a26637a33aff1112
Author: Wesley Bland <wbland at anl.gov>
Date:   Wed Nov 5 14:32:38 2014 -0600

    Mark anysource as not failing anymore
    
    This test should be fixed by the latest commits.
    
    See #1945
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/test/mpi/ft/testlist b/test/mpi/ft/testlist
index ed4ee5d..41fa987 100644
--- a/test/mpi/ft/testlist
+++ b/test/mpi/ft/testlist
@@ -12,7 +12,7 @@ gather 4 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=f
 reduce 4 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
 bcast 4 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
 scatter 4 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
-anysource 3 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
+anysource 3 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
 revoke_nofail 2 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
 shrink 8 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
 agree 4 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945

http://git.mpich.org/mpich.git/commitdiff/9b6eea16e1302800f418451c6095f8eab4f5e36f

commit 9b6eea16e1302800f418451c6095f8eab4f5e36f
Author: Wesley Bland <wbland at anl.gov>
Date:   Wed Nov 5 14:32:07 2014 -0600

    Improve the anysource test
    
    Improves the anysource test by doing more error checking to provide
    better output.
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/test/mpi/ft/anysource.c b/test/mpi/ft/anysource.c
index 31459f6..650057a 100644
--- a/test/mpi/ft/anysource.c
+++ b/test/mpi/ft/anysource.c
@@ -15,8 +15,7 @@
 int main(int argc, char **argv)
 {
     int rank, size, err, ec;
-    char buf[10] = " No errors";
-    char error[MPI_MAX_ERROR_STRING];
+    char buf[10];
     MPI_Request request;
     MPI_Status status;
 
@@ -36,6 +35,7 @@ int main(int argc, char **argv)
 
     /* Make sure ANY_SOURCE returns correctly after a failure */
     if (rank == 0) {
+        char buf[10];
         err = MPI_Recv(buf, 10, MPI_CHAR, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         if (MPI_SUCCESS == err) {
             fprintf(stderr, "Expected a failure for receive from ANY_SOURCE\n");
@@ -43,10 +43,29 @@ int main(int argc, char **argv)
         }
 
         /* Make sure that new ANY_SOURCE operations don't work yet */
-        MPI_Irecv(buf, 10, MPI_CHAR, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &request);
+        err = MPI_Irecv(buf, 10, MPI_CHAR, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &request);
+        if (request == MPI_REQUEST_NULL) {
+            fprintf(stderr, "Request for ANY_SOURCE receive is NULL\n");
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+        MPI_Error_class(err, &ec);
+        if (ec != MPI_SUCCESS && ec != MPIX_ERR_PROC_FAILED_PENDING) {
+            fprintf(stderr, "Expected SUCCESS or MPIX_ERR_PROC_FAILED_PENDING: %d\n", ec);
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+
         err = MPI_Wait(&request, &status);
-        if (MPI_SUCCESS == err) {
-            fprintf(stderr, "Expected a failure for receive from ANY_SOURCE\n");
+        MPI_Error_class(err, &ec);
+        if (MPIX_ERR_PROC_FAILED_PENDING != ec) {
+            fprintf(stderr, "Expected a MPIX_ERR_PROC_FAILED_PENDING (%d) for receive from ANY_SOURCE: %d\n", MPIX_ERR_PROC_FAILED_PENDING, ec);
+            fprintf(stderr, "BUF: %s\n", buf);
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+
+        err = MPI_Send(NULL, 0, MPI_INT, 2, 0, MPI_COMM_WORLD);
+        if (MPI_SUCCESS != err) {
+            MPI_Error_class(err, &ec);
+            fprintf(stderr, "MPI_SEND failed: %d", ec);
             MPI_Abort(MPI_COMM_WORLD, 1);
         }
 
@@ -55,21 +74,24 @@ int main(int argc, char **argv)
         err = MPI_Wait(&request, &status);
         if (MPI_SUCCESS != err) {
             MPI_Error_class(err, &ec);
-            MPI_Error_string(err, error, &size);
-            fprintf(stderr, "Unexpected failure after acknowledged failure (%d)\n%s", ec, error);
+            fprintf(stderr, "Unexpected failure after acknowledged failure (%d)\n", ec);
             MPI_Abort(MPI_COMM_WORLD, 1);
         }
 
-        fprintf(stdout, "%s\n", buf);
+        fprintf(stdout, " %s\n", buf);
     } else if (rank == 2) {
-        /* Make sure we don't send our first message too early */
-        sleep(2);
+        char buf[10] = "No errors";
+        err = MPI_Recv(NULL, 0, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        if (MPI_SUCCESS != err) {
+            MPI_Error_class(err, &ec);
+            fprintf(stderr, "MPI_RECV failed: %d\n", ec);
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
 
         err = MPI_Send(buf, 10, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
         if (MPI_SUCCESS != err) {
             MPI_Error_class(err, &ec);
-            MPI_Error_string(err, error, &size);
-            fprintf(stderr, "Unexpected failure from MPI_Send (%d)\n%s", ec, error);
+            fprintf(stderr, "Unexpected failure from MPI_Send (%d)\n", ec);
             MPI_Abort(MPI_COMM_WORLD, 1);
         }
     }

http://git.mpich.org/mpich.git/commitdiff/5b0cfb3be4aa050709ec633c5cfa2860174cf51f

commit 5b0cfb3be4aa050709ec633c5cfa2860174cf51f
Author: Wesley Bland <wbland at anl.gov>
Date:   Wed Nov 5 14:28:03 2014 -0600

    Return request from IRECV even if failure
    
    We will now return a request handle from MPI_IRECV even if there is a
    failure. The reason is that the ULFM spec says that even if the
    function returns MPIX_ERR_PROC_FAILED_PENDING, it should still
    provide a valid request that can be completed later.
    
    This doesn't cause a problem for other error cases because the value
    of the request is undefined in those cases, so it's fine for it to be
    garbage.
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/src/mpi/pt2pt/irecv.c b/src/mpi/pt2pt/irecv.c
index ee9aa85..ab75840 100644
--- a/src/mpi/pt2pt/irecv.c
+++ b/src/mpi/pt2pt/irecv.c
@@ -125,14 +125,19 @@ int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
     
     mpi_errno = MPID_Irecv(buf, count, datatype, source, tag, comm_ptr, 
 			   MPID_CONTEXT_INTRA_PT2PT, &request_ptr);
-    if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-
     /* return the handle of the request to the user */
     /* MPIU_OBJ_HANDLE_PUBLISH is unnecessary for irecv, lower-level access is
      * responsible for its own consistency, while upper-level field access is
      * controlled by the completion counter */
     *request = request_ptr->handle;
 
+    /* Put this part after setting the request so that if the request is
+     * pending (which is still considered an error), it will still be set
+     * correctly here. For real error cases, the user might get garbage as
+     * their request value, but that's fine since the definition is
+     * undefined anyway. */
+    if (mpi_errno != MPI_SUCCESS) goto fn_fail;
+
     /* ... end of body of routine ... */
     
   fn_exit:

http://git.mpich.org/mpich.git/commitdiff/c2be640e9d787337bf5260d1081b898668541183

commit c2be640e9d787337bf5260d1081b898668541183
Author: Wesley Bland <wbland at anl.gov>
Date:   Tue Nov 4 17:41:46 2014 -0600

    Check for pending any source ops
    
    Before calling the progress engine, check whether any of the operations
    should return MPIX_ERR_PROC_FAILED_PENDING. Such operations could
    cause the progress engine to hang, so we can't enter it. Instead, mark
    the appropriate error codes and return immediately.
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/src/mpi/pt2pt/test.c b/src/mpi/pt2pt/test.c
index 5b5d325..38775bf 100644
--- a/src/mpi/pt2pt/test.c
+++ b/src/mpi/pt2pt/test.c
@@ -66,6 +66,10 @@ int MPIR_Test_impl(MPI_Request *request, int *flag, MPI_Status *status)
 	*flag = TRUE;
 	if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 	/* Fall through to the exit */
+    } else if (MPID_Request_is_pending_failure(request_ptr)) {
+        *flag = TRUE;
+        mpi_errno = request_ptr->status.MPI_ERROR;
+        goto fn_fail;
     }
         
  fn_exit:
diff --git a/src/mpi/pt2pt/testall.c b/src/mpi/pt2pt/testall.c
index fd6a41e..8acc813 100644
--- a/src/mpi/pt2pt/testall.c
+++ b/src/mpi/pt2pt/testall.c
@@ -166,13 +166,14 @@ int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
                                                              &(array_of_statuses[i]));
 	    if (mpi_errno != MPI_SUCCESS) goto fn_fail;
 	}
-	if (request_ptrs[i] != NULL && MPID_Request_is_complete(request_ptrs[i]))
+	if (request_ptrs[i] != NULL && (MPID_Request_is_complete(request_ptrs[i]) || MPID_Request_is_pending_failure(request_ptrs[i])))
 	{
 	    n_completed++;
             rc = MPIR_Request_get_error(request_ptrs[i]);
             if (rc != MPI_SUCCESS)
             {
-                if (MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(rc))
+                if (MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(rc) ||
+                    MPIX_ERR_PROC_FAILED_PENDING == MPIR_ERR_GET_CLASS(rc))
                     proc_failure = 1;
                 mpi_errno = MPI_ERR_IN_STATUS;
             }
diff --git a/src/mpi/pt2pt/testany.c b/src/mpi/pt2pt/testany.c
index a3f0595..72112fa 100644
--- a/src/mpi/pt2pt/testany.c
+++ b/src/mpi/pt2pt/testany.c
@@ -183,7 +183,12 @@ int MPI_Testany(int count, MPI_Request array_of_requests[], int *indx,
 	    {
 		n_inactive += 1;
 	    }
-	}
+        } else if (request_ptrs[i] != NULL && MPID_Request_is_pending_failure(request_ptrs[i])) {
+            mpi_errno = request_ptrs[i]->status.MPI_ERROR;
+            *flag = TRUE;
+            *indx = i;
+            goto fn_fail;
+        }
     }
     
     if (n_inactive == count)
diff --git a/src/mpi/pt2pt/testsome.c b/src/mpi/pt2pt/testsome.c
index 58c1275..4f528b3 100644
--- a/src/mpi/pt2pt/testsome.c
+++ b/src/mpi/pt2pt/testsome.c
@@ -197,7 +197,13 @@ int MPI_Testsome(int incount, MPI_Request array_of_requests[], int *outcount,
 		request_ptrs[i] = NULL;
 		n_inactive += 1;
 	    }
-	}
+        } else if (request_ptrs[i] != NULL && MPID_Request_is_pending_failure(request_ptrs[i])) {
+            mpi_errno = MPI_ERR_IN_STATUS;
+            array_of_indices[n_active] = i;
+            n_active += 1;
+            rc = request_ptrs[i]->status.MPI_ERROR;
+            status_ptr->MPI_ERROR = rc;
+        }
     }
 
     if (mpi_errno == MPI_ERR_IN_STATUS)
diff --git a/src/mpi/pt2pt/wait.c b/src/mpi/pt2pt/wait.c
index 2d609ac..219dfd4 100644
--- a/src/mpi/pt2pt/wait.c
+++ b/src/mpi/pt2pt/wait.c
@@ -43,6 +43,11 @@ int MPIR_Wait_impl(MPI_Request *request, MPI_Status *status)
 
     MPID_Request_get_ptr(*request, request_ptr);
 
+    if (MPID_Request_is_pending_failure(request_ptr)) {
+        mpi_errno = request_ptr->status.MPI_ERROR;
+        goto fn_fail;
+    }
+
     if (!MPID_Request_is_complete(request_ptr))
     {
 	MPID_Progress_state progress_state;
diff --git a/src/mpi/pt2pt/waitall.c b/src/mpi/pt2pt/waitall.c
index b3a87fc..fe7c581 100644
--- a/src/mpi/pt2pt/waitall.c
+++ b/src/mpi/pt2pt/waitall.c
@@ -154,7 +154,7 @@ int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
         }
         
         /* wait for ith request to complete */
-        while (!MPID_Request_is_complete(request_ptrs[i]))
+        while (!MPID_Request_is_complete(request_ptrs[i]) && !MPID_Request_is_pending_failure(request_ptrs[i]))
         {
             /* generalized requests should already be finished */
             MPIU_Assert(request_ptrs[i]->kind != MPID_UREQUEST);
@@ -168,9 +168,16 @@ int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
             }
         }
 
-        /* complete the request and check the status */
-        status_ptr = (ignoring_statuses) ? MPI_STATUS_IGNORE : &array_of_statuses[i];
-        rc = MPIR_Request_complete(&array_of_requests[i], request_ptrs[i], status_ptr, &active_flag);
+        if (MPID_Request_is_complete(request_ptrs[i])) {
+            /* complete the request and check the status */
+            status_ptr = (ignoring_statuses) ? MPI_STATUS_IGNORE : &array_of_statuses[i];
+            rc = MPIR_Request_complete(&array_of_requests[i], request_ptrs[i], status_ptr, &active_flag);
+        } else {
+            /* If the request isn't complete, it's because it's pending due
+             * to a failure so set the rc accordingly. */
+            rc = request_ptrs[i]->status.MPI_ERROR;
+            proc_failure = 1;
+        }
         if (rc == MPI_SUCCESS)
         {
             request_ptrs[i] = NULL;
diff --git a/src/mpi/pt2pt/waitany.c b/src/mpi/pt2pt/waitany.c
index 7bb37d7..cd48d38 100644
--- a/src/mpi/pt2pt/waitany.c
+++ b/src/mpi/pt2pt/waitany.c
@@ -155,7 +155,12 @@ int MPI_Waitany(int count, MPI_Request array_of_requests[], int *indx,
                 continue;
             /* we found at least one non-null request */
             found_nonnull_req = TRUE;
-            
+
+            if (MPID_Request_is_pending_failure(request_ptrs[i])) {
+                mpi_errno = request_ptrs[i]->status.MPI_ERROR;
+                goto fn_progress_end_fail;
+            }
+
             if (request_ptrs[i]->kind == MPID_UREQUEST && request_ptrs[i]->greq_fns->poll_fn != NULL)
 	    {
                 /* this is a generalized request; make progress on it */
diff --git a/src/mpi/pt2pt/waitsome.c b/src/mpi/pt2pt/waitsome.c
index 00b9449..ee8cca3 100644
--- a/src/mpi/pt2pt/waitsome.c
+++ b/src/mpi/pt2pt/waitsome.c
@@ -221,7 +221,13 @@ int MPI_Waitsome(int incount, MPI_Request array_of_requests[],
 		    request_ptrs[i] = NULL;
 		    n_inactive += 1;
 		}
-	    }
+            } else if ( request_ptrs[i] != NULL && MPID_Request_is_pending_failure(request_ptrs[i])) {
+                n_active += 1;
+                mpi_errno = MPI_ERR_IN_STATUS;
+                if (status_ptr != MPI_STATUS_IGNORE) {
+                    status_ptr->MPI_ERROR = request_ptrs[i]->status.MPI_ERROR;
+                }
+            }
 	}
 
 	if (mpi_errno == MPI_ERR_IN_STATUS)

http://git.mpich.org/mpich.git/commitdiff/e89e6c66ceeb93725f1024927347c2d2787d3934

commit e89e6c66ceeb93725f1024927347c2d2787d3934
Author: Wesley Bland <wbland at anl.gov>
Date:   Tue Nov 4 17:39:13 2014 -0600

    Check for any source reqs that should be pending
    
    Anysource requests involving a communicator that isn't collectively
    active should be appended to the posted queue, and the error class
    should be marked as MPIX_ERR_PROC_FAILED_PENDING. The operation can
    still be completed later.
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_recvq.c b/src/mpid/ch3/src/ch3u_recvq.c
index e4b7a38..9290470 100644
--- a/src/mpid/ch3/src/ch3u_recvq.c
+++ b/src/mpid/ch3/src/ch3u_recvq.c
@@ -555,46 +555,47 @@ MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
 	    } while (rreq);
 	}
 	else {
-	    if (tag == MPI_ANY_TAG)
-		match.parts.tag = mask.parts.tag = 0;
+        do { /* This loop is just to make it easy to break out if necessary */
+            if (tag == MPI_ANY_TAG)
+                match.parts.tag = mask.parts.tag = 0;
             if (source == MPI_ANY_SOURCE) {
                 if (!MPIDI_CH3I_Comm_AS_enabled(comm)) {
-                    MPIU_ERR_SET(mpi_errno, MPIX_ERR_PROC_FAILED, "**comm_fail");
-                    rreq->status.MPI_ERROR = mpi_errno;
-                    MPIDI_CH3U_Request_complete(rreq);
-                    goto lock_exit;
+                    /* If MPI_ANY_SOURCE is disabled right now, we should
+                     * just add this request to the posted queue instead and
+                     * return the appropriate error. */
+                    continue;
                 }
                 match.parts.rank = mask.parts.rank = 0;
             }
+            do {
+                MPIR_T_PVAR_COUNTER_INC(RECVQ, unexpected_recvq_match_attempts, 1);
+                if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask)) {
+                    if (prev_rreq != NULL) {
+                        prev_rreq->dev.next = rreq->dev.next;
+                    }
+                    else {
+                        recvq_unexpected_head = rreq->dev.next;
+                    }
+                    if (rreq->dev.next == NULL) {
+                        recvq_unexpected_tail = prev_rreq;
+                    }
+                    MPIR_T_PVAR_LEVEL_DEC(RECVQ, unexpected_recvq_length, 1);
 
-	    do {
-            MPIR_T_PVAR_COUNTER_INC(RECVQ, unexpected_recvq_match_attempts, 1);
-		if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask)) {
-		    if (prev_rreq != NULL) {
-			prev_rreq->dev.next = rreq->dev.next;
-		    }
-		    else {
-			recvq_unexpected_head = rreq->dev.next;
-		    }
-		    if (rreq->dev.next == NULL) {
-			recvq_unexpected_tail = prev_rreq;
-		    }
-            MPIR_T_PVAR_LEVEL_DEC(RECVQ, unexpected_recvq_length, 1);
-
-            if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
-                MPIR_T_PVAR_LEVEL_DEC(RECVQ, unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);
+                    if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
+                        MPIR_T_PVAR_LEVEL_DEC(RECVQ, unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);
 
-		    rreq->comm                 = comm;
-		    MPIR_Comm_add_ref(comm);
-		    rreq->dev.user_buf         = user_buf;
-		    rreq->dev.user_count       = user_count;
-		    rreq->dev.datatype         = datatype;
-		    found = TRUE;
-		    goto lock_exit;
-		}
-		prev_rreq = rreq;
-		rreq = rreq->dev.next;
-	    } while (rreq);
+                    rreq->comm                 = comm;
+                    MPIR_Comm_add_ref(comm);
+                    rreq->dev.user_buf         = user_buf;
+                    rreq->dev.user_count       = user_count;
+                    rreq->dev.datatype         = datatype;
+                    found = TRUE;
+                    goto lock_exit;
+                }
+                prev_rreq = rreq;
+                rreq = rreq->dev.next;
+            } while (rreq);
+        } while (false);
 	}
     }
     MPIR_T_PVAR_TIMER_END(RECVQ, time_matching_unexpectedq);
@@ -639,10 +640,14 @@ MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
                 goto lock_exit;
             }
         } else if (!MPIDI_CH3I_Comm_AS_enabled(comm)) {
-            MPIU_ERR_SET(mpi_errno, MPIX_ERR_PROC_FAILED, "**comm_fail");
+            /* If this receive is for MPI_ANY_SOURCE, we will still add the
+            * request to the queue for now, but we will also set the error
+            * class to MPIX_ERR_PROC_FAILED_PENDING since the request shouldn't
+            * be matched as long as there is a failure pending. This will get
+            * checked again later during the completion function to see if the
+            * request can be completed at that time. */
+            MPIU_ERR_SET(mpi_errno, MPIX_ERR_PROC_FAILED_PENDING, "**failure_pending");
             rreq->status.MPI_ERROR = mpi_errno;
-            MPIDI_CH3U_Request_complete(rreq);
-            goto lock_exit;
         }
         
 	rreq->dev.next = NULL;
diff --git a/src/mpid/ch3/src/mpid_irecv.c b/src/mpid/ch3/src/mpid_irecv.c
index 7a30ad4..c0401e9 100644
--- a/src/mpid/ch3/src/mpid_irecv.c
+++ b/src/mpid/ch3/src/mpid_irecv.c
@@ -49,6 +49,10 @@ int MPID_Irecv(void * buf, int count, MPI_Datatype datatype, int rank, int tag,
 	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomemreq");
     }
 
+    if (MPIR_ERR_GET_CLASS(rreq->status.MPI_ERROR) == MPIX_ERR_PROC_FAILED_PENDING) {
+        mpi_errno = rreq->status.MPI_ERROR;
+    }
+
     if (found)
     {
 	MPIDI_VC_t * vc;

http://git.mpich.org/mpich.git/commitdiff/d71154aa3a886f44c51e33ac045a2cf09301ff41

commit d71154aa3a886f44c51e33ac045a2cf09301ff41
Author: Wesley Bland <wbland at anl.gov>
Date:   Tue Nov 4 17:36:38 2014 -0600

    Add MPID_Request_is_pending_failure
    
    This function will check to see if a request can be completed if it
    involves MPI_ANY_SOURCE. An any source request cannot be completed if
    the communicator is not collectively active.
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/src/include/mpiimpl.h b/src/include/mpiimpl.h
index 40389f5..a6c0ad0 100644
--- a/src/include/mpiimpl.h
+++ b/src/include/mpiimpl.h
@@ -3621,6 +3621,23 @@ void MPID_Request_set_completed(MPID_Request *);
 @*/
 void MPID_Request_release(MPID_Request *);
 
+/*@
+  MPID_Request_is_pending_failure - Check if a request is pending because of a process failures
+
+  Input Parameter:
+  request - request to check
+
+  Return value:
+  0 - The request is not pending because of a failure
+  Non-zero - The request is pending because of a failure
+
+  Notes:
+  This routine checks to see if the communicator used in the request can
+  participate in MPI_ANY_SOURCE operations and if this request is already
+  pending due to a process failure.
+@*/
+int MPID_Request_is_pending_failure(MPID_Request *);
+
 typedef struct MPID_Grequest_class {
      MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
      MPI_Grequest_query_function *query_fn;
diff --git a/src/mpid/ch3/src/mpid_comm_failure_ack.c b/src/mpid/ch3/src/mpid_comm_failure_ack.c
index e5034a1..700c57e 100644
--- a/src/mpid/ch3/src/mpid_comm_failure_ack.c
+++ b/src/mpid/ch3/src/mpid_comm_failure_ack.c
@@ -28,6 +28,9 @@ int MPID_Comm_failure_ack(MPID_Comm *comm_ptr)
      * communciator. */
     comm_ptr->dev.last_ack_rank = MPIDI_last_known_failed;
 
+    /* Mark the communicator as any source active */
+    comm_ptr->dev.anysource_enabled = 1;
+
 fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_FAILURE_ACK);
     return mpi_errno;
@@ -144,3 +147,34 @@ int MPID_Comm_failed_bitarray(MPID_Comm *comm_ptr, uint32_t **bitarray, int acke
   fn_fail:
     goto fn_exit;
 }
+
+#undef FUNCNAME
+#define FUNCNAME MPID_Request_is_pending_failure
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_Request_is_pending_failure(MPID_Request *request_ptr)
+{
+    int ret = 0;
+    MPIDI_STATE_DECL(MPID_STATE_REQUEST_IS_PENDING_FAILURE);
+    MPIDI_FUNC_ENTER(MPID_STATE_REQUEST_IS_PENDING_FAILURE);
+
+    if (request_ptr->dev.match.parts.rank != MPI_ANY_SOURCE) {
+        goto fn_exit;
+    }
+
+    /* If the request is pending and the communicator has MPI_ANY_SOURCE
+     * enabled, then we can mark the request as not pending and let the
+     * request continue. */
+    if (MPIDI_CH3I_Comm_AS_enabled(request_ptr->comm)) {
+        request_ptr->status.MPI_ERROR = MPI_SUCCESS;
+        goto fn_exit;
+    }
+
+    /* Otherwise, the request shouldn't go into the progress engine. */
+    ret = 1;
+
+fn_exit:
+    MPIU_DBG_MSG_S(CH3_OTHER, VERBOSE, "Request is%spending failure", ret ? " " : " not ");
+    MPIDI_FUNC_EXIT(MPID_STATE_REQUEST_IS_PENDING_FAILURE);
+    return ret;
+}
diff --git a/src/mpid/pamid/src/misc/mpid_unimpl.c b/src/mpid/pamid/src/misc/mpid_unimpl.c
index 4472c1e..c43e10e 100644
--- a/src/mpid/pamid/src/misc/mpid_unimpl.c
+++ b/src/mpid/pamid/src/misc/mpid_unimpl.c
@@ -101,3 +101,9 @@ int MPID_Comm_revoke(MPID_Comm *comm_ptr, int is_remote)
   MPID_abort();
   return 0;
 }
+
+int MPID_Request_is_pending_failure(MPID_Request *request_ptr)
+{
+  MPID_abort();
+  return 0;
+}

-----------------------------------------------------------------------

Summary of changes:
 src/include/mpiimpl.h                    |   17 +++++++
 src/mpi/pt2pt/irecv.c                    |    9 +++-
 src/mpi/pt2pt/test.c                     |    4 ++
 src/mpi/pt2pt/testall.c                  |    5 +-
 src/mpi/pt2pt/testany.c                  |    7 ++-
 src/mpi/pt2pt/testsome.c                 |    8 +++-
 src/mpi/pt2pt/wait.c                     |    5 ++
 src/mpi/pt2pt/waitall.c                  |   15 ++++--
 src/mpi/pt2pt/waitany.c                  |    7 ++-
 src/mpi/pt2pt/waitsome.c                 |    8 +++-
 src/mpid/ch3/src/ch3u_recvq.c            |   77 ++++++++++++++++--------------
 src/mpid/ch3/src/mpid_comm_failure_ack.c |   34 +++++++++++++
 src/mpid/ch3/src/mpid_irecv.c            |    4 ++
 src/mpid/pamid/src/misc/mpid_unimpl.c    |    6 ++
 test/mpi/ft/anysource.c                  |   46 +++++++++++++-----
 test/mpi/ft/testlist                     |    2 +-
 16 files changed, 193 insertions(+), 61 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list