[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1b1-78-g3b3bd06

mysql vizuser noreply at mpich.org
Wed Oct 2 11:14:54 CDT 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  3b3bd0654f69b0415a7a5d07ef3379fac0b6b799 (commit)
       via  3e078e34d87f0ee664329cdd4515e6b1acd09906 (commit)
       via  49fa4d51d40d8d98a912cd158714cc03d5ed1e67 (commit)
      from  8c53c3c2b0879309a635822a4d6d4dd46dd6fff5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/3b3bd0654f69b0415a7a5d07ef3379fac0b6b799

commit 3b3bd0654f69b0415a7a5d07ef3379fac0b6b799
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Tue Sep 3 13:27:46 2013 -0500

    Fault tolerance tests for some collectives
    
    All functions in these tests return errors to all processes when
    there is a failure in the communicator. Mark failing tests as xfail.
    
    Signed-off-by: Wesley Bland <wbland at mcs.anl.gov>

diff --git a/test/mpi/ft/Makefile.am b/test/mpi/ft/Makefile.am
index ccd6270..4b70c96 100644
--- a/test/mpi/ft/Makefile.am
+++ b/test/mpi/ft/Makefile.am
@@ -10,4 +10,4 @@ include $(top_srcdir)/Makefile.mtest
 ## for all programs that are just built from the single corresponding source
 ## file, we don't need per-target _SOURCES rules, automake will infer them
 ## correctly
-noinst_PROGRAMS = die abort sendalive isendalive senddead recvdead isenddead irecvdead
+noinst_PROGRAMS = die abort sendalive isendalive senddead recvdead isenddead irecvdead barrier gather reduce
diff --git a/test/mpi/ft/barrier.c b/test/mpi/ft/barrier.c
new file mode 100644
index 0000000..2072698
--- /dev/null
+++ b/test/mpi/ft/barrier.c
@@ -0,0 +1,59 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2003 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * This test attempts collective communication after a process in
+ * the communicator has failed. Since all processes contribute to
+ * the result of the operation, all processes will receive an error.
+ */
+int main(int argc, char **argv)
+{
+    int rank, size;
+    int err, errclass;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+
+    if (size < 2) {
+        fprintf( stderr, "Must run with at least 2 processes\n" );
+        MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+
+    if (rank == 1) {
+        exit(EXIT_FAILURE);
+    }
+
+    err = MPI_Barrier(MPI_COMM_WORLD);
+
+    if (rank == 0) {
+#if defined (MPICH) && (MPICH_NUMVERSION >= 30100102)
+        MPI_Error_class(err, &errclass);
+        if (errclass == MPIX_ERR_PROC_FAIL_STOP) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Wrong error code (%d) returned. Expected MPIX_ERR_PROC_FAIL_STOP\n", errclass);
+        }
+#else
+        if (err) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Program reported MPI_SUCCESS, but an error code was expected.\n");
+        }
+#endif
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/test/mpi/ft/gather.c b/test/mpi/ft/gather.c
new file mode 100644
index 0000000..6ac2a6e
--- /dev/null
+++ b/test/mpi/ft/gather.c
@@ -0,0 +1,64 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2003 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * This test attempts collective communication after a process in
+ * the communicator has failed. Since all processes contribute to
+ * the result of the operation, all processes will receive an error.
+ */
+int main(int argc, char **argv)
+{
+    int rank, size, err, errclass;
+    int sendbuf[1] = { 42 };
+    int *recvbuf;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+
+    if (size < 3) {
+        fprintf( stderr, "Must run with at least 3 processes\n" );
+        MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+
+    if (rank == 1) {
+        exit(EXIT_FAILURE);
+    }
+
+    recvbuf = (int *)malloc(size*sizeof(int));
+
+    err = MPI_Gather(sendbuf, 1, MPI_INT, recvbuf, size, MPI_INT, 0, MPI_COMM_WORLD);
+
+    if (rank == 0) {
+#if defined (MPICH) && (MPICH_NUMVERSION >= 30100102)
+        MPI_Error_class(err, &errclass);
+        if (errclass == MPIX_ERR_PROC_FAIL_STOP) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Wrong error code (%d) returned. Expected MPIX_ERR_PROC_FAIL_STOP\n", errclass);
+        }
+#else
+        if (err) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Program reported MPI_SUCCESS, but an error code was expected.\n");
+        }
+#endif
+    }
+
+    free(recvbuf);
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/test/mpi/ft/reduce.c b/test/mpi/ft/reduce.c
new file mode 100644
index 0000000..30664b5
--- /dev/null
+++ b/test/mpi/ft/reduce.c
@@ -0,0 +1,60 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2003 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * This test attempts collective communication after a process in
+ * the communicator has failed. Since all processes contribute to
+ * the result of the operation, all processes will receive an error.
+ */
+int main(int argc, char **argv)
+{
+    int rank, size, err, errclass;
+    int sendbuf[1] = { 42 };
+    int recvbuf[1];
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+
+    if (size < 3) {
+        fprintf( stderr, "Must run with at least 3 processes\n" );
+        MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+
+    if (rank == 1) {
+        exit(EXIT_FAILURE);
+    }
+
+    err = MPI_Reduce(sendbuf, recvbuf, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+
+    if (rank == 0) {
+#if defined (MPICH) && (MPICH_NUMVERSION >= 30100102)
+        MPI_Error_class(err, &errclass);
+        if (errclass == MPIX_ERR_PROC_FAIL_STOP) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Wrong error code (%d) returned. Expected MPIX_ERR_PROC_FAIL_STOP\n", errclass);
+        }
+#else
+        if (err) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Program reported MPI_SUCCESS, but an error code was expected.\n");
+        }
+#endif
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/test/mpi/ft/testlist b/test/mpi/ft/testlist
index 905aa6d..3bb71a0 100644
--- a/test/mpi/ft/testlist
+++ b/test/mpi/ft/testlist
@@ -6,3 +6,6 @@ senddead 2 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict
 recvdead 2 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
 isenddead 2 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
 irecvdead 2 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
+barrier 4 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
+gather 4 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
+reduce 4 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945

http://git.mpich.org/mpich.git/commitdiff/3e078e34d87f0ee664329cdd4515e6b1acd09906

commit 3e078e34d87f0ee664329cdd4515e6b1acd09906
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Fri Aug 23 11:18:02 2013 -0500

    Fixup and add pt2pt fault tolerance tests
    
    pt2pt tests for blocking and non-blocking sends and recvs within
    a communicator with a failed process. Mark failing tests as xfail.
    
    Signed-off-by: Wesley Bland <wbland at mcs.anl.gov>

diff --git a/test/mpi/ft/Makefile.am b/test/mpi/ft/Makefile.am
index 03e7578..ccd6270 100644
--- a/test/mpi/ft/Makefile.am
+++ b/test/mpi/ft/Makefile.am
@@ -10,4 +10,4 @@ include $(top_srcdir)/Makefile.mtest
 ## for all programs that are just built from the single corresponding source
 ## file, we don't need per-target _SOURCES rules, automake will infer them
 ## correctly
-noinst_PROGRAMS = die abort send
+noinst_PROGRAMS = die abort sendalive isendalive senddead recvdead isenddead irecvdead
diff --git a/test/mpi/ft/irecvdead.c b/test/mpi/ft/irecvdead.c
new file mode 100644
index 0000000..bf20dc1
--- /dev/null
+++ b/test/mpi/ft/irecvdead.c
@@ -0,0 +1,63 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2003 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * This test attempts MPI_Irecv with the source being a dead process. It should fail
+ * and return an error at completion. If we are testing sufficiently new MPICH, we
+ * look for the MPIX_ERR_PROC_FAIL_STOP error code. These should be converted to look
+ * for the standardized error code once it is finalized.
+ */
+int main(int argc, char **argv)
+{
+    int rank, size, err, errclass;
+    MPI_Request request;
+    char buf[10];
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (size < 2) {
+        fprintf( stderr, "Must run with at least 2 processes\n" );
+        MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+
+    if (rank == 1) {
+        exit(EXIT_FAILURE);
+    }
+
+    if (rank == 0) {
+        MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+        err = MPI_Irecv(buf, 1, MPI_CHAR, 1, 0, MPI_COMM_WORLD, &request);
+        if (err)
+            fprintf(stderr, "MPI_Irecv returned an error");
+
+        err = MPI_Wait(&request, MPI_STATUS_IGNORE);
+#if defined (MPICH) && (MPICH_NUMVERSION >= 30100102)
+        MPI_Error_class(err, &errclass);
+        if (errclass == MPIX_ERR_PROC_FAIL_STOP) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Wrong error code (%d) returned. Expected MPIX_ERR_PROC_FAIL_STOP\n", errclass);
+        }
+#else
+        if (err) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Program reported MPI_SUCCESS, but an error code was expected.\n");
+        }
+#endif
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/test/mpi/ft/isendalive.c b/test/mpi/ft/isendalive.c
new file mode 100644
index 0000000..e705e2b
--- /dev/null
+++ b/test/mpi/ft/isendalive.c
@@ -0,0 +1,56 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2003 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * This test attempts communication between 2 running processes
+ * after another process has failed. The communication should complete
+ * successfully.
+ */
+int main(int argc, char **argv)
+{
+    int rank, size, err;
+    char buf[10];
+    MPI_Request request;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (size < 3) {
+        fprintf( stderr, "Must run with at least 3 processes\n" );
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    if (rank == 1) {
+        exit(EXIT_FAILURE);
+    }
+
+    if (rank == 0) {
+        err =  MPI_Isend("No Errors", 10, MPI_CHAR, 2, 0, MPI_COMM_WORLD, &request);
+        err += MPI_Wait(&request, MPI_STATUS_IGNORE);
+        if (err) {
+            fprintf(stderr, "An error occurred during the send operation\n");
+        }
+    }
+
+    if (rank == 2) {
+        err =  MPI_Irecv(buf, 10, MPI_CHAR, 0, 0, MPI_COMM_WORLD, &request);
+        err += MPI_Wait(&request, MPI_STATUS_IGNORE);
+        if (err) {
+            fprintf(stderr, "An error occurred during the recv operation\n");
+        } else {
+            printf(" %s\n", buf);
+            fflush(stdout);
+        }
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/test/mpi/ft/isenddead.c b/test/mpi/ft/isenddead.c
new file mode 100644
index 0000000..3440b57
--- /dev/null
+++ b/test/mpi/ft/isenddead.c
@@ -0,0 +1,58 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2003 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * This test attempts to MPI_Isend with the destination being a dead process.
+ * The communication should succeed or report an error. It must not block
+ * indefinitely.
+ */
+int main(int argc, char **argv)
+{
+    int rank, size, err, errclass;
+    char buf[100000];
+    MPI_Request request;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (size < 2) {
+        fprintf( stderr, "Must run with at least 2 processes\n" );
+        MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+
+    if (rank == 1) {
+        exit(EXIT_FAILURE);
+    }
+
+    if (rank == 0) {
+        MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+        err = MPI_Isend(buf, 100000, MPI_CHAR, 1, 0, MPI_COMM_WORLD, &request);
+        if (err)
+            fprintf(stderr, "MPI_Isend returned error\n");
+
+        err = MPI_Wait(&request, MPI_STATUS_IGNORE);
+#if defined (MPICH) && (MPICH_NUMVERSION >= 30100102)
+        MPI_Error_class(err, &errclass);
+        if ((err) && (errclass != MPIX_ERR_PROC_FAIL_STOP)) {
+            fprintf(stderr, "Wrong error code (%d) returned. Expected MPIX_ERR_PROC_FAIL_STOP\n", errclass);
+        } else {
+            printf(" No Errors\n");
+            fflush(stdout);
+        }
+#else
+        printf(" No Errors\n");
+        fflush(stdout);
+#endif
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/test/mpi/ft/recvdead.c b/test/mpi/ft/recvdead.c
new file mode 100644
index 0000000..5b194ca
--- /dev/null
+++ b/test/mpi/ft/recvdead.c
@@ -0,0 +1,58 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2003 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * This test attempts MPI_Recv with the source being a dead process. It should fail
+ * and return an error. If we are testing sufficiently new MPICH, we look for the
+ * MPIX_ERR_PROC_FAIL_STOP error code. These should be converted to look for the
+ * standardized error code once it is finalized.
+ */
+int main(int argc, char **argv)
+{
+    int rank, size, err, errclass;
+    char buf[10];
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (size < 2) {
+        fprintf( stderr, "Must run with at least 2 processes\n" );
+        MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+
+    if (rank == 1) {
+        exit(EXIT_FAILURE);
+    }
+
+    if (rank == 0) {
+        MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+        err = MPI_Recv(buf, 1, MPI_CHAR, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+#if defined (MPICH) && (MPICH_NUMVERSION >= 30100102)
+        MPI_Error_class(err, &errclass);
+        if (errclass == MPIX_ERR_PROC_FAIL_STOP) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Wrong error code (%d) returned. Expected MPIX_ERR_PROC_FAIL_STOP\n", errclass);
+        }
+#else
+        if (err) {
+            printf(" No Errors\n");
+            fflush(stdout);
+        } else {
+            fprintf(stderr, "Program reported MPI_SUCCESS, but an error code was expected.\n");
+        }
+#endif
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/test/mpi/ft/send.c b/test/mpi/ft/sendalive.c
similarity index 64%
rename from test/mpi/ft/send.c
rename to test/mpi/ft/sendalive.c
index 6425bc2..4be21ff 100644
--- a/test/mpi/ft/send.c
+++ b/test/mpi/ft/sendalive.c
@@ -8,7 +8,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-/* 
+/*
  * This test attempts communication between 2 running processes
  * after another process has failed.
  */
@@ -26,12 +26,19 @@ int main(int argc, char **argv)
 
     if (rank == 0) {
         err = MPI_Send("No Errors", 10, MPI_CHAR, 2, 0, MPI_COMM_WORLD);
+        if (err) {
+            fprintf(stderr, "An error occurred during the send operation\n");
+        }
     }
 
     if (rank == 2) {
-        MPI_Recv(buf, 10, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-        printf(" %s\n", buf);
-        fflush( stdout );
+        err = MPI_Recv(buf, 10, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        if (err) {
+            fprintf(stderr, "An error occurred during the recv operation\n");
+        } else {
+            printf(" %s\n", buf);
+            fflush(stdout);
+        }
     }
 
     MPI_Finalize();
diff --git a/test/mpi/ft/senddead.c b/test/mpi/ft/senddead.c
new file mode 100644
index 0000000..6df36f1
--- /dev/null
+++ b/test/mpi/ft/senddead.c
@@ -0,0 +1,53 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2003 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * This test attempts to MPI_Send with the destination being a dead process.
+ * The communication should succeed or report an error. It must not block
+ * indefinitely.
+ */
+int main(int argc, char **argv)
+{
+    int rank, size, err, errclass;
+    char buf[100000];
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (size < 2) {
+        fprintf( stderr, "Must run with at least 2 processes\n" );
+        MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+
+    if (rank == 1) {
+        exit(EXIT_FAILURE);
+    }
+
+    if (rank == 0) {
+        MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+        err = MPI_Send(buf, 100000, MPI_CHAR, 1, 0, MPI_COMM_WORLD);
+#if defined (MPICH) && (MPICH_NUMVERSION >= 30100102)
+        MPI_Error_class(err, &errclass);
+        if ((err) && (errclass != MPIX_ERR_PROC_FAIL_STOP)) {
+            fprintf(stderr, "Wrong error code (%d) returned. Expected MPIX_ERR_PROC_FAIL_STOP\n", errclass);
+        } else {
+            printf(" No Errors\n");
+            fflush(stdout);
+        }
+#else
+        printf(" No Errors\n");
+        fflush(stdout);
+#endif
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/test/mpi/ft/testlist b/test/mpi/ft/testlist
index 560ada5..905aa6d 100644
--- a/test/mpi/ft/testlist
+++ b/test/mpi/ft/testlist
@@ -1,3 +1,8 @@
 die 4 mpiexecarg=-disable-auto-cleanup timeLimit=10 strict=false resultTest=TestStatusNoErrors
 abort 2 mpiexecarg=-disable-auto-cleanup timeLimit=10 strict=false xfail=ticket1537
-send 4 mpiexecarg=-disable-auto-cleanup timeLimit=10 strict=false resultTest=TestStatusNoErrors
+sendalive 4 mpiexecarg=-disable-auto-cleanup timeLimit=10 strict=false resultTest=TestStatusNoErrors
+isendalive 3 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false
+senddead 2 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
+recvdead 2 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10
+isenddead 2 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10 xfail=ticket1945
+irecvdead 2 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors strict=false timeLimit=10

http://git.mpich.org/mpich.git/commitdiff/49fa4d51d40d8d98a912cd158714cc03d5ed1e67

commit 49fa4d51d40d8d98a912cd158714cc03d5ed1e67
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Tue Aug 20 12:55:56 2013 -0500

    Add resultTest routine to test suite.
    
    Fault tolerance tests should test for program correctness even in
    cases where mpiexec returns a non-zero exit code.
    
    Signed-off-by: Wesley Bland <wbland at mcs.anl.gov>

diff --git a/test/mpi/ft/die.c b/test/mpi/ft/die.c
index 3c5c6d7..990d274 100644
--- a/test/mpi/ft/die.c
+++ b/test/mpi/ft/die.c
@@ -16,7 +16,7 @@ int main(int argc, char **argv)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 
     if (rank == 1) {
-        exit(0);
+        exit(EXIT_FAILURE);
     }
 
     if (rank == 0) {
diff --git a/test/mpi/ft/send.c b/test/mpi/ft/send.c
index 963a066..6425bc2 100644
--- a/test/mpi/ft/send.c
+++ b/test/mpi/ft/send.c
@@ -21,7 +21,7 @@ int main(int argc, char **argv)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 
     if (rank == 1) {
-        exit(0);
+        exit(EXIT_FAILURE);
     }
 
     if (rank == 0) {
diff --git a/test/mpi/ft/testlist b/test/mpi/ft/testlist
index 584e20e..560ada5 100644
--- a/test/mpi/ft/testlist
+++ b/test/mpi/ft/testlist
@@ -1,3 +1,3 @@
-die 4 mpiexecarg=-disable-auto-cleanup timeLimit=10 strict=false
+die 4 mpiexecarg=-disable-auto-cleanup timeLimit=10 strict=false resultTest=TestStatusNoErrors
 abort 2 mpiexecarg=-disable-auto-cleanup timeLimit=10 strict=false xfail=ticket1537
-send 4 mpiexecarg=-disable-auto-cleanup timeLimit=10 strict=false
+send 4 mpiexecarg=-disable-auto-cleanup timeLimit=10 strict=false resultTest=TestStatusNoErrors
diff --git a/test/mpi/runtests.in b/test/mpi/runtests.in
index b318980..8f8ea0a 100644
--- a/test/mpi/runtests.in
+++ b/test/mpi/runtests.in
@@ -874,6 +874,58 @@ sub TestStatus {
     }
     return ($found_error,$inline);
 }
+# ----------------------------------------------------------------------------
+#
+# TestStatusNoErrors is like TestStatus except that it also checks for " No Errors"
+# This is useful for fault tolerance tests where mpiexec returns a non-zero status
+# because of a failed process, but still outputs " No Errors" when the correct
+# behavior is detected.
+sub TestStatusNoErrors {
+    my $MPIOUT = $_[0];
+    my $programname = $_[1];
+    my $found_error = 0;
+    my $found_noerror = 0;
+
+    my $inline = "";
+    while (<MPIOUT>) {
+	print STDOUT $_ if $verbose;
+	# Skip FORTRAN STOP
+	if (/FORTRAN STOP/) { next; }
+	$inline .= $_;
+	if (/^\s*No [Ee]rrors\s*$/ && $found_noerror == 0) {
+	    $found_noerror = 1;
+	}
+	if (! /^\s*No [Ee]rrors\s*$/ && !/^\s*Test Passed\s*$/) {
+	    print STDERR "Unexpected output in $programname: $_";
+	    if (!$found_error) {
+		$found_error = 1;
+		$err_count ++;
+	    }
+	}
+    }
+    if ($found_noerror == 0) {
+	print STDERR "Program $programname exited without No Errors\n";
+	if (!$found_error) {
+	    $found_error = 1;
+	    $err_count ++;
+	}
+    }
+    $rc = close ( MPIOUT );
+    if ($rc == 0) {
+	$run_status = $?;
+	$signal_num = $run_status & 127;
+	if ($run_status > 255) { $run_status >>= 8; }
+    }
+    else {
+	# This test *requires* non-zero return codes
+        if (!$found_error) {
+	    $found_error = 1;
+	    $err_count ++;
+        }
+	$inline .= "$mpiexec returned a zero status but the program required a non-zero status\n";
+    }
+    return ($found_error,$inline);
+}
 #
 # TestTimeout is a special test that reports success *only* when the 
 # status return is NONZERO and there are no processes left over.

-----------------------------------------------------------------------

Summary of changes:
 test/mpi/ft/Makefile.am             |    2 +-
 test/mpi/ft/barrier.c               |   59 ++++++++++++++++++++++++++++++++
 test/mpi/ft/die.c                   |    2 +-
 test/mpi/ft/gather.c                |   64 +++++++++++++++++++++++++++++++++++
 test/mpi/ft/irecvdead.c             |   63 ++++++++++++++++++++++++++++++++++
 test/mpi/ft/isendalive.c            |   56 ++++++++++++++++++++++++++++++
 test/mpi/ft/isenddead.c             |   58 +++++++++++++++++++++++++++++++
 test/mpi/ft/recvdead.c              |   58 +++++++++++++++++++++++++++++++
 test/mpi/ft/reduce.c                |   60 ++++++++++++++++++++++++++++++++
 test/mpi/ft/{send.c => sendalive.c} |   17 ++++++---
 test/mpi/ft/senddead.c              |   53 +++++++++++++++++++++++++++++
 test/mpi/ft/testlist                |   12 +++++-
 test/mpi/runtests.in                |   52 ++++++++++++++++++++++++++++
 13 files changed, 547 insertions(+), 9 deletions(-)
 create mode 100644 test/mpi/ft/barrier.c
 create mode 100644 test/mpi/ft/gather.c
 create mode 100644 test/mpi/ft/irecvdead.c
 create mode 100644 test/mpi/ft/isendalive.c
 create mode 100644 test/mpi/ft/isenddead.c
 create mode 100644 test/mpi/ft/recvdead.c
 create mode 100644 test/mpi/ft/reduce.c
 rename test/mpi/ft/{send.c => sendalive.c} (62%)
 create mode 100644 test/mpi/ft/senddead.c


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list