// #include "mpi_test_incl.h" #include // #include #include #include #include // std::ofstream #include // std::stringstream #include using namespace std; int main(int argc, char *argv[]) { MPI::Init_thread(MPI_THREAD_MULTIPLE); MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN); // return an exception instead of Fatal Error, which finishes execution of all processes. int selfRank = -1; MPI_Comm_rank(MPI_COMM_WORLD, &selfRank); int wordSize = -1; MPI_Comm_size(MPI_COMM_WORLD, &wordSize); set failRanks; stringstream str; str << "out_r" << selfRank << ".txt"; std::ofstream ofs (str.str(), std::ofstream::out); const int iterations = 20; if(selfRank == 0) { for (int iter = 1; iter < iterations; ++iter) { for (int sendRank = 1; sendRank < wordSize; ++sendRank) { if (failRanks.find(sendRank) == failRanks.end()) { // rank not failed MPI_Send(&iter, 1, MPI::INT, sendRank, 0, MPI_COMM_WORLD); // send synchronous MPI_Request req; int value = -1; MPI_Irecv(&value, 1, MPI::INT, sendRank, 0, MPI_COMM_WORLD, &req); // asynchronous wait to catch failure. int retErr = MPI_Wait(&req, MPI_STATUS_IGNORE); if (retErr != MPI_SUCCESS) { // process failure detected ofs << "Rank " << sendRank << "failed" << endl; failRanks.insert(sendRank); if((int)failRanks.size() == wordSize - 1) { ofs << "No more slaves alive!!!" << endl; exit(-1); } } } } ofs << "Iter " << iter << " - passed" << endl; } } else { for (int iter = 1; iter < iterations; ++iter) { int value = -1; MPI_Recv(&value, 1, MPI::INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Send(&value, 1, MPI::INT, 0, 0, MPI_COMM_WORLD); if((selfRank == 1) && (value == 10)) { ofs << "rank " << selfRank << " iter " << value << endl; abort(); } } } ofs << "End rank" << selfRank << endl; MPI_Finalize(); return 0; }