[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2a2-174-g6523ad9
Service Account
noreply at mpich.org
Thu Feb 26 14:39:16 CST 2015
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master has been updated
via 6523ad970475dcbe71d8529d0928aa31ed416cdf (commit)
from 8a9d5c71449c9955b76deff7489a038d28dad9c8 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/6523ad970475dcbe71d8529d0928aa31ed416cdf
commit 6523ad970475dcbe71d8529d0928aa31ed416cdf
Author: Sangmin Seo <sseo at anl.gov>
Date: Wed Feb 18 22:33:33 2015 -0600
Fix async progress problem in NBC I/O.
When the async progress thread blocked the progress engine and yielded
control, a deadlock occurred if a thread started waiting inside a wait
routine (e.g., ADIOI_GEN_iwc_wait_fn) of the NBC I/O implementation.
The waiting thread repeatedly called MPI_Test to make progress, but
the progress engine could not make progress because it had been blocked
by the async progress thread. The async progress thread tried to acquire
the lock, but the waiting thread did not release the lock because it
had not finished the wait routine. The result was a deadlock. This patch
fixes the deadlock by forcing the waiting thread to yield if the
progress engine has been blocked by another thread.
Fixes #2202
Signed-off-by: Rob Latham <robl at mcs.anl.gov>
diff --git a/src/glue/romio/glue_romio.c b/src/glue/romio/glue_romio.c
index 937f5b7..0f376c5 100644
--- a/src/glue/romio/glue_romio.c
+++ b/src/glue/romio/glue_romio.c
@@ -49,6 +49,25 @@ void MPIR_Ext_cs_exit_allfunc(void)
MPIU_THREAD_CS_EXIT(ALLFUNC,);
}
+/* This routine is for a thread to yield control when the thread is waiting for
+ * the completion of communication inside a ROMIO routine but the progress
+ * engine is blocked by another thread. */
+#ifdef MPICH_IS_THREADED
+extern volatile int MPIDI_CH3I_progress_blocked;
+#endif
+void MPIR_Ext_cs_yield_allfunc_if_progress_blocked(void)
+{
+#ifdef MPICH_IS_THREADED
+ MPIU_THREAD_CHECK_BEGIN;
+ {
+ if (MPIDI_CH3I_progress_blocked == TRUE) {
+ MPIU_THREAD_CS_YIELD(ALLFUNC,);
+ }
+ }
+ MPIU_THREAD_CHECK_END;
+#endif
+}
+
/* will consider MPI_DATATYPE_NULL to be an error */
#undef FUNCNAME
#define FUNCNAME MPIR_Ext_datatype_iscommitted
diff --git a/src/include/glue_romio.h.in b/src/include/glue_romio.h.in
index f074353..16271b1 100644
--- a/src/include/glue_romio.h.in
+++ b/src/include/glue_romio.h.in
@@ -53,6 +53,7 @@ int MPIR_Ext_init(void);
void MPIR_Ext_cs_enter_allfunc(void);
void MPIR_Ext_cs_exit_allfunc(void);
+void MPIR_Ext_cs_yield_allfunc_if_progress_blocked(void);
/* to facilitate error checking */
int MPIR_Ext_datatype_iscommitted(MPI_Datatype datatype);
diff --git a/src/mpi/romio/adio/common/ad_iread_coll.c b/src/mpi/romio/adio/common/ad_iread_coll.c
index 023a142..b1311e6 100644
--- a/src/mpi/romio/adio/common/ad_iread_coll.c
+++ b/src/mpi/romio/adio/common/ad_iread_coll.c
@@ -7,6 +7,7 @@
#include "adio.h"
#include "adio_extern.h"
#include "mpiu_greq.h"
+#include "mpioimpl.h"
#ifdef USE_DBG_LOGGING
#define RDCOLL_DEBUG 1
@@ -1303,6 +1304,10 @@ static int ADIOI_GEN_irc_wait_fn(int count, void **array_of_states,
if ((timeout > 0) && (timeout < (MPI_Wtime() - starttime)))
goto fn_exit;
+
+ /* If the progress engine is blocked, we have to yield for another
+ thread to be able to unblock the progress engine. */
+ MPIU_THREAD_CS_YIELD(ALLFUNC,_if_progress_blocked);
}
}
diff --git a/src/mpi/romio/adio/common/ad_iwrite_coll.c b/src/mpi/romio/adio/common/ad_iwrite_coll.c
index 7fa423f..b456ec4 100644
--- a/src/mpi/romio/adio/common/ad_iwrite_coll.c
+++ b/src/mpi/romio/adio/common/ad_iwrite_coll.c
@@ -7,6 +7,7 @@
#include "adio.h"
#include "adio_extern.h"
#include "mpiu_greq.h"
+#include "mpioimpl.h"
#ifdef AGGREGATION_PROFILE
#include "mpe.h"
@@ -1527,6 +1528,10 @@ static int ADIOI_GEN_iwc_wait_fn(int count, void **array_of_states,
if ((timeout > 0) && (timeout < (MPI_Wtime() - starttime)))
goto fn_exit;
+
+ /* If the progress engine is blocked, we have to yield for another
+ thread to be able to unblock the progress engine. */
+ MPIU_THREAD_CS_YIELD(ALLFUNC,_if_progress_blocked);
}
}
diff --git a/src/mpi/romio/mpi-io/mpioimpl.h b/src/mpi/romio/mpi-io/mpioimpl.h
index 77b18cb..1cb92bf 100644
--- a/src/mpi/romio/mpi-io/mpioimpl.h
+++ b/src/mpi/romio/mpi-io/mpioimpl.h
@@ -20,8 +20,10 @@
#define MPIU_THREAD_CS_ENTER(name_,ctx_) MPIU_THREAD_CS_ENTER_##name_(ctx_)
#define MPIU_THREAD_CS_EXIT(name_,ctx_) MPIU_THREAD_CS_EXIT_##name_(ctx_)
+#define MPIU_THREAD_CS_YIELD(name_,ctx_) MPIU_THREAD_CS_YIELD_##name_(ctx_)
#define MPIU_THREAD_CS_ENTER_ALLFUNC(ctx_) MPIR_Ext_cs_enter_allfunc()
#define MPIU_THREAD_CS_EXIT_ALLFUNC(ctx_) MPIR_Ext_cs_exit_allfunc()
+#define MPIU_THREAD_CS_YIELD_ALLFUNC(ctx_) MPIR_Ext_cs_yield_allfunc##ctx_()
/* committed datatype checking support in ROMIO */
#define MPIO_DATATYPE_ISCOMMITTED(dtype_, err_) \
@@ -36,6 +38,7 @@
of correct programs */
#define MPIU_THREAD_CS_ENTER(x,y)
#define MPIU_THREAD_CS_EXIT(x,y)
+#define MPIU_THREAD_CS_YIELD(x,y)
#define MPIO_DATATYPE_ISCOMMITTED(dtype_, err_) do {} while (0)
#ifdef HAVE_WINDOWS_H
#define MPIU_UNREFERENCED_ARG(a) a
-----------------------------------------------------------------------
Summary of changes:
src/glue/romio/glue_romio.c | 19 +++++++++++++++++++
src/include/glue_romio.h.in | 1 +
src/mpi/romio/adio/common/ad_iread_coll.c | 5 +++++
src/mpi/romio/adio/common/ad_iwrite_coll.c | 5 +++++
src/mpi/romio/mpi-io/mpioimpl.h | 3 +++
5 files changed, 33 insertions(+), 0 deletions(-)
hooks/post-receive
--
MPICH primary repository
More information about the commits
mailing list