[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2a1-30-gbb8f2b9
Service Account
noreply at mpich.org
Tue Sep 30 14:30:18 CDT 2014
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master has been updated
via bb8f2b9a7ae4f624bb3fbf16a56d085326159582 (commit)
from 0e945ae71025b195bb1211407785ebd2c7817776 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/bb8f2b9a7ae4f624bb3fbf16a56d085326159582
commit bb8f2b9a7ae4f624bb3fbf16a56d085326159582
Author: Nysal Jan K.A <jnysal at in.ibm.com>
Date: Tue Sep 30 09:54:25 2014 -0400
pamid: Fix a hang in scatter
The logic used to periodically issue a barrier
for PAMI algorithms that request flow control
was erroneous. The root ended up issuing a barrier
before the non-root tasks.
(ibm) D199034
Signed-off-by: Sameh Sharkawi <sssharka at us.ibm.com>
diff --git a/src/mpid/pamid/src/coll/scatter/mpido_scatter.c b/src/mpid/pamid/src/coll/scatter/mpido_scatter.c
index b034064..4e76fad 100644
--- a/src/mpid/pamid/src/coll/scatter/mpido_scatter.c
+++ b/src/mpid/pamid/src/coll/scatter/mpido_scatter.c
@@ -331,72 +331,53 @@ int MPIDO_Scatter_simple(const void *sendbuf,
int success = 1, snd_contig = 1, rcv_contig = 1;
void *snd_noncontig_buff = NULL, *rcv_noncontig_buff = NULL;
void *sbuf = NULL, *rbuf = NULL;
- size_t send_size = 0;
- size_t recv_size = 0;
+ size_t send_size = 0, recv_size = 0, data_size = 0;
MPI_Aint snd_true_lb = 0, rcv_true_lb = 0;
MPID_Segment segment;
const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid);
const int size = comm_ptr->local_size;
+ advisor_algorithm_t advisor_algorithms[1];
- if (rank == root && sendtype != MPI_DATATYPE_NULL && sendcount >= 0)
+ if (rank == root)
{
- MPIDI_Datatype_get_info(sendcount, sendtype, snd_contig,
- send_size, data_ptr, snd_true_lb);
- if(MPIDI_Pamix_collsel_advise != NULL && mpid->collsel_fast_query != NULL)
- {
- advisor_algorithm_t advisor_algorithms[1];
- int num_algorithms = MPIDI_Pamix_collsel_advise(mpid->collsel_fast_query, PAMI_XFER_SCATTER, send_size, advisor_algorithms, 1);
- if(num_algorithms)
- {
- if(advisor_algorithms[0].algorithm_type == COLLSEL_EXTERNAL_ALGO)
- {
- return MPIR_Scatter(sendbuf, sendcount, sendtype,
- recvbuf, recvcount, recvtype,
- root, comm_ptr, mpierrno);
- }
- else if(advisor_algorithms[0].metadata && advisor_algorithms[0].metadata->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests)))
- {
- comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
- int tmpmpierrno;
- MPIDO_Barrier(comm_ptr, &tmpmpierrno);
- }
- }
+ MPIDI_Datatype_get_info(sendcount, sendtype, snd_contig,
+ send_size, data_ptr, snd_true_lb);
+
+ if (recvbuf != MPI_IN_PLACE && recvtype != MPI_DATATYPE_NULL && recvcount >= 0)
+ {
+ MPIDI_Datatype_get_info(recvcount, recvtype, rcv_contig,
+ recv_size, data_ptr, rcv_true_lb);
}
+ data_size = send_size;
}
-
- if (recvtype != MPI_DATATYPE_NULL && recvcount >= 0)
+ else if (recvtype != MPI_DATATYPE_NULL && recvcount >= 0)
{
MPIDI_Datatype_get_info(recvcount, recvtype, rcv_contig,
recv_size, data_ptr, rcv_true_lb);
- if(MPIDI_Pamix_collsel_advise != NULL && mpid->collsel_fast_query != NULL)
+ data_size = recv_size;
+ }
+
+ advisor_algorithms[0].metadata = NULL;/* We check for NULL further down */
+ if(MPIDI_Pamix_collsel_advise != NULL && mpid->collsel_fast_query != NULL)
+ {
+ int num_algorithms = MPIDI_Pamix_collsel_advise(mpid->collsel_fast_query,
+ PAMI_XFER_SCATTER, data_size, advisor_algorithms, 1);
+ if(num_algorithms)
{
- advisor_algorithm_t advisor_algorithms[1];
- int num_algorithms = MPIDI_Pamix_collsel_advise(mpid->collsel_fast_query, PAMI_XFER_SCATTER, recv_size, advisor_algorithms, 1);
- if(num_algorithms)
+ if(advisor_algorithms[0].algorithm_type == COLLSEL_EXTERNAL_ALGO)
{
- if(advisor_algorithms[0].algorithm_type == COLLSEL_EXTERNAL_ALGO)
- {
- return MPIR_Scatter(sendbuf, sendcount, sendtype,
- recvbuf, recvcount, recvtype,
- root, comm_ptr, mpierrno);
- }
- else if(advisor_algorithms[0].metadata && advisor_algorithms[0].metadata->check_correct.values.asyncflowctl && !(--(comm_ptr->mpid.num_requests)))
- {
- comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
- int tmpmpierrno;
- MPIDO_Barrier(comm_ptr, &tmpmpierrno);
- }
-
+ return MPIR_Scatter(sendbuf, sendcount, sendtype,
+ recvbuf, recvcount, recvtype,
+ root, comm_ptr, mpierrno);
}
}
}
sbuf = (char *)sendbuf + snd_true_lb;
rbuf = (char *)recvbuf + rcv_true_lb;
if (rank == root)
- {
+ {
if (send_size)
{
- sbuf = (char *)sendbuf + snd_true_lb;
if (!snd_contig)
{
snd_noncontig_buff = MPIU_Malloc(send_size * size);
@@ -418,7 +399,6 @@ int MPIDO_Scatter_simple(const void *sendbuf,
{
if (recv_size)
{
- rbuf = (char *)recvbuf + rcv_true_lb;
if (!rcv_contig)
{
rcv_noncontig_buff = MPIU_Malloc(recv_size);
@@ -433,12 +413,10 @@ int MPIDO_Scatter_simple(const void *sendbuf,
else success = 0;
}
}
-
else
{
if (recv_size)/* Should this be send or recv? */
{
- rbuf = (char *)recvbuf + rcv_true_lb;
if (!rcv_contig)
{
rcv_noncontig_buff = MPIU_Malloc(recv_size);
@@ -462,6 +440,15 @@ int MPIDO_Scatter_simple(const void *sendbuf,
root, comm_ptr, mpierrno);
}
+ if(advisor_algorithms[0].metadata &&
+ advisor_algorithms[0].metadata->check_correct.values.asyncflowctl &&
+ !(--(comm_ptr->mpid.num_requests)))
+ {
+ comm_ptr->mpid.num_requests = MPIDI_Process.optimized.num_requests;
+ int tmpmpierrno;
+ MPIDO_Barrier(comm_ptr, &tmpmpierrno);
+ }
+
pami_xfer_t scatter;
MPIDI_Post_coll_t scatter_post;
volatile unsigned scatter_active = 1;
@@ -483,8 +470,7 @@ int MPIDO_Scatter_simple(const void *sendbuf,
{
scatter.cmd.xfer_scatter.rcvbuf = PAMI_IN_PLACE;
}
-
-
+
TRACE_ERR("%s scatter\n", MPIDI_Process.context_post.active>0?"Posting":"Invoking");
MPIDI_Context_post(MPIDI_Context[0], &scatter_post.state,
MPIDI_Pami_post_wrapper, (void *)&scatter);
-----------------------------------------------------------------------
Summary of changes:
src/mpid/pamid/src/coll/scatter/mpido_scatter.c | 86 ++++++++++-------------
1 files changed, 36 insertions(+), 50 deletions(-)
hooks/post-receive
--
MPICH primary repository
More information about the commits
mailing list