[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.1-71-gc244ba4

Service Account noreply at mpich.org
Tue Jul 8 23:43:21 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  c244ba4905f7b1b4541cf1fbd9a84637525e4b22 (commit)
       via  393615326fb43b0383eca3b4ff139286f3085a73 (commit)
       via  fda7de63ddb9b9c7c5feae1ee86a34f4c3cb8c32 (commit)
       via  1c07dbafc4cd4ce8d3eaa67aa1080f7b42e6bf20 (commit)
       via  f02eed5be2e931f4c31bf787f6df34658bdfbaee (commit)
      from  3e62375a49105206bf641b7ea8163d61e3ec3544 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/c244ba4905f7b1b4541cf1fbd9a84637525e4b22

commit c244ba4905f7b1b4541cf1fbd9a84637525e4b22
Author: Pavan Balaji <balaji at anl.gov>
Date:   Tue Jul 8 21:48:23 2014 -0500

    Add a memory barrier at the end of the Win_complete function.
    
    We need to add a memory barrier at the end of the Win_complete
    function, so that shared memory operations issued during the
    start/complete epoch are visible to other processes on the node.
    
    Signed-off-by: Xin Zhao <xinzhao3 at illinois.edu>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 4241d51..859b094 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -2569,6 +2569,11 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
     /* free the group stored in window */
     MPIR_Group_release(win_ptr->start_group_ptr);
     win_ptr->start_group_ptr = NULL; 
+
+    /* Ensure ordering of load/store operations. */
+    if (win_ptr->shm_allocated == TRUE) {
+        OPA_read_write_barrier();
+    }
     
  fn_exit:
     MPIU_CHKLMEM_FREEALL();

http://git.mpich.org/mpich.git/commitdiff/393615326fb43b0383eca3b4ff139286f3085a73

commit 393615326fb43b0383eca3b4ff139286f3085a73
Author: Pavan Balaji <balaji at anl.gov>
Date:   Tue Jul 8 21:00:34 2014 -0500

    Add barrier-like semantics in PSCW for shared-memory operations.
    
    When a window uses direct shared-memory operations that are
    immediately issued internally, we cannot avoid synchronization during
    the start operation.  This patch synchronizes with processes that
    reside on the same node during start, and with processes that do not
    reside on the same node during complete.
    
    Fixes #2041.
    
    Signed-off-by: Xin Zhao <xinzhao3 at illinois.edu>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 0deebe6..4241d51 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -2171,6 +2171,99 @@ int MPIDI_Win_post(MPID_Group *post_grp_ptr, int assert, MPID_Win *win_ptr)
 }
 
 
+static int recv_post_msgs(MPID_Group *group_ptr, MPID_Win *win_ptr, int local)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int start_grp_size, *ranks_in_start_grp, *ranks_in_win_grp, src, rank, i, j;
+    MPI_Request *req;
+    MPI_Status *status;
+    MPID_Comm *comm_ptr = win_ptr->comm_ptr;
+    MPID_Group *win_grp_ptr;
+    MPIU_CHKLMEM_DECL(4);
+    MPIDI_STATE_DECL(MPID_STATE_RECV_POST_MSGS);
+
+    MPIDI_RMA_FUNC_ENTER(MPID_STATE_RECV_POST_MSGS);
+
+    /* Wait for 0-byte messages from processes either on the same node
+     * or not (depending on the "local" parameter), so we know they
+     * have entered post. */
+    start_grp_size = win_ptr->start_group_ptr->size;
+    MPIU_CHKLMEM_MALLOC(ranks_in_start_grp, int *, start_grp_size*sizeof(int),
+			mpi_errno, "ranks_in_start_grp");
+
+    MPIU_CHKLMEM_MALLOC(ranks_in_win_grp, int *, start_grp_size*sizeof(int),
+			mpi_errno, "ranks_in_win_grp");
+
+    for (i = 0; i < start_grp_size; i++)
+	ranks_in_start_grp[i] = i;
+
+    mpi_errno = MPIR_Comm_group_impl(comm_ptr, &win_grp_ptr);
+    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+    mpi_errno = MPIR_Group_translate_ranks_impl(win_ptr->start_group_ptr, start_grp_size,
+                                                ranks_in_start_grp,
+                                                win_grp_ptr, ranks_in_win_grp);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+    rank = win_ptr->comm_ptr->rank;
+    MPIU_CHKLMEM_MALLOC(req, MPI_Request *, start_grp_size*sizeof(MPI_Request), mpi_errno, "req");
+    MPIU_CHKLMEM_MALLOC(status, MPI_Status *, start_grp_size*sizeof(MPI_Status), mpi_errno, "status");
+
+    j = 0;
+    for (i = 0; i < start_grp_size; i++) {
+        src = ranks_in_win_grp[i];
+
+        if (src == rank)
+            continue;
+
+        if (local && win_ptr->shm_allocated == TRUE) {
+            MPID_Request *req_ptr;
+            MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
+
+            MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
+            MPIDI_Comm_get_vc(win_ptr->comm_ptr, src, &target_vc);
+
+            if (orig_vc->node_id == target_vc->node_id) {
+                mpi_errno = MPID_Irecv(NULL, 0, MPI_INT, src, SYNC_POST_TAG,
+                                       comm_ptr, MPID_CONTEXT_INTRA_PT2PT, &req_ptr);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                req[j++] = req_ptr->handle;
+            }
+        }
+        else if (!local) {
+            MPID_Request *req_ptr;
+
+            mpi_errno = MPID_Irecv(NULL, 0, MPI_INT, src, SYNC_POST_TAG,
+                                   comm_ptr, MPID_CONTEXT_INTRA_PT2PT, &req_ptr);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            req[j++] = req_ptr->handle;
+        }
+    }
+
+    if (j) {
+        mpi_errno = MPIR_Waitall_impl(j, req, status);
+        if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
+        /* --BEGIN ERROR HANDLING-- */
+        if (mpi_errno == MPI_ERR_IN_STATUS) {
+            for (i = 0; i < start_grp_size; i++) {
+                if (status[i].MPI_ERROR != MPI_SUCCESS) {
+                    mpi_errno = status[i].MPI_ERROR;
+                    MPIU_ERR_POP(mpi_errno);
+                }
+            }
+        }
+        /* --END ERROR HANDLING-- */
+    }
+
+    mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+ fn_fail:
+    MPIU_CHKLMEM_FREEALL();
+    MPIDI_RMA_FUNC_EXIT(MPID_STATE_RECV_POST_MSGS);
+    return mpi_errno;
+}
+
 
 #undef FUNCNAME
 #define FUNCNAME MPIDI_Win_start
@@ -2232,6 +2325,10 @@ int MPIDI_Win_start(MPID_Group *group_ptr, int assert, MPID_Win *win_ptr)
     MPIR_Group_add_ref( group_ptr );
     win_ptr->start_assert = assert;
 
+    /* wait for messages from local processes */
+    mpi_errno = recv_post_msgs(group_ptr, win_ptr, 1);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
  fn_fail:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_START);
     return mpi_errno;
@@ -2246,7 +2343,7 @@ int MPIDI_Win_start(MPID_Group *group_ptr, int assert, MPID_Win *win_ptr)
 int MPIDI_Win_complete(MPID_Win *win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    int comm_size, *nops_to_proc, src, new_total_op_count;
+    int comm_size, *nops_to_proc, new_total_op_count;
     int i, j, dst, total_op_count, *curr_ops_cnt;
     MPIDI_RMA_Op_t *curr_ptr;
     MPIDI_RMA_Ops_list_t *ops_list;
@@ -2256,7 +2353,7 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
     int start_grp_size, *ranks_in_start_grp, *ranks_in_win_grp, rank;
     int nRequest = 0;
     int nRequestNew = 0;
-    MPIU_CHKLMEM_DECL(9);
+    MPIU_CHKLMEM_DECL(7);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_COMPLETE);
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_COMPLETE);
@@ -2311,43 +2408,9 @@ int MPIDI_Win_complete(MPID_Win *win_ptr)
        message from each target process */
     if ((win_ptr->start_assert & MPI_MODE_NOCHECK) == 0)
     {
-        MPI_Request *req;
-        MPI_Status *status;
-
-        MPIU_CHKLMEM_MALLOC(req, MPI_Request *, start_grp_size*sizeof(MPI_Request), mpi_errno, "req");
-        MPIU_CHKLMEM_MALLOC(status, MPI_Status *, start_grp_size*sizeof(MPI_Status), mpi_errno, "status");
-
-	MPIR_T_PVAR_COUNTER_INC(RMA, rma_wincomplete_recvsync_aux, start_grp_size);
-	for (i = 0; i < start_grp_size; i++) {
-	    src = ranks_in_win_grp[i];
-	    if (src != rank) {
-                MPID_Request *req_ptr;
-		/* FIXME: This is a heavyweight way to process these sync 
-		   messages - this should be handled with a special packet
-		   type and callback function.
-		*/
-                mpi_errno = MPID_Irecv(NULL, 0, MPI_INT, src, SYNC_POST_TAG,
-                                       comm_ptr, MPID_CONTEXT_INTRA_PT2PT, &req_ptr);
-		if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-                req[i] = req_ptr->handle;
-	    } else {
-                req[i] = MPI_REQUEST_NULL;
-            }
-
-	}
-        mpi_errno = MPIR_Waitall_impl(start_grp_size, req, status);
-        if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIU_ERR_POP(mpi_errno);
-
-        /* --BEGIN ERROR HANDLING-- */
-        if (mpi_errno == MPI_ERR_IN_STATUS) {
-            for (i = 0; i < start_grp_size; i++) {
-                if (status[i].MPI_ERROR != MPI_SUCCESS) {
-                    mpi_errno = status[i].MPI_ERROR;
-                    MPIU_ERR_POP(mpi_errno);
-                }
-            }
-        }
-        /* --END ERROR HANDLING-- */
+        /* wait for messages from non-local processes */
+        mpi_errno = recv_post_msgs(win_ptr->start_group_ptr, win_ptr, 0);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     MPIR_T_PVAR_TIMER_END(RMA, rma_wincomplete_recvsync);
 

http://git.mpich.org/mpich.git/commitdiff/fda7de63ddb9b9c7c5feae1ee86a34f4c3cb8c32

commit fda7de63ddb9b9c7c5feae1ee86a34f4c3cb8c32
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Jul 7 21:36:17 2014 -0500

    Add a test program to test FENCE on SHM.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/test/mpi/rma/Makefile.am b/test/mpi/rma/Makefile.am
index 61fb401..ecdd63f 100644
--- a/test/mpi/rma/Makefile.am
+++ b/test/mpi/rma/Makefile.am
@@ -122,7 +122,8 @@ noinst_PROGRAMS =          \
     rma-contig             \
     badrma                 \
     nb_test                \
-    acc-loc
+    acc-loc                \
+    fence_shm
 
 strided_acc_indexed_LDADD       = $(LDADD) -lm
 strided_acc_onelock_LDADD       = $(LDADD) -lm
diff --git a/test/mpi/rma/fence_shm.c b/test/mpi/rma/fence_shm.c
new file mode 100644
index 0000000..9711577
--- /dev/null
+++ b/test/mpi/rma/fence_shm.c
@@ -0,0 +1,94 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2014 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include <stdio.h>
+#include "mpi.h"
+#include "mpitest.h"
+
+#define ELEM_PER_PROC 1
+
+static int errors = 0;
+
+int main(int argc, char *argv[])
+{
+    int rank, nprocs;
+    int shm_rank, shm_nprocs;
+    MPI_Comm shm_comm;
+    MPI_Win shm_win;
+    int *my_base;
+    int one = 1;
+    int result_data[0];
+
+    MTest_Init(&argc, &argv);
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+
+    /* run with two processes. */
+
+    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm);
+
+    MPI_Comm_rank(shm_comm, &shm_rank);
+    MPI_Comm_size(shm_comm, &shm_nprocs);
+
+    if (shm_nprocs >= 2) {
+        MPI_Win_allocate_shared(sizeof(int) * ELEM_PER_PROC, sizeof(int), MPI_INFO_NULL,
+                                shm_comm, &my_base, &shm_win);
+
+        /* Test for FENCE with no asserts. */
+
+        if (shm_rank == 1) {
+            *my_base = 0;
+
+            MPI_Win_fence(0, shm_win);
+            MPI_Win_fence(0, shm_win);
+
+            if (my_base[0] != one) {
+                errors++;
+                printf("Expected: my_base[0] = %d   Actual: my_base[0] = %d\n", one, my_base[0]);
+            }
+        }
+
+        if (shm_rank == 0) {
+            MPI_Win_fence(0, shm_win);
+            MPI_Put(&one, 1, MPI_INT, 1, 0, 1, MPI_INT, shm_win);
+            MPI_Win_fence(0, shm_win);
+        }
+
+        /* Test for FENCE with assert MPI_MODE_NOPRECEDE. */
+
+        if (shm_rank == 1) {
+            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, shm_win);
+            MPI_Put(&one, 1, MPI_INT, 1, 0, 1, MPI_INT, shm_win);
+            MPI_Win_unlock(1, shm_win);
+
+            MPI_Win_fence(MPI_MODE_NOPRECEDE, shm_win);
+            MPI_Win_fence(0, shm_win);
+        }
+
+        if (shm_rank == 0) {
+            result_data[0] = 0;
+            MPI_Win_fence(MPI_MODE_NOPRECEDE, shm_win);
+            MPI_Get(result_data, 1, MPI_INT, 1, 0, 1, MPI_INT, shm_win);
+            MPI_Win_fence(0, shm_win);
+
+            if (result_data[0] != one) {
+                errors++;
+                printf("Expected: result_data[0] = %d   Actual: result_data[0] = %d\n",
+                       one, result_data[0]);
+            }
+        }
+
+        MPI_Win_free(&shm_win);
+    }
+
+    MPI_Comm_free(&shm_comm);
+
+    MTest_Finalize(errors);
+    MPI_Finalize();
+    return 0;
+}
diff --git a/test/mpi/rma/testlist.in b/test/mpi/rma/testlist.in
index 847689c..04ddc4e 100644
--- a/test/mpi/rma/testlist.in
+++ b/test/mpi/rma/testlist.in
@@ -107,6 +107,7 @@ mutex_bench_shm 4 mpiversion=3.0
 rma-contig 2 mpiversion=3.0 timeLimit=600
 badrma 2 mpiversion=3.0
 acc-loc 4
+fence_shm 2 mpiversion=3.0
 
 ## This test is not strictly correct.  This was meant to test out the
 ## case when MPI_Test is not nonblocking.  However, we ended up

http://git.mpich.org/mpich.git/commitdiff/1c07dbafc4cd4ce8d3eaa67aa1080f7b42e6bf20

commit 1c07dbafc4cd4ce8d3eaa67aa1080f7b42e6bf20
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Mon Jul 7 20:52:40 2014 -0500

    Fix bug: add barrier semantics in FENCE for SHM ops.
    
    When SHM is allocated for an RMA window, operations are completed
    eagerly (as soon as they are posted by the user), therefore we
    need barrier semantics in the FENCE that opens an epoch to prevent
    SHM ops from happening on a target process before that target
    process starts an epoch.
    
    Note that we need a memory barrier before and after the
    synchronization calls in both the FENCE that starts an epoch and the
    FENCE that ends it, to guarantee the ordering of load/store
    operations with respect to synchronization.
    
    See #2041.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 216ae1d..0deebe6 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -968,6 +968,28 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
 	MPIR_T_PVAR_TIMER_END(RMA, rma_winfence_clearlock);
     }
 
+    if (!(assert & MPI_MODE_NOSUCCEED) &&
+        (assert & MPI_MODE_NOPRECEDE || win_ptr->fence_issued == 0)) {
+
+        /* In the FENCE that opens an epoch but does not close an epoch,
+           if SHM is allocated, perform a barrier among processes on the
+           same node, to prevent one process modifying another process's
+           memory before that process starts an epoch. */
+
+        if (win_ptr->shm_allocated == TRUE) {
+            MPID_Comm *node_comm_ptr = win_ptr->comm_ptr->node_comm;
+
+            /* Ensure ordering of load/store operations. */
+            OPA_read_write_barrier();
+
+            mpi_errno = MPIR_Barrier_impl(node_comm_ptr, &errflag);
+            if (mpi_errno) {goto fn_fail;}
+
+            /* Ensure ordering of load/store operations. */
+            OPA_read_write_barrier();
+        }
+    }
+
     /* Note that the NOPRECEDE and NOSUCCEED must be specified by all processes
        in the window's group if any specify it */
     if (assert & MPI_MODE_NOPRECEDE)
@@ -1054,6 +1076,11 @@ int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
 	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
         MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
+        /* Ensure ordering of load/store operations. */
+        if (win_ptr->shm_allocated == TRUE) {
+            OPA_read_write_barrier();
+        }
+
 	/* Set the completion counter */
 	/* FIXME: MT: this needs to be done atomically because other
 	   procs have the address and could decrement it. */

http://git.mpich.org/mpich.git/commitdiff/f02eed5be2e931f4c31bf787f6df34658bdfbaee

commit f02eed5be2e931f4c31bf787f6df34658bdfbaee
Author: Pavan Balaji <balaji at anl.gov>
Date:   Tue Jul 8 16:19:59 2014 -0500

    Only create shared memory when more than one process exists.
    
    If there is only one process on the node in the particular
    communicator, we don't need to allocate any shared memory for it.  In
    this case, we simply call the non-shared-memory functions for
    Win_allocate and free.
    
    Signed-off-by: Xin Zhao <xinzhao3 at illinois.edu>

diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c b/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c
index bfd70b3..6278879 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c
@@ -67,6 +67,11 @@ int MPIDI_CH3_SHM_Win_free(MPID_Win **win_ptr)
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SHM_WIN_FREE);
 
+    if ((*win_ptr)->comm_ptr->node_comm == NULL) {
+        mpi_errno = MPIDI_Win_free(win_ptr);
+        goto fn_exit;
+    }
+
     mpi_errno = MPIDI_CH3I_Wait_for_pt_ops_finish(*win_ptr);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
index 41ebc03..82c5aec 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
@@ -62,6 +62,11 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPID_Info *
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_WIN_ALLOCATE_SHM);
 
+    if ((*win_ptr)->comm_ptr->node_comm == NULL) {
+        mpi_errno = MPIDI_CH3U_Win_allocate_no_shm(size, disp_unit, info, comm_ptr, base_ptr, win_ptr);
+        goto fn_exit;
+    }
+
     /* If create flavor is MPI_WIN_FLAVOR_ALLOCATE, alloc_shared_noncontig is set to 1 by default. */
     if ((*win_ptr)->create_flavor == MPI_WIN_FLAVOR_ALLOCATE)
         (*win_ptr)->info_args.alloc_shared_noncontig = 1;

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c |    5 +
 src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c |    5 +
 src/mpid/ch3/src/ch3u_rma_sync.c                |  173 ++++++++++++++++++-----
 test/mpi/rma/Makefile.am                        |    3 +-
 test/mpi/rma/fence_shm.c                        |   94 ++++++++++++
 test/mpi/rma/testlist.in                        |    1 +
 6 files changed, 241 insertions(+), 40 deletions(-)
 create mode 100644 test/mpi/rma/fence_shm.c


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list