[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.2-140-gf4518bf

Service Account noreply at mpich.org
Wed Sep 3 09:41:46 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  f4518bf66d6c3af9e7a27a6fa9c02ce5f5ede1b2 (commit)
       via  22924f357e3a571e7f3e1da7706c92d70dce818e (commit)
      from  f6d32e723fc5d7cd9c3adf3c971e3671b5b2bcaa (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/f4518bf66d6c3af9e7a27a6fa9c02ce5f5ede1b2

commit f4518bf66d6c3af9e7a27a6fa9c02ce5f5ede1b2
Author: Min Si <msi at il.is.s.u-tokyo.ac.jp>
Date:   Sat Aug 23 18:46:13 2014 -0500

    Added a test for flush on shared window.
    
    Flush should guarantee that operations have completed on both the origin
    and the target side. However, an MPI implementation may let flush return
    before completion on the target side. This causes an error in the
    following case: P0 and P1 allocate a shared window, and P2 locks both of
    them; P2 first issues a put to P0 followed by a flush, then gets the
    updated data from P1. The put may complete on P0 only after the get on
    P1 has completed.
    
    Signed-off-by: Xin Zhao <xinzhao3 at illinois.edu>
    Signed-off-by: Antonio J. Pena <apenya at mcs.anl.gov>

diff --git a/test/mpi/rma/Makefile.am b/test/mpi/rma/Makefile.am
index 99f928a..ac88fdc 100644
--- a/test/mpi/rma/Makefile.am
+++ b/test/mpi/rma/Makefile.am
@@ -112,6 +112,7 @@ noinst_PROGRAMS =          \
     get_accumulate_double_derived  \
     get_accumulate_int_derived     \
     flush                  \
+    win_shared_put_flush_get          \
     reqops                 \
     req_example            \
     req_example_shm        \
diff --git a/test/mpi/rma/testlist.in b/test/mpi/rma/testlist.in
index 9e9d3b7..267e041 100644
--- a/test/mpi/rma/testlist.in
+++ b/test/mpi/rma/testlist.in
@@ -110,6 +110,7 @@ badrma 2 mpiversion=3.0
 acc-loc 4
 fence_shm 2 mpiversion=3.0
 win_shared_zerobyte 4 mpiversion=3.0
+win_shared_put_flush_get 4 mpiversion=3.0
 get-struct 2
 
 ## This test is not strictly correct.  This was meant to test out the
diff --git a/test/mpi/rma/win_shared_put_flush_get.c b/test/mpi/rma/win_shared_put_flush_get.c
new file mode 100644
index 0000000..15f1c70
--- /dev/null
+++ b/test/mpi/rma/win_shared_put_flush_get.c
@@ -0,0 +1,199 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2014 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpi.h>
+#include "mpitest.h"
+
+#define ITER 100
+#define BUF_CNT 4
+double local_buf[BUF_CNT], check_buf[BUF_CNT];
+
+const int verbose = 0;
+
+int main(int argc, char *argv[])
+{
+    int rank, nproc, i, x;
+    int errors = 0, all_errors = 0;
+    MPI_Win win = MPI_WIN_NULL;
+
+    MPI_Comm shm_comm = MPI_COMM_NULL;
+    int shm_nproc, shm_rank;
+    double **shm_bases = NULL, *my_base;
+    MPI_Win shm_win = MPI_WIN_NULL;
+    MPI_Group shm_group = MPI_GROUP_NULL, world_group = MPI_GROUP_NULL;
+    int *shm_ranks = NULL, *shm_ranks_in_world = NULL;
+    MPI_Aint get_target_base_offsets = 0;
+
+    int win_size = sizeof(double) * BUF_CNT;
+    int new_win_size = win_size;
+    int win_unit = sizeof(double);
+    int shm_root_rank_in_world;
+    int origin = -1, put_target, get_target;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
+    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
+
+    if (nproc != 4) {
+        if (rank == 0)
+            printf("Error: must be run with four processes\n");
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm);
+    MPI_Comm_rank(shm_comm, &shm_rank);
+    MPI_Comm_size(shm_comm, &shm_nproc);
+    MPI_Comm_group(shm_comm, &shm_group);
+
+    /* Platform does not support shared memory or wrong host file, just return. */
+    if (shm_nproc != 2) {
+        goto exit;
+    }
+
+    shm_bases = (double **) calloc(shm_nproc, sizeof(double *));
+    shm_ranks = (int *) calloc(shm_nproc, sizeof(int));
+    shm_ranks_in_world = (int *) calloc(shm_nproc, sizeof(int));
+
+    if (shm_rank == 0)
+        shm_root_rank_in_world = rank;
+    MPI_Bcast(&shm_root_rank_in_world, 1, MPI_INT, 0, shm_comm);
+
+    /* Identify ranks of target processes which are located on node 0 */
+    if (rank == 0) {
+        for (i = 0; i < shm_nproc; i++) {
+            shm_ranks[i] = i;
+        }
+        MPI_Group_translate_ranks(shm_group, shm_nproc, shm_ranks, world_group, shm_ranks_in_world);
+    }
+    MPI_Bcast(shm_ranks_in_world, shm_nproc, MPI_INT, 0, MPI_COMM_WORLD);
+
+    put_target = shm_ranks_in_world[shm_nproc - 1];
+    get_target = shm_ranks_in_world[0];
+
+    /* Identify the rank of origin process which are located on node 1 */
+    if (shm_root_rank_in_world == 1 && shm_rank == 0) {
+        origin = rank;
+        if (verbose) {
+            printf("----   I am origin = %d, get_target = %d, put_target = %d\n",
+                   origin, get_target, put_target);
+        }
+    }
+
+    /* Allocate shared memory among local processes */
+    MPI_Win_allocate_shared(win_size, win_unit, MPI_INFO_NULL, shm_comm, &my_base, &shm_win);
+
+    if (shm_root_rank_in_world == 0 && verbose) {
+        MPI_Aint size;
+        int disp_unit;
+        for (i = 0; i < shm_nproc; i++) {
+            MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &shm_bases[i]);
+            printf("%d --    shared query: base[%d]=%p, size %ld, "
+                   "unit %d\n", rank, i, shm_bases[i], size, disp_unit);
+        }
+    }
+
+    /* Get offset of put target(1) on get target(0) */
+    get_target_base_offsets = (shm_nproc - 1) * win_size / win_unit;
+
+    if (origin == rank && verbose)
+        printf("%d --    base_offset of put_target %d on get_target %d: %ld\n",
+               rank, put_target, get_target, get_target_base_offsets);
+
+    /* Create using MPI_Win_create(). Note that new window size of get_target(0)
+     * is equal to the total size of shm segments on this node, thus get_target
+     * process can read the byte located on put_target process.*/
+    for (i = 0; i < BUF_CNT; i++) {
+        local_buf[i] = (i + 1) * 1.0;
+        my_base[i] = 0.0;
+    }
+
+    if (get_target == rank)
+        new_win_size = win_size * shm_nproc;
+
+    MPI_Win_create(my_base, new_win_size, win_unit, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+
+    if (verbose)
+        printf("%d --    new window my_base %p, size %ld\n", rank, my_base, new_win_size);
+
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    /* Check if flush guarantees the completion of put operations on target side.
+     *
+     * P exclusively locks 2 processes whose windows are shared with each other.
+     * P first put and flush to a process, then get the updated data from another process.
+     * If flush returns before operations are done on the target side, the data may be
+     * incorrect.*/
+    for (x = 0; x < ITER; x++) {
+        for (i = 0; i < BUF_CNT; i++) {
+            local_buf[i] += x;
+            check_buf[i] = 0;
+        }
+
+        if (rank == origin) {
+            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, put_target, 0, win);
+            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, get_target, 0, win);
+
+            for (i = 0; i < BUF_CNT; i++) {
+                MPI_Put(&local_buf[i], 1, MPI_DOUBLE, put_target, i, 1, MPI_DOUBLE, win);
+            }
+            MPI_Win_flush(put_target, win);
+
+            MPI_Get(check_buf, BUF_CNT, MPI_DOUBLE, get_target,
+                    get_target_base_offsets, BUF_CNT, MPI_DOUBLE, win);
+            MPI_Win_flush(get_target, win);
+
+            for (i = 0; i < BUF_CNT; i++) {
+                if (check_buf[i] != local_buf[i]) {
+                    printf("%d(iter %d) - Got check_buf[%d] = %.1lf, expected %.1lf\n",
+                           rank, x, i, check_buf[i], local_buf[i]);
+                    errors++;
+                }
+            }
+
+            MPI_Win_unlock(put_target, win);
+            MPI_Win_unlock(get_target, win);
+        }
+    }
+
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+
+  exit:
+
+    if (rank == 0 && all_errors == 0)
+        printf(" No Errors\n");
+
+    if (shm_bases)
+        free(shm_bases);
+    if (shm_ranks)
+        free(shm_ranks);
+    if (shm_ranks_in_world)
+        free(shm_ranks_in_world);
+
+    if (shm_win != MPI_WIN_NULL)
+        MPI_Win_free(&shm_win);
+
+    if (win != MPI_WIN_NULL)
+        MPI_Win_free(&win);
+
+    if (shm_comm != MPI_COMM_NULL)
+        MPI_Comm_free(&shm_comm);
+
+    if (shm_group != MPI_GROUP_NULL)
+        MPI_Group_free(&shm_group);
+
+    if (world_group != MPI_GROUP_NULL)
+        MPI_Group_free(&world_group);
+
+    MPI_Finalize();
+
+    return 0;
+}

http://git.mpich.org/mpich.git/commitdiff/22924f357e3a571e7f3e1da7706c92d70dce818e

commit 22924f357e3a571e7f3e1da7706c92d70dce818e
Author: Min Si <msi at il.is.s.u-tokyo.ac.jp>
Date:   Sat Aug 23 19:08:29 2014 -0500

    Bug-fix: correct the behavior of flush in exclusively locked epoch.
    
    FLUSH should guarantee the completion of operations on both the origin
    and the target side. However, for an exclusive lock, there is an
    optimization in MPICH which allows FLUSH to return without waiting for
    the acknowledgement of remote completion from the target side. It relies
    on the fact that there will be no other processes accessing the window
    during the exclusive lock epoch.
    
    However, this optimization is not correct when two processes allocate
    windows on overlapping SHM regions. Suppose P0 and P1 (on the same node)
    allocate RMA windows using the same SHM region, and P2 (on a different
    node) locks both windows. P2 first issues a PUT and a FLUSH to P0, then
    issues a GET to P1 on the same memory location as the PUT. Since FLUSH
    does not guarantee the remote completion of the PUT, the GET operation
    may not get the updated value.
    
    This patch disables the optimization for FLUSH and forces FLUSH to always
    wait for the remote completion of operations.
    
    Signed-off-by: Xin Zhao <xinzhao3 at illinois.edu>
    Signed-off-by: Antonio J. Pena <apenya at mcs.anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index b3c5342..97ab0c8 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -3601,11 +3601,15 @@ static int do_passive_target_rma(MPID_Win *win_ptr, int target_rank,
                  win_ptr->targets[target_rank].remote_lock_assert & MPI_MODE_NOCHECK));
 
     if (win_ptr->targets[target_rank].remote_lock_mode == MPI_LOCK_EXCLUSIVE &&
-        win_ptr->targets[target_rank].remote_lock_state != MPIDI_CH3_WIN_LOCK_CALLED) {
+        win_ptr->targets[target_rank].remote_lock_state != MPIDI_CH3_WIN_LOCK_CALLED &&
+        win_ptr->targets[target_rank].remote_lock_state != MPIDI_CH3_WIN_LOCK_FLUSH) {
         /* Exclusive lock already held -- no need to wait for rma done pkt at
            the end.  This is because the target won't grant another process
            access to the window until all of our operations complete at that
-           target.  Thus, there is no third-party communication issue. */
+           target.  Thus, there is no third-party communication issue.
+           However, flush still needs to wait for rma done, otherwise result
+           may be unknown if user reads the updated location from a shared window of
+           another target process after this flush. */
         *wait_for_rma_done_pkt = 0;
     }
     else if (MPIDI_CH3I_RMA_Ops_isempty(&win_ptr->targets[target_rank].rma_ops_list)) {

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/src/ch3u_rma_sync.c        |    8 +-
 test/mpi/rma/Makefile.am                |    1 +
 test/mpi/rma/testlist.in                |    1 +
 test/mpi/rma/win_shared_put_flush_get.c |  199 +++++++++++++++++++++++++++++++
 4 files changed, 207 insertions(+), 2 deletions(-)
 create mode 100644 test/mpi/rma/win_shared_put_flush_get.c


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list