[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2a2-141-g51fdbed

Sun Feb 8 20:18:53 CST 2015

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  51fdbedcf352a56e5176192bfd665820cd64dadf (commit)
       via  bad898f9df13b5060cbf43ee4acdb3b7b4c9a0f7 (commit)
       via  65bf0d776e8932e7acb26714ee5a8c01ebf004ca (commit)
       via  8c5cb1e67248e29a7a5e4523b5adcfa044ab8a93 (commit)
       via  346050ea58448a53995b1581bb82b291f4a2d5f7 (commit)
      from  7dfe284051cacc8ef79f15e0c01c81b28f9fe78e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/51fdbedcf352a56e5176192bfd665820cd64dadf

commit 51fdbedcf352a56e5176192bfd665820cd64dadf
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Thu Feb 5 01:41:01 2015 -0800

    Add tests to test the atomicity for FOP, CAS and GACC operations.
    
    The entire "read-modify-write" should be atomic for CAS, FOP and
    GACC operations. This patch adds corresponding tests for them.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/test/mpi/rma/Makefile.am b/test/mpi/rma/Makefile.am
index e2a3eb3..5c0a5e8 100644
--- a/test/mpi/rma/Makefile.am
+++ b/test/mpi/rma/Makefile.am
@@ -140,7 +140,10 @@ noinst_PROGRAMS =          \
     get-struct             \
     rput_local_comp        \
     racc_local_comp        \
-    at_complete
+    at_complete            \
+    atomic_rmw_fop         \
+    atomic_rmw_cas         \
+    atomic_rmw_gacc
 
 if BUILD_MPIX_TESTS
 noinst_PROGRAMS += aint
diff --git a/test/mpi/rma/atomic_rmw_cas.c b/test/mpi/rma/atomic_rmw_cas.c
new file mode 100644
index 0000000..2b9a711
--- /dev/null
+++ b/test/mpi/rma/atomic_rmw_cas.c
@@ -0,0 +1,129 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2015 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+/* This test is going to test the atomicity for "read-modify-write" in CAS
+ * operations */
+
+/* There are three processes involved in this test: P0 (origin_shm), P1 (origin_am),
+ * and P2 (dest). P0 and P1 each issue one CAS to P2, via SHM and AM respectively.
+ * For P0, origin value is 1 and compare value is 0; for P1, origin value is 0 and
+ * compare value is 1; for P2, initial target value is 0. The correct results can
+ * only be one of the following cases:
+ *
+ *   (1) result value on P0: 0, result value on P1: 0, target value on P2: 1.
+ *   (2) result value on P0: 0, result value on P1: 1, target value on P2: 0.
+ *
+ * All other results are not correct. */
+
+#include "mpi.h"
+#include <stdio.h>
+
+#define LOOP_SIZE 10000  /* number of test rounds executed by main() */
+#define CHECK_TAG 123  /* not referenced anywhere in this test */
+
+/*
+ * Run LOOP_SIZE rounds in which ranks 0 and 1 each issue one MPI_Compare_and_swap on
+ * rank 2's window element; rank 2 gathers both fetched values plus its own target value
+ * and counts an error unless they match outcome (1) or (2). Always returns 0.
+ */
+int main (int argc, char *argv[]) {
+    int rank, size, k;
+    int errors = 0;
+    /* set here so exit_test can read dest; this works when MPIR_PARAM_CH3_ODD_EVEN_CLIQUES is set */
+    int origin_shm = 0, origin_am = 1, dest = 2;
+    int *orig_buf = NULL, *result_buf = NULL, *compare_buf = NULL,
+        *target_buf = NULL, *check_buf = NULL;
+    MPI_Win win;
+
+    MPI_Init(&argc, &argv);
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (size != 3) {
+        /* run this test with three processes */
+        goto exit_test;
+    }
+
+    if (rank != dest) {
+        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &orig_buf);
+        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &result_buf);
+        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &compare_buf);
+    }
+
+    MPI_Win_allocate(sizeof(int), sizeof(int), MPI_INFO_NULL,
+                     MPI_COMM_WORLD, &target_buf, &win);
+
+    for (k = 0; k < LOOP_SIZE; k++)  {
+
+        /* init buffers */
+        if (rank == origin_shm) {
+            orig_buf[0] = 1;
+            compare_buf[0] = 0;
+            result_buf[0] = 0;
+        }
+        else if (rank == origin_am) {
+            orig_buf[0] = 0;
+            compare_buf[0] = 1;
+            result_buf[0] = 0;
+        }
+        else {
+            MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);
+            target_buf[0] = 0;
+            MPI_Win_unlock(rank, win);
+        }
+
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        /* perform CAS */
+        MPI_Win_lock_all(0, win);
+        if (rank != dest) {
+            MPI_Compare_and_swap(orig_buf, compare_buf, result_buf, MPI_INT, dest, 0, win);
+            MPI_Win_flush(dest, win);
+        }
+        MPI_Win_unlock_all(win);
+
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        /* check results: dest gathers its target value and both origins' fetched values */
+        if (rank != dest) {
+            MPI_Gather(result_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest, MPI_COMM_WORLD);
+        }
+        else {
+            MPI_Alloc_mem(sizeof(int) * 3, MPI_INFO_NULL, &check_buf);
+            MPI_Gather(target_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest, MPI_COMM_WORLD);
+
+            if (!(check_buf[dest] == 0 && check_buf[origin_shm] == 0 && check_buf[origin_am] == 1) &&
+                !(check_buf[dest] == 1 && check_buf[origin_shm] == 0 && check_buf[origin_am] == 0)) {
+
+                printf("Wrong results: target result = %d, origin_shm result = %d, origin_am result = %d\n",
+                       check_buf[dest], check_buf[origin_shm], check_buf[origin_am]);
+
+                printf("Expected results (1): target result = 1, origin_shm result = 0, origin_am result = 0\n");
+                printf("Expected results (2): target result = 0, origin_shm result = 0, origin_am result = 1\n");
+
+                errors++;
+            }
+
+            MPI_Free_mem(check_buf);
+        }
+    }
+
+    MPI_Win_free(&win);
+
+    if (rank == origin_am || rank == origin_shm) {
+        MPI_Free_mem(orig_buf);
+        MPI_Free_mem(result_buf);
+        MPI_Free_mem(compare_buf);
+    }
+
+ exit_test:
+    if (rank == dest && errors == 0)
+        printf(" No Errors\n");
+
+    MPI_Finalize();
+    return 0;
+}
diff --git a/test/mpi/rma/atomic_rmw_fop.c b/test/mpi/rma/atomic_rmw_fop.c
new file mode 100644
index 0000000..873efe8
--- /dev/null
+++ b/test/mpi/rma/atomic_rmw_fop.c
@@ -0,0 +1,131 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2015 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+/* This test is going to test the atomicity for "read-modify-write" in FOP
+ * operations */
+
+/* There are three processes involved in this test: P0 (origin_shm), P1 (origin_am),
+ * and P2 (dest). P0 and P1 issue multiple FOPs with MPI_SUM and integer (value 1)
+ * to P2 via SHM and AM respectively. The correct results should be that the
+ * results on P0 and P1 are never the same. */
+
+#include "mpi.h"
+#include <stdio.h>
+
+#define AM_BUF_SIZE  10  /* FOPs issued per round by origin_am (rank 1) */
+#define SHM_BUF_SIZE 1000  /* FOPs issued per round by origin_shm (rank 0) */
+#define WIN_BUF_SIZE 1  /* number of ints in each rank's window */
+
+#define LOOP_SIZE 15  /* number of test rounds executed by main() */
+#define CHECK_TAG 123  /* tag of the message carrying origin_am's fetched values to origin_shm */
+
+int main (int argc, char *argv[]) {  /* runs LOOP_SIZE rounds of racing FOPs from ranks 0 and 1 onto rank 2 */
+    int rank, size, i, j, k;
+    int errors = 0, all_errors = 0;
+    int origin_shm, origin_am, dest;
+    int my_buf_size;
+    int *orig_buf = NULL, *result_buf = NULL, *target_buf = NULL, *check_buf = NULL;
+    MPI_Win win;
+    MPI_Status status;
+
+    MPI_Init(&argc, &argv);
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (size != 3) {
+        /* run this test with three processes; otherwise skip straight to the error reduction */
+        goto exit_test;
+    }
+
+    /* this works when MPIR_PARAM_CH3_ODD_EVEN_CLIQUES is set */
+    dest = 2;
+    origin_shm = 0;
+    origin_am = 1;
+
+    if (rank == origin_am) my_buf_size = AM_BUF_SIZE;
+    else if (rank == origin_shm) my_buf_size = SHM_BUF_SIZE;  /* dest leaves my_buf_size unset and never reads it */
+
+    if (rank != dest) {
+        MPI_Alloc_mem(sizeof(int) * my_buf_size, MPI_INFO_NULL, &orig_buf);
+        MPI_Alloc_mem(sizeof(int) * my_buf_size, MPI_INFO_NULL, &result_buf);
+    }
+
+    MPI_Win_allocate(sizeof(int) * WIN_BUF_SIZE, sizeof(int), MPI_INFO_NULL,
+                     MPI_COMM_WORLD, &target_buf, &win);
+
+    for (k = 0; k < LOOP_SIZE; k++)  {
+
+        /* init buffers: origins contribute 1s and clear their results, dest zeroes its window */
+        if (rank != dest) {
+            for (i = 0; i < my_buf_size; i++) {orig_buf[i] = 1; result_buf[i] = 0;}
+        }
+        else {
+            MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);
+            for (i = 0; i < WIN_BUF_SIZE; i++) {target_buf[i] = 0;}
+            MPI_Win_unlock(rank, win);
+        }
+
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        /* perform FOP: each origin adds its 1s to target element 0 one at a time, flushing after each */
+        MPI_Win_lock_all(0, win);
+        if (rank != dest) {
+            for (i = 0; i < my_buf_size; i++) {
+                MPI_Fetch_and_op(&(orig_buf[i]), &(result_buf[i]), MPI_INT, dest, 0, MPI_SUM, win);
+                MPI_Win_flush(dest, win);
+            }
+        }
+        MPI_Win_unlock_all(win);
+
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        if (rank != dest) {
+            /* check results on P0 and P1 (origins): no value fetched by P1 may also have been fetched by P0 */
+            if (rank == origin_am) {
+                MPI_Send(result_buf, AM_BUF_SIZE, MPI_INT, origin_shm, CHECK_TAG, MPI_COMM_WORLD);
+            }
+            else if (rank == origin_shm) {
+                MPI_Alloc_mem(sizeof(int) * AM_BUF_SIZE, MPI_INFO_NULL, &check_buf);
+                MPI_Recv(check_buf, AM_BUF_SIZE, MPI_INT, origin_am, CHECK_TAG, MPI_COMM_WORLD, &status);
+                for (i = 0; i < AM_BUF_SIZE; i++) {
+                    for (j = 0; j < SHM_BUF_SIZE; j++) {
+                        if (check_buf[i] == result_buf[j]) {
+                            printf("LOOP=%d, rank=%d, FOP, both check_buf[%d] and result_buf[%d] equal to %d, expected to be different. \n",
+                                   k, rank, i, j, check_buf[i]);
+                            errors++;
+                        }
+                    }
+                }
+                MPI_Free_mem(check_buf);
+            }
+        }
+        else {
+            /* check results on P2 (dest): the target must have received every increment */
+            if (target_buf[0] != AM_BUF_SIZE + SHM_BUF_SIZE) {
+                printf("LOOP=%d, rank=%d, FOP, target_buf[0] = %d, expected %d. \n",
+                       k, rank, target_buf[0], AM_BUF_SIZE+SHM_BUF_SIZE);
+                errors++;
+            }
+        }
+    }
+
+    MPI_Win_free(&win);
+
+    if (rank == origin_am || rank == origin_shm) {
+        MPI_Free_mem(orig_buf);
+        MPI_Free_mem(result_buf);
+    }
+
+ exit_test:
+    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+
+    if (rank == 0 && all_errors == 0)
+        printf(" No Errors\n");
+
+    MPI_Finalize();
+    return 0;
+}
diff --git a/test/mpi/rma/atomic_rmw_gacc.c b/test/mpi/rma/atomic_rmw_gacc.c
new file mode 100644
index 0000000..d04e9c3
--- /dev/null
+++ b/test/mpi/rma/atomic_rmw_gacc.c
@@ -0,0 +1,240 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2015 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+/* This test is going to test the atomicity for "read-modify-write" in GACC
+ * operations */
+
+/* This test is similar to atomic_rmw_fop.c.
+ * There are three processes involved in this test: P0 (origin_shm), P1 (origin_am),
+ * and P2 (dest). P0 and P1 issue multiple GACCs with MPI_SUM and OP_COUNT integers
+ * (value 1) to P2 via SHM and AM respectively. The correct results should be that the
+ * results on P0 and P1 are never the same for integers at the corresponding index
+ * in [0...OP_COUNT-1].
+ */
+
+#include "mpi.h"
+#include <stdio.h>
+
+#define OP_COUNT 10  /* number of ints covered by each GACC operation */
+#define AM_BUF_NUM  10  /* GACCs issued per part by origin_am (rank 1) */
+#define SHM_BUF_NUM 10000  /* GACCs issued per part by origin_shm (rank 0) */
+#define WIN_BUF_NUM 1  /* each rank's window holds WIN_BUF_NUM * OP_COUNT ints */
+
+#define LOOP_SIZE 15  /* number of rounds; every round runs all four parts */
+#define CHECK_TAG 123  /* tag of the message carrying origin_am's fetched values to origin_shm */
+
+int rank, size;  /* this process's rank in, and the size of, MPI_COMM_WORLD; set in main() */
+int dest, origin_shm, origin_am;  /* ranks of the target and the two origins; set in main() */
+int *orig_buf = NULL, *result_buf = NULL, *target_buf = NULL, *check_buf = NULL;  /* shared between main() and checkResults() */
+
+/* Validate one GACC part: origin_shm compares its fetched values with origin_am's, dest checks its sums; each failure bumps *errors. */
+void checkResults(int loop_k, int *errors) {
+    int i, j, m;
+
+    if (rank != dest) {
+        /* check results on P0 and P1 (origins): origin_am ships its fetched values to origin_shm */
+        if (rank == origin_am) {
+            MPI_Send(result_buf, AM_BUF_NUM * OP_COUNT, MPI_INT, origin_shm, CHECK_TAG, MPI_COMM_WORLD);
+        }
+        else if (rank == origin_shm) {
+            MPI_Alloc_mem(sizeof(int) * AM_BUF_NUM * OP_COUNT, MPI_INFO_NULL, &check_buf);
+            MPI_Recv(check_buf, AM_BUF_NUM * OP_COUNT, MPI_INT, origin_am, CHECK_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            for (i = 0; i < AM_BUF_NUM; i++) {
+                for (j = 0; j < SHM_BUF_NUM; j++) {
+                    for (m = 0; m < OP_COUNT; m++) {
+                        if (check_buf[i*OP_COUNT+m] == result_buf[j*OP_COUNT+m]) {
+                            printf("LOOP=%d, rank=%d, GACC, both check_buf[%d] and result_buf[%d] equal to %d, expected to be different. \n",
+                                   loop_k, rank, i*OP_COUNT+m, j*OP_COUNT+m, check_buf[i*OP_COUNT+m]);
+                            (*errors)++;
+                        }
+                    }
+                }
+            }
+            MPI_Free_mem(check_buf);
+        }
+    }
+    else {
+        /* check results on P2 (dest): every element must have received every increment */
+        for (i = 0; i < OP_COUNT; i++) {
+            if (target_buf[i] != AM_BUF_NUM + SHM_BUF_NUM) {
+                printf("LOOP=%d, rank=%d, GACC, target_buf[%d] = %d, expected %d. \n",
+                       loop_k, rank, i, target_buf[i], AM_BUF_NUM+SHM_BUF_NUM);
+                (*errors)++;
+            }
+        }
+    }
+}
+
+/* Runs LOOP_SIZE rounds of four GACC variants (basic/derived origin and target datatypes); in each,
+ * ranks 0 and 1 race MPI_SUM get-accumulates on rank 2's window and checkResults() validates them.
+ * The error counts are summed onto rank 0, which prints " No Errors" when the total is zero. */
+int main (int argc, char *argv[]) {
+    int i, k;
+    int errors = 0, all_errors = 0;
+    int my_buf_num;
+    MPI_Win win;
+    MPI_Datatype origin_dtp, target_dtp;
+
+    MPI_Init(&argc, &argv);
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (size != 3) {
+        /* run this test with three processes */
+        goto exit_test;
+    }
+
+    MPI_Type_contiguous(OP_COUNT, MPI_INT, &origin_dtp);
+    MPI_Type_commit(&origin_dtp);
+    MPI_Type_contiguous(OP_COUNT, MPI_INT, &target_dtp);
+    MPI_Type_commit(&target_dtp);
+
+    /* this works when MPIR_PARAM_CH3_ODD_EVEN_CLIQUES is set */
+    dest = 2;
+    origin_shm = 0;
+    origin_am = 1;
+
+    if (rank == origin_am) my_buf_num = AM_BUF_NUM;
+    else if (rank == origin_shm) my_buf_num = SHM_BUF_NUM;
+
+    if (rank != dest) {
+        MPI_Alloc_mem(sizeof(int) * my_buf_num * OP_COUNT, MPI_INFO_NULL, &orig_buf);
+        MPI_Alloc_mem(sizeof(int) * my_buf_num * OP_COUNT, MPI_INFO_NULL, &result_buf);
+    }
+
+    MPI_Win_allocate(sizeof(int) * WIN_BUF_NUM * OP_COUNT, sizeof(int), MPI_INFO_NULL,
+                     MPI_COMM_WORLD, &target_buf, &win);
+
+    for (k = 0; k < LOOP_SIZE; k++)  {
+        /* ====== Part 1: test basic datatypes ======== */
+        /* init buffers; the barrier keeps origins from starting before dest has reset its window */
+        if (rank != dest) {
+            for (i = 0; i < my_buf_num * OP_COUNT; i++) {orig_buf[i] = 1; result_buf[i] = 0;}
+        }
+        else {
+            MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);
+            for (i = 0; i < WIN_BUF_NUM * OP_COUNT; i++) {target_buf[i] = 0;}
+            MPI_Win_unlock(rank, win);
+        }
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        MPI_Win_lock_all(0, win);
+        if (rank != dest) {
+            for (i = 0; i < my_buf_num; i++) {
+                MPI_Get_accumulate(&(orig_buf[i*OP_COUNT]), OP_COUNT, MPI_INT,
+                                   &(result_buf[i*OP_COUNT]), OP_COUNT, MPI_INT,
+                                   dest, 0, OP_COUNT, MPI_INT, MPI_SUM, win);
+                MPI_Win_flush(dest, win);
+            }
+        }
+        MPI_Win_unlock_all(win);
+
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        checkResults(k, &errors);
+
+        /* ====== Part 2: test derived datatypes (origin derived, target derived) ======== */
+        /* init buffers; the barrier keeps origins from starting before dest has reset its window */
+        if (rank != dest) {
+            for (i = 0; i < my_buf_num * OP_COUNT; i++) {orig_buf[i] = 1; result_buf[i] = 0;}
+        }
+        else {
+            MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);
+            for (i = 0; i < WIN_BUF_NUM * OP_COUNT; i++) {target_buf[i] = 0;}
+            MPI_Win_unlock(rank, win);
+        }
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        MPI_Win_lock_all(0, win);
+        if (rank != dest) {
+            for (i = 0; i < my_buf_num; i++) {
+                MPI_Get_accumulate(&(orig_buf[i*OP_COUNT]), 1, origin_dtp,
+                                   &(result_buf[i*OP_COUNT]), 1, origin_dtp,
+                                   dest, 0, 1, target_dtp, MPI_SUM, win);
+                MPI_Win_flush(dest, win);
+            }
+        }
+        MPI_Win_unlock_all(win);
+
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        checkResults(k, &errors);
+
+        /* ====== Part 3: test derived datatypes (origin basic, target derived) ======== */
+        /* init buffers; the barrier keeps origins from starting before dest has reset its window */
+        if (rank != dest) {
+            for (i = 0; i < my_buf_num * OP_COUNT; i++) {orig_buf[i] = 1; result_buf[i] = 0;}
+        }
+        else {
+            MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);
+            for (i = 0; i < WIN_BUF_NUM * OP_COUNT; i++) {target_buf[i] = 0;}
+            MPI_Win_unlock(rank, win);
+        }
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        MPI_Win_lock_all(0, win);
+        if (rank != dest) {
+            for (i = 0; i < my_buf_num; i++) {
+                MPI_Get_accumulate(&(orig_buf[i*OP_COUNT]), OP_COUNT, MPI_INT,
+                                   &(result_buf[i*OP_COUNT]), OP_COUNT, MPI_INT,
+                                   dest, 0, 1, target_dtp, MPI_SUM, win);
+                MPI_Win_flush(dest, win);
+            }
+        }
+        MPI_Win_unlock_all(win);
+
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        checkResults(k, &errors);
+
+        /* ====== Part 4: test derived datatypes (origin derived, target basic) ======== */
+        /* init buffers; the barrier keeps origins from starting before dest has reset its window */
+        if (rank != dest) {
+            for (i = 0; i < my_buf_num * OP_COUNT; i++) {orig_buf[i] = 1; result_buf[i] = 0;}
+        }
+        else {
+            MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);
+            for (i = 0; i < WIN_BUF_NUM * OP_COUNT; i++) {target_buf[i] = 0;}
+            MPI_Win_unlock(rank, win);
+        }
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        MPI_Win_lock_all(0, win);
+        if (rank != dest) {
+            for (i = 0; i < my_buf_num; i++) {
+                MPI_Get_accumulate(&(orig_buf[i*OP_COUNT]), 1, origin_dtp,
+                                   &(result_buf[i*OP_COUNT]), 1, origin_dtp,
+                                   dest, 0, OP_COUNT, MPI_INT, MPI_SUM, win);
+                MPI_Win_flush(dest, win);
+            }
+        }
+        MPI_Win_unlock_all(win);
+
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        checkResults(k, &errors);
+    }
+
+    MPI_Win_free(&win);
+
+    if (rank == origin_am || rank == origin_shm) {
+        MPI_Free_mem(orig_buf);
+        MPI_Free_mem(result_buf);
+    }
+
+    MPI_Type_free(&origin_dtp);
+    MPI_Type_free(&target_dtp);
+
+ exit_test:
+    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+
+    if (rank == 0 && all_errors == 0)
+        printf(" No Errors\n");
+
+    MPI_Finalize();
+    return 0;
+}
diff --git a/test/mpi/rma/testlist.in b/test/mpi/rma/testlist.in
index 9a8a964..d7c8835 100644
--- a/test/mpi/rma/testlist.in
+++ b/test/mpi/rma/testlist.in
@@ -128,6 +128,9 @@ win_shared_zerobyte 4 mpiversion=3.0
 win_shared_put_flush_get 4 mpiversion=3.0
 get-struct 2
 at_complete 2
+atomic_rmw_fop 3
+atomic_rmw_cas 3
+atomic_rmw_gacc 3
 @mpix@ aint 2 strict=false
 
 ## This test is not strictly correct.  This was meant to test out the

http://git.mpich.org/mpich.git/commitdiff/bad898f9df13b5060cbf43ee4acdb3b7b4c9a0f7

commit bad898f9df13b5060cbf43ee4acdb3b7b4c9a0f7
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Feb 4 05:21:04 2015 -0800

    Bug-fix: guarantee atomicity for FOP and GACC.
    
    FOP, CAS and GACC are atomic "read-modify-write" operations,
    which means when the target window is defined on a SHM region,
    we need inter-process lock to guarantee the atomicity of the
    entire "read+OP". The current implementation is correct for
    SHM-based RMA operations, but not correct for AM-based RMA
    operations: for SHM-based operations, it protects the entire
    "read+OP", but for AM-based operations, it only protects the
    "OP" part.
    
    This patch fixes this issue by protecting the memory copy to
    temporary buffer and computation together for AM-based operations.
    
    Fix ticket 2226
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index c2e90b6..79dc318 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -255,6 +255,9 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *vc,
     MPIU_CHKPMEM_MALLOC(resp_req->dev.user_buf, void *, rreq->dev.user_count * type_size,
                         mpi_errno, "GACC resp. buffer");
 
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
+
     if (MPIR_DATATYPE_IS_PREDEFINED(rreq->dev.datatype)) {
         MPIU_Memcpy(resp_req->dev.user_buf, rreq->dev.real_user_buf,
                     rreq->dev.user_count * type_size);
@@ -268,6 +271,16 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *vc,
         MPID_Segment_free(seg);
     }
 
+    /* accumulate data from tmp_buf into user_buf */
+    mpi_errno = do_accumulate_op(rreq->dev.final_user_buf, rreq->dev.real_user_buf,
+                                 rreq->dev.user_count, rreq->dev.datatype, rreq->dev.op);
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
+
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
+
     resp_req->dev.OnFinal = MPIDI_CH3_ReqHandler_GaccumSendComplete;
     resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_GaccumSendComplete;
     resp_req->dev.target_win_handle = rreq->dev.target_win_handle;
@@ -322,17 +335,6 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete( MPIDI_VC_t *vc,
 
     MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RESP);
 
-    if (win_ptr->shm_allocated == TRUE)
-        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
-    /* accumulate data from tmp_buf into user_buf */
-    mpi_errno = do_accumulate_op(rreq->dev.final_user_buf, rreq->dev.real_user_buf,
-                                 rreq->dev.user_count, rreq->dev.datatype, rreq->dev.op);
-    if (win_ptr->shm_allocated == TRUE)
-        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
-    if (mpi_errno) {
-        MPIU_ERR_POP(mpi_errno);
-    }
-
     /* free the temporary buffer */
     MPIR_Type_get_true_extent_impl(rreq->dev.datatype, &true_lb, &true_extent);
     MPIU_Free((char *) rreq->dev.final_user_buf + true_lb);
@@ -1023,6 +1025,10 @@ static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Loc
     MPID_Datatype_get_size_macro(get_accum_pkt->datatype, type_size);
     sreq->dev.user_buf = (void *)MPIU_Malloc(get_accum_pkt->count * type_size);
 
+    /* Perform ACCUMULATE OP */
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
+
     if (MPIR_DATATYPE_IS_PREDEFINED(get_accum_pkt->datatype)) {
         MPIU_Memcpy(sreq->dev.user_buf, get_accum_pkt->addr,
                     get_accum_pkt->count * type_size);
@@ -1037,6 +1043,21 @@ static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Loc
         MPID_Segment_free(seg);
     }
 
+    if (lock_entry->data == NULL) {
+        /* All data fits in packet header */
+        mpi_errno = do_accumulate_op(get_accum_pkt->data, get_accum_pkt->addr,
+                                     get_accum_pkt->count, get_accum_pkt->datatype, get_accum_pkt->op);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    }
+    else {
+        mpi_errno = do_accumulate_op(lock_entry->data, get_accum_pkt->addr,
+                                     get_accum_pkt->count, get_accum_pkt->datatype, get_accum_pkt->op);
+        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    }
+
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
+
     /* here we increment the Active Target counter to guarantee the GET-like
        operation are completed when counter reaches zero. */
     win_ptr->at_completion_counter++;
@@ -1096,25 +1117,6 @@ static inline int perform_get_acc_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Loc
 	MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
     }
 
-    /* Perform ACCUMULATE OP */
-    if (win_ptr->shm_allocated == TRUE)
-        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
-
-    if (lock_entry->data == NULL) {
-        /* All data fits in packet header */
-        mpi_errno = do_accumulate_op(get_accum_pkt->data, get_accum_pkt->addr,
-                                     get_accum_pkt->count, get_accum_pkt->datatype, get_accum_pkt->op);
-        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-    }
-    else {
-        mpi_errno = do_accumulate_op(lock_entry->data, get_accum_pkt->addr,
-                                     get_accum_pkt->count, get_accum_pkt->datatype, get_accum_pkt->op);
-        if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-    }
-
-    if (win_ptr->shm_allocated == TRUE)
-        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
-
  fn_exit:
     return mpi_errno;
  fn_fail:
@@ -1149,6 +1151,9 @@ static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
         fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
     fop_resp_pkt->immed_len = fop_pkt->immed_len;
 
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
+
     /* copy data to resp pkt header */
     void *src = fop_pkt->addr, *dest = fop_resp_pkt->data;
     mpi_errno = immed_copy(src, dest, (size_t)fop_resp_pkt->immed_len);
@@ -1158,13 +1163,12 @@ static inline int perform_fop_in_lock_queue(MPID_Win *win_ptr, MPIDI_RMA_Lock_en
     if (fop_pkt->op != MPI_NO_OP) {
         MPI_User_function *uop = MPIR_OP_HDL_TO_FN(fop_pkt->op);
         int one = 1;
-        if (win_ptr->shm_allocated == TRUE)
-            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
         (*uop)(fop_pkt->data, fop_pkt->addr, &one, &(fop_pkt->datatype));
-        if (win_ptr->shm_allocated == TRUE)
-            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
     }
 
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
+
     /* send back the original data */
     MPIU_THREAD_CS_ENTER(CH3COMM,vc);
     mpi_errno = MPIDI_CH3_iStartMsg(vc, fop_resp_pkt, sizeof(*fop_resp_pkt), &resp_req);
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index 76ac30d..a64e97c 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -1098,6 +1098,9 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         fop_resp_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH_ACK;
     fop_resp_pkt->immed_len = fop_pkt->immed_len;
 
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
+
     /* copy data to resp pkt header */
     void *src = fop_pkt->addr, *dest = fop_resp_pkt->data;
     mpi_errno = immed_copy(src, dest, (size_t)fop_resp_pkt->immed_len);
@@ -1107,13 +1110,12 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     if (fop_pkt->op != MPI_NO_OP) {
         MPI_User_function *uop = MPIR_OP_HDL_TO_FN(fop_pkt->op);
         int one = 1;
-        if (win_ptr->shm_allocated == TRUE)
-            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
         (*uop)(fop_pkt->data, fop_pkt->addr, &one, &(fop_pkt->datatype));
-        if (win_ptr->shm_allocated == TRUE)
-            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
     }
 
+    if (win_ptr->shm_allocated == TRUE)
+        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
+
     /* send back the original data */
     MPIU_THREAD_CS_ENTER(CH3COMM,vc);
     mpi_errno = MPIDI_CH3_iStartMsg(vc, fop_resp_pkt, sizeof(*fop_resp_pkt), &resp_req);

http://git.mpich.org/mpich.git/commitdiff/65bf0d776e8932e7acb26714ee5a8c01ebf004ca

commit 65bf0d776e8932e7acb26714ee5a8c01ebf004ca
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Feb 4 16:48:40 2015 -0800

    Add tests to test cases when both SHM window and non-SHM window exist.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/test/mpi/rma/testlist.in b/test/mpi/rma/testlist.in
index 32cd86b..9a8a964 100644
--- a/test/mpi/rma/testlist.in
+++ b/test/mpi/rma/testlist.in
@@ -71,6 +71,7 @@ contention_putget 4
 put_base 2
 put_bottom 2
 win_flavors 4 mpiversion=3.0
+win_flavors 3 mpiversion=3.0
 manyrma2 2 timeLimit=500
 manyrma2_shm 2 timeLimit=500
 manyrma3 2
@@ -81,6 +82,7 @@ win_shared_noncontig 4 mpiversion=3.0
 win_shared_noncontig_put 4 mpiversion=3.0
 win_zero 4 mpiversion=3.0
 @largetest@win_large_shm 4 mpiversion=3.0
+@largetest@win_large_shm 3 mpiversion=3.0
 win_dynamic_acc 4 mpiversion=3.0
 get_acc_local 1 mpiversion=3.0
 linked_list 4 mpiversion=3.0
diff --git a/test/mpi/rma/win_large_shm.c b/test/mpi/rma/win_large_shm.c
index fe730d2..583eab2 100644
--- a/test/mpi/rma/win_large_shm.c
+++ b/test/mpi/rma/win_large_shm.c
@@ -17,62 +17,71 @@ int main(int argc, char **argv) {
     MPI_Win win;
     MPI_Info win_info;
     MPI_Comm shared_comm;
+    int i;
     int shm_win_size = 1024 * 1024 * 1024 * sizeof(char); /* 1GB */
 
     MPI_Init(&argc, &argv);
 
     MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
 
-    MPI_Info_create(&win_info);
-    MPI_Info_set(win_info, (char*)"alloc_shm", (char*)"true");
+    for (i = 0; i < 2; i++) {
+        if (i == 0) {
+            MPI_Info_create(&win_info);
+            MPI_Info_set(win_info, (char*)"alloc_shm", (char*)"true");
+        }
+        else {
+            win_info = MPI_INFO_NULL;
+        }
 
-    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, my_rank, MPI_INFO_NULL, &shared_comm);
+        MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, my_rank, MPI_INFO_NULL, &shared_comm);
 
-    MPI_Comm_rank(shared_comm, &shared_rank);
+        MPI_Comm_rank(shared_comm, &shared_rank);
 
-    /* every processes allocate 1GB window memory */
-    MPI_Win_allocate(shm_win_size, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+        /* every processes allocate 1GB window memory */
+        MPI_Win_allocate(shm_win_size, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
 
-    MPI_Win_free(&win);
+        MPI_Win_free(&win);
 
-    MPI_Win_allocate_shared(shm_win_size, sizeof(char), win_info, shared_comm, &mybase, &win);
+        MPI_Win_allocate_shared(shm_win_size, sizeof(char), win_info, shared_comm, &mybase, &win);
 
-    MPI_Win_free(&win);
+        MPI_Win_free(&win);
 
-    /* some processes allocate 1GB and some processes allocate zero bytes */
-    if (my_rank % 2 == 0)
-        MPI_Win_allocate(shm_win_size, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
-    else
-        MPI_Win_allocate(0, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+        /* some processes allocate 1GB and some processes allocate zero bytes */
+        if (my_rank % 2 == 0)
+            MPI_Win_allocate(shm_win_size, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+        else
+            MPI_Win_allocate(0, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
 
-    MPI_Win_free(&win);
+        MPI_Win_free(&win);
 
-    if (shared_rank % 2 == 0)
-        MPI_Win_allocate_shared(shm_win_size, sizeof(char), win_info, shared_comm, &mybase, &win);
-    else
-        MPI_Win_allocate_shared(0, sizeof(char), win_info, shared_comm, &mybase, &win);
+        if (shared_rank % 2 == 0)
+            MPI_Win_allocate_shared(shm_win_size, sizeof(char), win_info, shared_comm, &mybase, &win);
+        else
+            MPI_Win_allocate_shared(0, sizeof(char), win_info, shared_comm, &mybase, &win);
 
-    MPI_Win_free(&win);
+        MPI_Win_free(&win);
 
-    /* some processes allocate 1GB and some processes allocate smaller bytes */
-    if (my_rank % 2 == 0)
-        MPI_Win_allocate(shm_win_size, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
-    else
-        MPI_Win_allocate(shm_win_size/2, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+        /* some processes allocate 1GB and some processes allocate smaller bytes */
+        if (my_rank % 2 == 0)
+            MPI_Win_allocate(shm_win_size, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+        else
+            MPI_Win_allocate(shm_win_size/2, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
 
-    MPI_Win_free(&win);
+        MPI_Win_free(&win);
 
-    /* some processes allocate 1GB and some processes allocate smaller bytes */
-    if (shared_rank % 2 == 0)
-        MPI_Win_allocate_shared(shm_win_size, sizeof(char), win_info, shared_comm, &mybase, &win);
-    else
-        MPI_Win_allocate_shared(shm_win_size/2, sizeof(char), win_info, shared_comm, &mybase, &win);
+        /* some processes allocate 1GB and some processes allocate smaller bytes */
+        if (shared_rank % 2 == 0)
+            MPI_Win_allocate_shared(shm_win_size, sizeof(char), win_info, shared_comm, &mybase, &win);
+        else
+            MPI_Win_allocate_shared(shm_win_size/2, sizeof(char), win_info, shared_comm, &mybase, &win);
 
-    MPI_Win_free(&win);
+        MPI_Win_free(&win);
 
-    MPI_Comm_free(&shared_comm);
+        MPI_Comm_free(&shared_comm);
 
-    MPI_Info_free(&win_info);
+        if (i == 0)
+            MPI_Info_free(&win_info);
+    }
 
     if (my_rank == 0)
         printf(" No Errors\n");

http://git.mpich.org/mpich.git/commitdiff/8c5cb1e67248e29a7a5e4523b5adcfa044ab8a93

commit 8c5cb1e67248e29a7a5e4523b5adcfa044ab8a93
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Feb 4 16:36:57 2015 -0800

    Bug-fix: making processes with SHM and without SHM win work correctly.
    
    In commit 7d71278, if node_comm is NULL (this process is the only one
    on its node), we call allocate_no_shm() in CH3 to allocate the window. If
    node_comm is not NULL (more than one process is on the same node), we
    call allocate_shm() in Nemesis to allocate a SHM window. However,
    the amount of information exchanged (in MPI_Allgather) differs between
    allocate_no_shm() and allocate_shm(), which leads to incorrect execution
    when both SHM and non-SHM windows exist. This patch fixes this issue.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
index 8348475..288213f 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
@@ -319,46 +319,19 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPID_Info *
 
     /* get the sizes of the windows and window objectsof
        all processes.  allocate temp. buffer for communication */
-    MPIU_CHKLMEM_MALLOC(tmp_buf, MPI_Aint *, 3*comm_size*sizeof(MPI_Aint), mpi_errno, "tmp_buf");
+    MPIU_CHKLMEM_MALLOC(node_sizes, MPI_Aint *, node_size*sizeof(MPI_Aint), mpi_errno, "node_sizes");
 
     /* FIXME: This needs to be fixed for heterogeneous systems */
-    tmp_buf[3*rank]   = (MPI_Aint) size;
-    tmp_buf[3*rank+1] = (MPI_Aint) disp_unit;
-    tmp_buf[3*rank+2] = (MPI_Aint) (*win_ptr)->handle;
+    node_sizes[node_rank]   = (MPI_Aint) size;
 
     mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
-                                    tmp_buf, 3 * sizeof(MPI_Aint), MPI_BYTE,
-                                    (*win_ptr)->comm_ptr, &errflag);
+                                    node_sizes, sizeof(MPI_Aint), MPI_BYTE,
+                                    node_comm_ptr, &errflag);
     MPIR_T_PVAR_TIMER_END(RMA, rma_wincreate_allgather);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
-    if ((*win_ptr)->create_flavor != MPI_WIN_FLAVOR_SHARED) {
-        MPIU_CHKLMEM_MALLOC(node_sizes, MPI_Aint *, node_size*sizeof(MPI_Aint), mpi_errno, "node_sizes");
-        for (i = 0; i < node_size; i++) node_sizes[i] = 0;
-    }
-    else {
-        node_sizes = (*win_ptr)->sizes;
-    }
-
     (*win_ptr)->shm_segment_len = 0;
-    k = 0;
-    for (i = 0; i < comm_size; ++i) {
-        (*win_ptr)->sizes[i]           = tmp_buf[k++];
-        (*win_ptr)->disp_units[i]      = (int) tmp_buf[k++];
-        (*win_ptr)->all_win_handles[i] = (MPI_Win) tmp_buf[k++];
-
-        if ((*win_ptr)->create_flavor != MPI_WIN_FLAVOR_SHARED) {
-            /* If create flavor is not MPI_WIN_FLAVOR_SHARED, all processes on this
-               window may not be on the same node. Because we only need the sizes of local
-               processes (in order), we copy their sizes to a seperate array and keep them
-               in order, fur purpose of future use of calculating shm_base_addrs. */
-            if ((*win_ptr)->comm_ptr->intranode_table[i] >= 0) {
-                MPIU_Assert((*win_ptr)->comm_ptr->intranode_table[i] < node_size);
-                node_sizes[(*win_ptr)->comm_ptr->intranode_table[i]] = (*win_ptr)->sizes[i];
-            }
-        }
-    }
 
     for (i = 0; i < node_size; i++) {
         if (noncontig)
@@ -529,18 +502,29 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPID_Info *
     (*win_ptr)->base = (*win_ptr)->shm_base_addrs[rank];
     }
 
+    MPIU_CHKLMEM_MALLOC(tmp_buf, MPI_Aint *, 4*comm_size*sizeof(MPI_Aint),
+                        mpi_errno, "tmp_buf");
+
     /* get the base addresses of the windows.  Note we reuse tmp_buf from above
        since it's at least as large as we need it for this allgather. */
-    tmp_buf[rank] = MPIU_PtrToAint((*win_ptr)->base);
+    tmp_buf[4*rank] = MPIU_PtrToAint((*win_ptr)->base);
+    tmp_buf[4*rank+1] = size;
+    tmp_buf[4*rank+2] = (MPI_Aint) disp_unit;
+    tmp_buf[4*rank+3] = (MPI_Aint) (*win_ptr)->handle;
 
     mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
-                                    tmp_buf, 1, MPI_AINT,
+                                    tmp_buf, 4, MPI_AINT,
                                     (*win_ptr)->comm_ptr, &errflag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
-    for (i = 0; i < comm_size; ++i)
-        (*win_ptr)->base_addrs[i] = MPIU_AintToPtr(tmp_buf[i]);
+    k = 0;
+    for (i = 0; i < comm_size; ++i) {
+        (*win_ptr)->base_addrs[i] = MPIU_AintToPtr(tmp_buf[k++]);
+        (*win_ptr)->sizes[i] = tmp_buf[k++];
+        (*win_ptr)->disp_units[i] = (int) tmp_buf[k++];
+        (*win_ptr)->all_win_handles[i] = (MPI_Win) tmp_buf[k++];
+    }
 
     *base_pp = (*win_ptr)->base;
 

http://git.mpich.org/mpich.git/commitdiff/346050ea58448a53995b1581bb82b291f4a2d5f7

commit 346050ea58448a53995b1581bb82b291f4a2d5f7
Author: Xin Zhao <xinzhao3 at illinois.edu>
Date:   Wed Feb 4 16:29:40 2015 -0800

    Delete unnecessary code in SHM allocate / free.
    
    We allocate / free SHM regions only when node_comm exists,
    which means there is more than one process on the same
    node. When node_comm is NULL (this process is the only one on
    that node), we call the default allocate / free functions in CH3.
    (Please refer to commit f02eed5b)
    
    Here we delete unnecessary code dealing with node_comm being
    NULL in SHM allocate / free functions.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c b/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c
index fa4d906..63c85f3 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c
@@ -107,10 +107,7 @@ int MPIDI_CH3_SHM_Win_free(MPID_Win **win_ptr)
            that are on the same node as this process (node_comm).
            If node_comm == NULL, this process is the only one on this node, therefore
            we use comm_self as node comm. */
-        if ((*win_ptr)->comm_ptr->node_comm != NULL)
-            node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
-        else
-            node_comm_ptr = MPIR_Process.comm_self;
+        node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
         MPIU_Assert(node_comm_ptr != NULL);
 
         if (node_comm_ptr->rank == 0) {
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
index efd16cb..8348475 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
@@ -291,10 +291,7 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPID_Info *
        that are on the same node as this process (node_comm).
        If node_comm == NULL, this process is the only one on this node, therefore
        we use comm_self as node comm. */
-    if ((*win_ptr)->comm_ptr->node_comm != NULL)
-        node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
-    else
-        node_comm_ptr = MPIR_Process.comm_self;
+    node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
     MPIU_Assert(node_comm_ptr != NULL);
     node_size = node_comm_ptr->local_size;
     node_rank = node_comm_ptr->rank;

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c |    5 +-
 src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c |   59 ++----
 src/mpid/ch3/src/ch3u_handle_recv_req.c         |   72 ++++----
 src/mpid/ch3/src/ch3u_rma_pkthandler.c          |   10 +-
 test/mpi/rma/Makefile.am                        |    5 +-
 test/mpi/rma/atomic_rmw_cas.c                   |  129 ++++++++++++
 test/mpi/rma/atomic_rmw_fop.c                   |  131 ++++++++++++
 test/mpi/rma/atomic_rmw_gacc.c                  |  240 +++++++++++++++++++++++
 test/mpi/rma/testlist.in                        |    5 +
 test/mpi/rma/win_large_shm.c                    |   77 ++++----
 10 files changed, 617 insertions(+), 116 deletions(-)
 create mode 100644 test/mpi/rma/atomic_rmw_cas.c
 create mode 100644 test/mpi/rma/atomic_rmw_fop.c
 create mode 100644 test/mpi/rma/atomic_rmw_gacc.c


hooks/post-receive
-- 
MPICH primary repository