[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.1-3-ge6ddea1
Service Account
noreply at mpich.org
Fri Jun 6 14:12:47 CDT 2014
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master has been updated
via e6ddea13901a9f718af9108638f4755327442e14 (commit)
via 66c07f53bd389af488adcf421f28a7d9e6660ce2 (commit)
via cd168292eed8ee98fd15554d6109fda6f860d150 (commit)
from d39b7f534d72ca862fea1ec80894f5633bc8d926 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/e6ddea13901a9f718af9108638f4755327442e14
commit e6ddea13901a9f718af9108638f4755327442e14
Author: Michael Blocksome <blocksom at us.ibm.com>
Date: Thu Jun 5 23:10:15 2014 -0500
pamid: remove blocking shmem mutex; remove shmem CAS/FOP optimizations
The blocking pthread mutex in the shared memory window causes a deadlock
on bgq - perhaps because the messaging state is not advanced while the
thread is waiting for the mutex to be released.
Removing this blocking mutex resolves the bgq failures:
- rma/strided_putget_indexed_shared
- rma/strided_getacc_indexed_shared
With the calls to the blocking mutex removed, the CAS and FOP functions
are not atomic. The solution is to remove the shared memory optimization
and instead use the common (network) code path.
Removing these shared memory optimizations from CAS/FOP resolves the
bgq hangs:
- rma/mutex_bench_shared
diff --git a/src/mpid/pamid/src/onesided/mpid_win_accumulate.c b/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
index 47fe880..26ed220 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
@@ -298,8 +298,6 @@ MPID_Accumulate(const void *origin_addr,
int len, one;
++win->mpid.sync.total;
- MPIDI_SHM_MUTEX_LOCK(win);
- shm_locked = 1;
base = win->mpid.info[target_rank].base_addr;
disp_unit = win->mpid.info[target_rank].disp_unit;
dest_addr = (char *) base + disp_unit * target_disp;
@@ -311,10 +309,6 @@ MPID_Accumulate(const void *origin_addr,
(*uop)((void *) origin_addr, dest_addr, &one, &origin_datatype);
- if (shm_locked) {
- MPIDI_SHM_MUTEX_UNLOCK(win);
- shm_locked = 0;
- }
MPIU_Free(req);
++win->mpid.sync.complete;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c b/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
index e75c921..d31b4be 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
@@ -109,46 +109,6 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
}
req->target.rank = target_rank;
-
- if (target_rank == win->comm_ptr->rank || win->create_flavor == MPI_WIN_FLAVOR_SHARED)
- {
- void *base, *dest_addr;
- int disp_unit;
- int len;
-
- ++win->mpid.sync.total;
- if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
- MPIDI_SHM_MUTEX_LOCK(win);
- shm_locked = 1;
-
- base = win->mpid.info[target_rank].base_addr;
- disp_unit = win->disp_unit;
- }
- else if (win->create_flavor == MPI_WIN_FLAVOR_DYNAMIC) {
- base = NULL;
- disp_unit = win->disp_unit;
- }
- else {
- base = win->mpid.info[target_rank].base_addr;
- disp_unit = win->disp_unit;
- }
-
- dest_addr = (char *) base + disp_unit * target_disp;
-
- MPID_Datatype_get_size_macro(datatype, len);
- MPIU_Memcpy(result_addr, dest_addr, len);
-
- if (MPIR_Compare_equal(compare_addr, dest_addr, datatype))
- MPIU_Memcpy(dest_addr, origin_addr, len);
-
- if (shm_locked) {
- MPIDI_SHM_MUTEX_UNLOCK(win);
- shm_locked = 0;
- }
- MPIU_Free(req);
- ++win->mpid.sync.complete;
- }
- else {
req->buffer = (void *) ((uintptr_t) origin_addr + req->origin.dt.true_lb);
req->user_buffer = result_addr + req->origin.dt.true_lb;
req->compare_buffer = (void *) ((uintptr_t) compare_addr + req->origin.dt.true_lb);
@@ -198,7 +158,6 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
{
PAMI_Context_post(MPIDI_Context[0], &req->post_request, MPIDI_Atomic, req);
}
- }
fn_fail:
return mpi_errno;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
index 6343484..749165c 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
@@ -289,51 +289,6 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
req->target.rank = target_rank;
-
- if (target_rank == win->comm_ptr->rank || win->create_flavor == MPI_WIN_FLAVOR_SHARED)
- {
- MPI_User_function *uop;
- void *base, *dest_addr;
- int disp_unit;
- int len, one;
-
- ++win->mpid.sync.total;
- if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
- MPIDI_SHM_MUTEX_LOCK(win);
- shm_locked = 1;
- base = win->mpid.info[target_rank].base_addr;
- disp_unit = win->mpid.info[target_rank].disp_unit;
-
- }
- else if (win->create_flavor == MPI_WIN_FLAVOR_DYNAMIC) {
- base = NULL;
- disp_unit = win->disp_unit;
- }
- else {
- base = win->mpid.info[target_rank].base_addr;
- disp_unit = win->mpid.info[target_rank].disp_unit;
- }
-
- dest_addr = (char *) base + disp_unit * target_disp;
-
- MPID_Datatype_get_size_macro(datatype, len);
- MPIU_Memcpy(result_addr, dest_addr, len);
-
- uop = MPIR_OP_HDL_TO_FN(op);
- one = 1;
-
- (*uop)((void *) origin_addr, dest_addr, &one, &datatype);
-
- if (shm_locked) {
- MPIDI_SHM_MUTEX_UNLOCK(win);
- shm_locked = 0;
- }
-
- MPIU_Free(req);
-
- ++win->mpid.sync.complete;
- }
- else {
req->compare_buffer = NULL;
req->pami_op = pami_op;
req->op = op;
@@ -376,7 +331,6 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
PAMI_Context_post(MPIDI_Context[0], &req->post_request, MPIDI_Atomic, req);
}
- }
fn_fail:
return mpi_errno;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get.c b/src/mpid/pamid/src/onesided/mpid_win_get.c
index 7b83d4a..872f62f 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get.c
@@ -267,10 +267,6 @@ MPID_Get(void *origin_addr,
else
target_addr = win->mpid.info[target_rank].base_addr + req->offset;
- if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
- MPIDI_SHM_MUTEX_LOCK(win);
- shm_locked=1;
- }
/* The operation is not complete until the local copy is performed */
mpi_errno = MPIR_Localcopy(target_addr,
@@ -279,10 +275,6 @@ MPID_Get(void *origin_addr,
origin_addr,
origin_count,
origin_datatype);
- if (shm_locked) {
- MPIDI_SHM_MUTEX_UNLOCK(win);
- shm_locked=0;
- }
/* The instant this completion counter is set to zero another thread
* may notice the change and begin freeing request resources. The
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c b/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
index 52855bd..f57eddb 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
@@ -40,12 +40,8 @@ MPIDI_Fetch_data_op(const void * origin_addr,
int disp_unit;
int len, one;
- if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
- MPIDI_SHM_MUTEX_LOCK(win);
- shm_locked = 1;
base = win->mpid.info[target_rank].base_addr;
disp_unit = win->mpid.info[target_rank].disp_unit;
- }
dest_addr = (char *) base + disp_unit * target_disp;
MPID_Datatype_get_size_macro(origin_datatype, len);
@@ -56,10 +52,6 @@ MPIDI_Fetch_data_op(const void * origin_addr,
(*uop)((void *) origin_addr, dest_addr, &one, &origin_datatype);
}
- if (shm_locked) {
- MPIDI_SHM_MUTEX_UNLOCK(win);
- shm_locked = 0;
- }
fn_fail: return;
}
http://git.mpich.org/mpich.git/commitdiff/66c07f53bd389af488adcf421f28a7d9e6660ce2
commit 66c07f53bd389af488adcf421f28a7d9e6660ce2
Author: Michael Blocksome <blocksom at us.ibm.com>
Date: Thu Jun 5 15:58:05 2014 -0500
pamid: set base addr to NULL if size is zero in win_allocate_shared
diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index e94eb48..cfd08a7 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -365,14 +365,14 @@ MPID_getSharedSegment(MPI_Aint size,
/* allocate a temporary buffer to gather the 'size' of each buffer on
* the node to determine the amount of shared memory to allocate
*/
- MPI_Aint *tmp_buf;
- tmp_buf = MPIU_Malloc (2*comm_size*sizeof(MPI_Aint));
- tmp_buf[rank] = (MPI_Aint) size;
+ MPI_Aint * size_array;
+ size_array = MPIU_Malloc (2*comm_size*sizeof(MPI_Aint));
+ size_array[rank] = (MPI_Aint) size;
mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
- tmp_buf, 1 * sizeof(MPI_Aint), MPI_BYTE,
+ size_array, 1 * sizeof(MPI_Aint), MPI_BYTE,
(*win_ptr)->comm_ptr, &errflag);
if (mpi_errno) {
- MPIU_Free(tmp_buf);
+ MPIU_Free(size_array);
MPIU_ERR_POP(mpi_errno);
}
@@ -380,9 +380,9 @@ MPID_getSharedSegment(MPI_Aint size,
MPI_Aint actual_size;
win->mpid.info[0].base_addr = NULL;
for (i = 0; i < comm_size; ++i) {
- win->mpid.info[i].base_size = tmp_buf[i];
+ win->mpid.info[i].base_size = size_array[i];
- actual_size = (*noncontig)?MPIDI_ROUND_UP_PAGESIZE(tmp_buf[i],pageSize):tmp_buf[i];
+ actual_size = (*noncontig)?MPIDI_ROUND_UP_PAGESIZE(size_array[i],pageSize):size_array[i];
win->mpid.shm->segment_len += actual_size;
@@ -394,7 +394,6 @@ MPID_getSharedSegment(MPI_Aint size,
win->mpid.info[i+1].base_addr =
(void *) ((uintptr_t)win->mpid.info[i].base_addr + actual_size);
}
- MPIU_Free(tmp_buf);
/* The beginning of the shared memory allocation contains a control
* block before the data begins.
@@ -421,6 +420,12 @@ MPID_getSharedSegment(MPI_Aint size,
(void *) ((uintptr_t)win->mpid.info[i].base_addr + (uintptr_t)win->base);
}
+ for (i = 0; i < comm_size; ++i) {
+ if (size_array[i] == 0) win->mpid.info[i].base_addr = NULL;
+ }
+
+ MPIU_Free(size_array);
+
/* increment the shared counter */
OPA_fetch_and_add_int((OPA_int_t *) &win->mpid.shm->ctrl->shm_count,(int) 1);
http://git.mpich.org/mpich.git/commitdiff/cd168292eed8ee98fd15554d6109fda6f860d150
commit cd168292eed8ee98fd15554d6109fda6f860d150
Author: Michael Blocksome <blocksom at us.ibm.com>
Date: Thu Jun 5 12:09:19 2014 -0500
pamid: create memregions at an offset of the shared memory allocation
For MPI_Win_allocate_shared() the memory regions that are exchanged all
pointed to the beginning of the shared memory allocation instead of at
the portion of the shared memory allocated to each process. When
MPI_Put(), for example, issues a PAMI_Rput() it uses an offset of zero
to specify the beginning of the peer's window. As the exchanged memory
regions for each peer rank were not created at the start of each peer
rank's allocation, data corruption and other bad things occur.
diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index 0a63e9f..e94eb48 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -377,14 +377,22 @@ MPID_getSharedSegment(MPI_Aint size,
}
/* calculate total number of bytes needed */
+ MPI_Aint actual_size;
+ win->mpid.info[0].base_addr = NULL;
for (i = 0; i < comm_size; ++i) {
win->mpid.info[i].base_size = tmp_buf[i];
- len = tmp_buf[i];
- if (*noncontig)
- /* Round up to next page size */
- win->mpid.shm->segment_len += MPIDI_ROUND_UP_PAGESIZE(len,pageSize);
- else
- win->mpid.shm->segment_len += len;
+
+ actual_size = (*noncontig)?MPIDI_ROUND_UP_PAGESIZE(tmp_buf[i],pageSize):tmp_buf[i];
+
+ win->mpid.shm->segment_len += actual_size;
+
+ /* Save the OFFSET to each rank's private shared memory area. This
+ * will be added to the BASE ADDRESS of the entire shared memory
+ * allocation to determine the virtual address.
+ */
+ if (i < comm_size-1)
+ win->mpid.info[i+1].base_addr =
+ (void *) ((uintptr_t)win->mpid.info[i].base_addr + actual_size);
}
MPIU_Free(tmp_buf);
@@ -405,14 +413,19 @@ MPID_getSharedSegment(MPI_Aint size,
#endif
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ /* compute the base addresses of each process within the shared memory segment */
+ win->base = (void *) ((long) win->mpid.shm->base_addr + (long ) MPIDI_ROUND_UP_PAGESIZE((sizeof(MPIDI_Win_shm_ctrl_t) + ((comm_size+1) * sizeof(void *))),pageSize));
+
+ for (i = 0; i < comm_size; ++i) {
+ win->mpid.info[i].base_addr =
+ (void *) ((uintptr_t)win->mpid.info[i].base_addr + (uintptr_t)win->base);
+ }
+
/* increment the shared counter */
OPA_fetch_and_add_int((OPA_int_t *) &win->mpid.shm->ctrl->shm_count,(int) 1);
/* wait for all ranks complete */
while((int) win->mpid.shm->ctrl->shm_count != comm_size) MPIDI_QUICKSLEEP;
-
- /* compute the base addresses of each process within the shared memory segment */
- win->base = (void *) ((long) win->mpid.shm->base_addr + (long ) MPIDI_ROUND_UP_PAGESIZE((sizeof(MPIDI_Win_shm_ctrl_t) + ((comm_size+1) * sizeof(void *))),pageSize));
}
fn_exit:
@@ -508,26 +521,6 @@ MPID_Win_allocate_shared(MPI_Aint size,
if (mpi_errno != MPI_SUCCESS)
return mpi_errno;
- if (comm_size > 1) {
- char *cur_base = (*win_ptr)->base;
- for (i = 0; i < comm_size; ++i) {
- if (win->mpid.info[i].base_size) {
- if (i == 0)
- win->mpid.info[i].base_addr = (void *) ((MPI_Aint) cur_base);
- else {
- if (noncontig)
- /* Round up to next page size */
- win->mpid.info[i].base_addr =(void *) ((MPI_Aint) cur_base + (MPI_Aint) MPIDI_ROUND_UP_PAGESIZE((win->mpid.info[i-1].base_size),pageSize));
- else
- win->mpid.info[i].base_addr = (void *) ((MPI_Aint) cur_base + (MPI_Aint) (win->mpid.info[i-1].base_size));
- }
- cur_base = win->mpid.info[i].base_addr;
- } else {
- win->mpid.info[i].base_addr = NULL;
- }
- }
- }
-
*(void**) base_ptr = (void *) win->mpid.info[rank].base_addr;
mpi_errno = MPIR_Barrier_impl(comm_ptr, &mpi_errno);
diff --git a/src/mpid/pamid/src/onesided/mpid_win_create.c b/src/mpid/pamid/src/onesided/mpid_win_create.c
index 7be9415..706da3c 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_create.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_create.c
@@ -123,7 +123,7 @@ MPIDI_Win_allgather( MPI_Aint size, MPID_Win **win_ptr )
#ifdef USE_PAMI_RDMA
if (size != 0)
{
- rc = PAMI_Memregion_create(MPIDI_Context[0], win->base, win->size, &length_out, &winfo->memregion);
+ rc = PAMI_Memregion_create(MPIDI_Context[0], win->mpid.info[rank].base_addr, win->mpid.info[rank].base_size, &length_out, &winfo->memregion);
MPIU_ERR_CHKANDJUMP((rc != PAMI_SUCCESS), mpi_errno, MPI_ERR_OTHER, "**nomem");
MPIU_ERR_CHKANDJUMP((win->size < length_out), mpi_errno, MPI_ERR_OTHER, "**nomem");
@@ -131,7 +131,7 @@ MPIDI_Win_allgather( MPI_Aint size, MPID_Win **win_ptr )
#else
if ( (!MPIDI_Process.mp_s_use_pami_get) && (size != 0) )
{
- rc = PAMI_Memregion_create(MPIDI_Context[0], win->base, win->size, &length_out, &winfo->memregion);
+ rc = PAMI_Memregion_create(MPIDI_Context[0], win->mpid.info[rank].base_addr, win->mpid.info[rank].base_size, &length_out, &winfo->memregion);
if(rc == PAMI_SUCCESS)
{
winfo->memregion_used = 1;
-----------------------------------------------------------------------
Summary of changes:
src/mpid/pamid/src/onesided/mpid_win_accumulate.c | 6 --
.../pamid/src/onesided/mpid_win_allocate_shared.c | 70 ++++++++++----------
.../pamid/src/onesided/mpid_win_compare_and_swap.c | 41 ------------
src/mpid/pamid/src/onesided/mpid_win_create.c | 4 +-
.../pamid/src/onesided/mpid_win_fetch_and_op.c | 46 -------------
src/mpid/pamid/src/onesided/mpid_win_get.c | 8 --
.../pamid/src/onesided/mpid_win_get_accumulate.c | 8 --
7 files changed, 36 insertions(+), 147 deletions(-)
hooks/post-receive
--
MPICH primary repository
More information about the commits
mailing list