[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1rc2-147-g4b07ac8
mysql vizuser
noreply at mpich.org
Wed Jan 8 14:28:50 CST 2014
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master has been updated
via 4b07ac8ab4ba5203f228348b05a9a2da71f13396 (commit)
via c03e766f497f91de0f75a37c945a8e356f1d3788 (commit)
via 039c2290692c6fff053fd62ec2a3b5da700c7163 (commit)
via e25dc31f0ba40a35086b60e76afe1069af876b58 (commit)
via 31ac6f01a51883c466255cd8ae80ae32a9d7a6a9 (commit)
via c6d910c783380440d1946ab366e5f2496eaed042 (commit)
from 399e546369c3938ada9cc0b6c3c218f685c7613b (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/4b07ac8ab4ba5203f228348b05a9a2da71f13396
commit 4b07ac8ab4ba5203f228348b05a9a2da71f13396
Author: Sameer Kumar <sameerk at us.ibm.com>
Date: Tue Jan 7 04:29:13 2014 -0600
Bug fix for strided datatypes.
Full fix
Fix for get accumulate that sends contig ack back and then scatters result buffer on the src node.
Remove unused params.
Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>
diff --git a/src/mpid/pamid/src/onesided/mpid_1s.c b/src/mpid/pamid/src/onesided/mpid_1s.c
index 3b89aab..6e64c3f 100644
--- a/src/mpid/pamid/src/onesided/mpid_1s.c
+++ b/src/mpid/pamid/src/onesided/mpid_1s.c
@@ -45,7 +45,9 @@ MPIDI_Win_DoneCB(pami_context_t context,
req->origin.count,
req->origin.datatype);
MPID_assert(mpi_errno == MPI_SUCCESS);
+#ifndef USE_PAMI_RDMA
MPIDI_Win_datatype_unmap(&req->target.dt);
+#endif
MPID_Datatype_release(req->origin.dt.pointer);
MPIU_Free(req->buffer);
MPIU_Free(req->user_buffer);
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c b/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
index b8779e1..8f8d07c 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
@@ -38,28 +38,21 @@ MPIDI_Win_GetAccumSendAck(pami_context_t context,
pami_result_t rc = PAMI_SUCCESS;
//Copy from msginfo->addr to a contiguous buffer
- MPIDI_Datatype result_dt;
char *buffer = NULL;
- MPIDI_Win_datatype_basic(msginfo->result_count,
- msginfo->result_datatype,
- &result_dt);
-
- int use_map = 0;
- buffer = MPIU_Malloc(result_dt.size);
- if (result_dt.contig)
- memcpy(buffer, (msginfo->addr + result_dt.true_lb), result_dt.size);
+ buffer = MPIU_Malloc(msginfo->size);
+ MPID_assert(buffer != NULL);
+
+ if (msginfo->num_contig == 1)
+ memcpy(buffer, msginfo->addr, msginfo->size);
else
{
- use_map = 1;
- MPID_assert(buffer != NULL);
-
int mpi_errno = 0;
mpi_errno = MPIR_Localcopy(msginfo->addr,
msginfo->count,
msginfo->type,
buffer,
- result_dt.size,
+ msginfo->size,
MPI_CHAR);
MPID_assert(mpi_errno == MPI_SUCCESS);
}
@@ -69,6 +62,7 @@ MPIDI_Win_GetAccumSendAck(pami_context_t context,
pami_send_t params = {
.send = {
.header = {
+ .iov_base = msginfo,
.iov_len = sizeof(MPIDI_Win_GetAccMsgInfo),
},
.dispatch = MPIDI_Protocols_WinGetAccumAck,
@@ -80,30 +74,14 @@ MPIDI_Win_GetAccumSendAck(pami_context_t context,
},
};
- int index = 0;
- size_t local_offset = 0;
- //Set the map
- MPIDI_Win_datatype_map(&result_dt);
- MPID_assert(result_dt.num_contig == msginfo->result_num_contig);
-
- while (index < result_dt.num_contig) {
- params.send.header.iov_base = msginfo;
- params.send.data.iov_len = result_dt.map[index].DLOOP_VECTOR_LEN;
- params.send.data.iov_base = buffer + local_offset;
-
- rc = PAMI_Send(context, ¶ms);
- MPID_assert(rc == PAMI_SUCCESS);
- local_offset += params.send.data.iov_len;
- ++index;
- }
-
- /** Review PAMI_Send semantics and consider moving the free calls to
- the completion callback*/
- //free msginfo
- MPIU_Free(msginfo);
+ params.send.data.iov_len = msginfo->size;
+ params.send.data.iov_base = buffer;
+
+ rc = PAMI_Send(context, ¶ms);
+ MPID_assert(rc == PAMI_SUCCESS);
- if (use_map && result_dt.map != &result_dt.__map)
- MPIU_Free (result_dt.map);
+ //free msginfo
+ //MPIU_Free(msginfo);
}
void
@@ -158,8 +136,7 @@ MPIDI_Win_GetAccDoneCB(pami_context_t context,
++req->win->mpid.sync.complete;
++req->origin.completed;
- if (req->origin.completed ==
- (req->result_num_contig + req->target.dt.num_contig))
+ if (req->origin.completed == req->target.dt.num_contig + 1)
{
if(req->req_handle)
MPID_cc_set(req->req_handle->cc_ptr, 0);
@@ -178,6 +155,29 @@ MPIDI_Win_GetAccDoneCB(pami_context_t context,
}
void
+MPIDI_Win_GetAccAckDoneCB(pami_context_t context,
+ void * _msginfo,
+ pami_result_t result)
+{
+ MPIDI_Win_GetAccMsgInfo * msginfo =(MPIDI_Win_GetAccMsgInfo *)_msginfo;
+ MPIDI_Win_request *req = (MPIDI_Win_request *) msginfo->request;
+
+ if (req->result_num_contig > 1) {
+ MPIR_Localcopy(req->result.addr,
+ req->result.count,
+ req->result.datatype,
+ msginfo->result_addr,
+ msginfo->size,
+ MPI_CHAR);
+ MPIU_Free(msginfo->result_addr);
+ }
+ MPIU_Free(msginfo);
+
+ MPIDI_Win_GetAccDoneCB(context, req, result);
+}
+
+
+void
MPIDI_WinGetAccumAckCB(pami_context_t context,
void * cookie,
const void * _msginfo,
@@ -189,24 +189,22 @@ MPIDI_WinGetAccumAckCB(pami_context_t context,
{
MPID_assert(recv != NULL);
MPID_assert(sndbuf == NULL);
- MPID_assert(msginfo_size == sizeof(MPIDI_Win_GetAccMsgInfo));
MPID_assert(_msginfo != NULL);
- const MPIDI_Win_GetAccMsgInfo * msginfo =(const MPIDI_Win_GetAccMsgInfo *)_msginfo;
-
- int null=0;
- pami_type_t pami_type;
- pami_data_function pami_op;
- MPI_Op op = msginfo->op;
-
- MPIDI_Datatype_to_pami(msginfo->result_datatype, &pami_type, op, &pami_op, &null);
-
- recv->addr = msginfo->result_addr;
- recv->type = pami_type;
+ MPIDI_Win_GetAccMsgInfo * msginfo =MPIU_Malloc(sizeof(MPIDI_Win_GetAccMsgInfo));
+ *msginfo = *(const MPIDI_Win_GetAccMsgInfo *)_msginfo;
+ MPIDI_Win_request *req = (MPIDI_Win_request *) msginfo->request;
+
+ msginfo->result_addr = NULL;
+ recv->addr = req->result.addr;
+ if (req->result_num_contig > 1)
+ recv->addr = msginfo->result_addr = MPIU_Malloc(msginfo->size);
+
+ recv->type = PAMI_TYPE_BYTE;
recv->offset = 0;
recv->data_fn = PAMI_DATA_COPY;
recv->data_cookie = NULL;
- recv->local_fn = MPIDI_Win_GetAccDoneCB;
- recv->cookie = msginfo->req;
+ recv->local_fn = MPIDI_Win_GetAccAckDoneCB;
+ recv->cookie = msginfo;
}
static pami_result_t
@@ -455,13 +453,15 @@ MPID_Get_accumulate(const void * origin_addr,
MPIDI_Win_datatype_map(&req->target.dt);
- MPIDI_Datatype result_dt;
- MPIDI_Win_datatype_basic(result_count, result_datatype, &result_dt);
- req->result_num_contig = 1;
- if (!result_dt.contig)
- req->result_num_contig =result_dt.pointer->max_contig_blocks*result_count+1;
+ req->result.addr = result_addr;
+ req->result.count = result_count;
+ req->result.datatype = result_datatype;
+ MPIDI_Win_datatype_basic(result_count, result_datatype, &req->result.dt);
+ MPIDI_Win_datatype_map(&req->result.dt);
+ req->result_num_contig = req->result.dt.num_contig;
+
//We wait for #messages depending on target and result_datatype
- win->mpid.sync.total += (req->result_num_contig + req->target.dt.num_contig);
+ win->mpid.sync.total += (1 + req->target.dt.num_contig);
{
MPI_Datatype basic_type = MPI_DATATYPE_NULL;
@@ -478,20 +478,6 @@ MPID_Get_accumulate(const void * origin_addr,
}
MPID_assert(basic_type != MPI_DATATYPE_NULL);
- MPI_Datatype result_basic_type = MPI_DATATYPE_NULL;
- MPID_Datatype_get_basic_type(result_datatype, result_basic_type);
- /* MPID_Datatype_get_basic_type() doesn't handle the struct types */
- if ((result_datatype == MPI_FLOAT_INT) ||
- (result_datatype == MPI_DOUBLE_INT) ||
- (result_datatype == MPI_LONG_INT) ||
- (result_datatype == MPI_SHORT_INT) ||
- (result_datatype == MPI_LONG_DOUBLE_INT))
- {
- MPID_assert(result_basic_type == MPI_DATATYPE_NULL);
- result_basic_type = result_datatype;
- }
- MPID_assert(result_basic_type != MPI_DATATYPE_NULL);
-
unsigned index;
MPIDI_Win_GetAccMsgInfo * headers = MPIU_Calloc0(req->target.dt.num_contig, MPIDI_Win_GetAccMsgInfo);
req->accum_headers = headers;
@@ -505,11 +491,8 @@ MPID_Get_accumulate(const void * origin_addr,
headers[index].count = target_count;
headers[index].counter = index;
headers[index].num_contig = req->target.dt.num_contig;
+ headers[index].size = req->target.dt.size;
headers[index].request = req;
- headers[index].result_addr = result_addr;
- headers[index].result_count = result_count;
- headers[index].result_datatype = result_basic_type;
- headers[index].result_num_contig= req->result_num_contig;
}
}
diff --git a/src/mpid/pamid/src/onesided/mpidi_onesided.h b/src/mpid/pamid/src/onesided/mpidi_onesided.h
index 6969351..180aaad 100644
--- a/src/mpid/pamid/src/onesided/mpidi_onesided.h
+++ b/src/mpid/pamid/src/onesided/mpidi_onesided.h
@@ -199,11 +199,9 @@ typedef struct
int count;
int counter;
int num_contig;
+ int size;
void * request;
void * result_addr;
- int result_count;
- MPI_Datatype result_datatype;
- int result_num_contig;
pami_endpoint_t src_endpoint;
} MPIDI_Win_GetAccMsgInfo;
@@ -245,13 +243,19 @@ typedef struct _mpidi_win_request
MPIDI_Datatype dt;
} target;
+ struct
+ {
+ void *addr;
+ int count;
+ MPI_Datatype datatype;
+ MPIDI_Datatype dt;
+ } result;
+
void *user_buffer;
- void *compare_buffer; /* anchor of compare buffer for compare and swap */
+ void *compare_buffer; /* anchor of compare buffer for compare and swap */
uint32_t buffer_free;
void *buffer;
struct _mpidi_win_request *next;
- void * compare_addr;
- void * result_addr;
MPI_Op op;
int result_num_contig;
http://git.mpich.org/mpich.git/commitdiff/c03e766f497f91de0f75a37c945a8e356f1d3788
commit c03e766f497f91de0f75a37c945a8e356f1d3788
Author: Su Huang <suhuang at us.ibm.com>
Date: Mon Jan 6 15:25:55 2014 -0500
PAMID: fix memory leak problem
Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>
diff --git a/src/mpid/pamid/src/mpid_finalize.c b/src/mpid/pamid/src/mpid_finalize.c
index 2ed8633..82da86f 100644
--- a/src/mpid/pamid/src/mpid_finalize.c
+++ b/src/mpid/pamid/src/mpid_finalize.c
@@ -37,12 +37,19 @@ extern conn_info *_conn_info_list;
void MPIDI_close_pe_extension() {
+ extern MPIDI_printenv_t *mpich_env;
+ extern MPIX_stats_t *mpid_statp;
int rc;
/* PAMI_Extension_open in pami_init */
rc = PAMI_Extension_close (pe_extension);
if (rc != PAMI_SUCCESS) {
TRACE_ERR("ERROR close PAMI_Extension failed rc %d", rc);
}
+ if (mpich_env)
+ MPIU_Free(mpich_env);
+ if (mpid_statp)
+ MPIU_Free(mpid_statp);
+
}
#endif
diff --git a/src/mpid/pamid/src/mpidi_util.c b/src/mpid/pamid/src/mpidi_util.c
index 70f2a60..1c6cf3c 100644
--- a/src/mpid/pamid/src/mpidi_util.c
+++ b/src/mpid/pamid/src/mpidi_util.c
@@ -45,7 +45,7 @@
#define PAMI_ASYNC_EXT_ATTR 2000
#if (MPIDI_PRINTENV || MPIDI_STATISTICS || MPIDI_BANNER)
-MPIDI_printenv_t *mpich_env;
+MPIDI_printenv_t *mpich_env=NULL;
extern char* mp_euilib;
char mp_euidevice[20];
extern pami_extension_t pe_extension;
diff --git a/src/mpid/pamid/src/onesided/mpid_1s.c b/src/mpid/pamid/src/onesided/mpid_1s.c
index dd3b234..3b89aab 100644
--- a/src/mpid/pamid/src/onesided/mpid_1s.c
+++ b/src/mpid/pamid/src/onesided/mpid_1s.c
@@ -54,7 +54,9 @@ MPIDI_Win_DoneCB(pami_context_t context,
}
- if (req->origin.completed == req->target.dt.num_contig)
+ if ((req->origin.completed == req->target.dt.num_contig) ||
+ ((req->type >= MPIDI_WIN_REQUEST_COMPARE_AND_SWAP) &&
+ (req->origin.completed == req->origin.dt.num_contig)))
{
if(req->req_handle)
MPID_cc_set(req->req_handle->cc_ptr, 0);
http://git.mpich.org/mpich.git/commitdiff/039c2290692c6fff053fd62ec2a3b5da700c7163
commit 039c2290692c6fff053fd62ec2a3b5da700c7163
Author: Su Huang <suhuang at us.ibm.com>
Date: Mon Jan 6 14:10:59 2014 -0500
Fix multiple FCNAME declarations error.
Also clean up warnings.
src/mpid/pamid/src/dyntask/mpidi_port.c:53: warning: useless storage class
specifier in empty declaration
Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>
diff --git a/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c b/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
index d470475..2b4bd86 100644
--- a/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
+++ b/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
@@ -112,7 +112,6 @@ int MPID_Comm_spawn_multiple(int count, char *array_of_commands[],
int array_of_errcodes[])
{
int mpi_errno = MPI_SUCCESS;
- static char FCNAME[] = "MPID_Comm_spawn_multiple";
if(mpidi_dynamic_tasking == 0) {
fprintf(stderr, "Received spawn request for non-dynamic jobs\n");
@@ -359,7 +358,10 @@ int MPIDI_Comm_spawn_multiple(int count, char **commands,
static char *parent_port_name = 0; /* Name of parent port if this
process was spawned (and is root
of comm world) or null */
-
+#undef FUNCNAME
+#define FUNCNAME MPIDI_GetParentPort
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIDI_GetParentPort(char ** parent_port)
{
int mpi_errno = MPI_SUCCESS;
diff --git a/src/mpid/pamid/src/dyntask/mpid_port.c b/src/mpid/pamid/src/dyntask/mpid_port.c
index b0415b5..e6f8810 100644
--- a/src/mpid/pamid/src/dyntask/mpid_port.c
+++ b/src/mpid/pamid/src/dyntask/mpid_port.c
@@ -98,7 +98,6 @@ int MPID_Comm_accept(const char * port_name, MPID_Info * info, int root,
MPID_Comm * comm, MPID_Comm ** newcomm_ptr)
{
int mpi_errno = MPI_SUCCESS;
- static char FCNAME[] = "MPID_Comm_accept";
if(mpidi_dynamic_tasking == 0) {
fprintf(stderr, "Dynamic tasking API is called on non-dynamic jobs\n");
@@ -127,7 +126,6 @@ int MPID_Comm_connect(const char * port_name, MPID_Info * info, int root,
MPID_Comm * comm, MPID_Comm ** newcomm_ptr)
{
int mpi_errno=MPI_SUCCESS;
- static char FCNAME[] = "MPID_Comm_connect";
if(mpidi_dynamic_tasking == 0) {
fprintf(stderr, "Dynamic tasking API is called on non-dynamic jobs\n");
diff --git a/src/mpid/pamid/src/dyntask/mpidi_port.c b/src/mpid/pamid/src/dyntask/mpidi_port.c
index 2d4fecf..fcdbee1 100644
--- a/src/mpid/pamid/src/dyntask/mpidi_port.c
+++ b/src/mpid/pamid/src/dyntask/mpidi_port.c
@@ -50,7 +50,6 @@ static int maxAcceptQueueSize = 0;
static int AcceptQueueSize = 0;
pthread_mutex_t rem_connlist_mutex = PTHREAD_MUTEX_INITIALIZER;
-extern struct transactionID;
/* FIXME: If dynamic processes are not supported, this file will contain
no code and some compilers may warn about an "empty translation unit" */
http://git.mpich.org/mpich.git/commitdiff/e25dc31f0ba40a35086b60e76afe1069af876b58
commit e25dc31f0ba40a35086b60e76afe1069af876b58
Author: Su Huang <suhuang at us.ibm.com>
Date: Tue Dec 31 12:44:13 2013 -0500
PAMID: segfault occurred in MPIDI_Win_DoneCB
The segfault occurred while trying to free an already freed request
handle in MPIDI_Win_DoneCB. To fix the problem, the second MPIU_Free(req)
is removed from the routine.
Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>
diff --git a/src/mpid/pamid/src/onesided/mpid_1s.c b/src/mpid/pamid/src/onesided/mpid_1s.c
index b111546..dd3b234 100644
--- a/src/mpid/pamid/src/onesided/mpid_1s.c
+++ b/src/mpid/pamid/src/onesided/mpid_1s.c
@@ -69,9 +69,6 @@ MPIDI_Win_DoneCB(pami_context_t context,
if (!((req->type > MPIDI_WIN_REQUEST_GET_ACCUMULATE) && (req->type <=MPIDI_WIN_REQUEST_RGET_ACCUMULATE)))
MPIU_Free(req);
}
-
- if ( (req->origin.completed == req->origin.dt.num_contig) && ( (req->type == MPIDI_WIN_REQUEST_FETCH_AND_OP) || (req->type == MPIDI_WIN_REQUEST_COMPARE_AND_SWAP) ) )
- MPIU_Free(req);
MPIDI_Progress_signal();
}
http://git.mpich.org/mpich.git/commitdiff/31ac6f01a51883c466255cd8ae80ae32a9d7a6a9
commit 31ac6f01a51883c466255cd8ae80ae32a9d7a6a9
Author: Su Huang <suhuang at us.ibm.com>
Date: Wed Oct 30 16:30:59 2013 -0400
PAMID: fixes for MPI_Win_*
PAMID: fixes for RMA shared related, lock_all and unlock_all functions
The following has been updated:
- MPID_Win_allocated_shared
- MPID_Win_shared_query
- several functions in mpid_win_lock_all.c, mpid_win_lock.c, mpid_win_free.c and
structures in mpidi_onesided.h, mpidi_datatypes.h have been modified for
scalability support for MPID_Win_lock_all() and MPID_Win_unlock_all()
PAMID: fix MPID_Win_allocate_shared, MPID_Win_lock_all/unlock_all and MPID_Win_shared_query
Add changes based on code review feedback from the team:
1) update getPageSize to ensure that the pagesize is obtained from the range of
passed in address.
2) don't call dispatcher if lockQ[index].done == 1
if (!lockQ[index].done)
MPID_PROGRESS_WAIT_WHILE(lockQ[index].done == 0);
3) in mpid_win_shared_query()
replace
MPID_assert(win->create_flavor == MPI_WIN_FLAVOR_SHARED);
by
MPIU_ERR_CHKANDSTMT((win->create_flavor != MPI_WIN_FLAVOR_SHARED), mpi_errno,
MPI_ERR_RMA_FLAVOR, return mpi_errno, "**rmaflavor");
3) some minor fixes.
PAMID: modify MPI_Win_flush_local, MPI_Win_lock etc for better performance
The current implementation for the subject mentioned function is to allocate
two counters for each rank in the window group. The design could cause a scaling
issue. The fix is to update MPI_Win_flush_local etc with two counters per
window approach.
The changes also include the follwoing:
- provides mutex_lock/mutex_unlock for atomic operations in shared window
- fixes some bugs in handling shared window.
- removes the shared segment with IPC_RMID in MPI_Win_free.
PAMID: fixed base address for each rank in a window group
For shared window, the base address for each rank should not be
exchanged among ranks in a window group. Without the fix, the job
will be terminated with segfault.
(ibm) F189033
(ibm) D194640
Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>
diff --git a/src/mpid/pamid/include/mpidi_datatypes.h b/src/mpid/pamid/include/mpidi_datatypes.h
index 2d98dcf..b773ce7 100644
--- a/src/mpid/pamid/include/mpidi_datatypes.h
+++ b/src/mpid/pamid/include/mpidi_datatypes.h
@@ -388,6 +388,7 @@ struct MPIDI_Win_lock
unsigned rank;
MPIDI_LOCK_TYPE_t mtype; /* MPIDI_REQUEST_LOCK or MPIDI_REQUEST_LOCKALL */
int type;
+ void *flagAddr;
};
struct MPIDI_Win_queue
{
@@ -415,12 +416,6 @@ typedef struct MPIDI_Win_info_args {
int alloc_shared_noncontig;
} MPIDI_Win_info_args;
-
-typedef struct {
- int nStarted;
- int nCompleted;
-} RMA_nOps_t;
-
typedef struct workQ_t {
void *msgQ;
int count;
@@ -446,13 +441,18 @@ typedef struct MPIDI_Win_info
uint32_t memregion_used;
} MPIDI_Win_info;
+typedef pthread_mutex_t MPIDI_SHM_MUTEX;
+
typedef struct MPIDI_Win_shm_t
{
int allocated; /* flag: TRUE iff this window has a shared memory
region associated with it */
void *base_addr; /* base address of shared memory region */
MPI_Aint segment_len; /* size of shared memory region */
- uint32_t shm_key; /* shared memory key */
+ uint32_t shm_id; /* shared memory id */
+ int *shm_count;
+ MPIDI_SHM_MUTEX *mutex_lock; /* shared memory windows -- lock for */
+ /* accumulate/atomic operations */
} MPIDI_Win_shm_t;
/**
@@ -465,7 +465,7 @@ struct MPIDI_Win
void ** shm_base_addrs; /* base address shared by all process in comm */
MPIDI_Win_shm_t *shm; /* shared memory info */
workQ_t work;
- RMA_nOps_t *origin;
+ int max_ctrlsends;
struct MPIDI_Win_sync
{
#if 0
diff --git a/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c b/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
index be71200..d470475 100644
--- a/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
+++ b/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
@@ -112,6 +112,7 @@ int MPID_Comm_spawn_multiple(int count, char *array_of_commands[],
int array_of_errcodes[])
{
int mpi_errno = MPI_SUCCESS;
+ static char FCNAME[] = "MPID_Comm_spawn_multiple";
if(mpidi_dynamic_tasking == 0) {
fprintf(stderr, "Received spawn request for non-dynamic jobs\n");
diff --git a/src/mpid/pamid/src/dyntask/mpid_port.c b/src/mpid/pamid/src/dyntask/mpid_port.c
index e6f8810..b0415b5 100644
--- a/src/mpid/pamid/src/dyntask/mpid_port.c
+++ b/src/mpid/pamid/src/dyntask/mpid_port.c
@@ -98,6 +98,7 @@ int MPID_Comm_accept(const char * port_name, MPID_Info * info, int root,
MPID_Comm * comm, MPID_Comm ** newcomm_ptr)
{
int mpi_errno = MPI_SUCCESS;
+ static char FCNAME[] = "MPID_Comm_accept";
if(mpidi_dynamic_tasking == 0) {
fprintf(stderr, "Dynamic tasking API is called on non-dynamic jobs\n");
@@ -126,6 +127,7 @@ int MPID_Comm_connect(const char * port_name, MPID_Info * info, int root,
MPID_Comm * comm, MPID_Comm ** newcomm_ptr)
{
int mpi_errno=MPI_SUCCESS;
+ static char FCNAME[] = "MPID_Comm_connect";
if(mpidi_dynamic_tasking == 0) {
fprintf(stderr, "Dynamic tasking API is called on non-dynamic jobs\n");
diff --git a/src/mpid/pamid/src/onesided/mpid_1s.c b/src/mpid/pamid/src/onesided/mpid_1s.c
index 618f01f..b111546 100644
--- a/src/mpid/pamid/src/onesided/mpid_1s.c
+++ b/src/mpid/pamid/src/onesided/mpid_1s.c
@@ -56,7 +56,6 @@ MPIDI_Win_DoneCB(pami_context_t context,
if (req->origin.completed == req->target.dt.num_contig)
{
- req->win->mpid.origin[target_rank].nCompleted++;
if(req->req_handle)
MPID_cc_set(req->req_handle->cc_ptr, 0);
diff --git a/src/mpid/pamid/src/onesided/mpid_win_accumulate.c b/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
index d6c3224..7792d8f 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_accumulate.c
@@ -251,7 +251,6 @@ MPID_Accumulate(void *origin_addr,
MPIU_Free(req);
return MPI_SUCCESS;
}
- win->mpid.origin[target_rank].nStarted++;
req->target.rank = target_rank;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate.c b/src/mpid/pamid/src/onesided/mpid_win_allocate.c
index d17ff26..f9ff368 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate.c
@@ -86,7 +86,7 @@ MPID_Win_allocate(MPI_Aint size,
winfo->win = win;
winfo->disp_unit = disp_unit;
- rc= MPIDI_Win_allgather(baseP,size,win_ptr);
+ rc= MPIDI_Win_allgather(size,win_ptr);
if (rc != MPI_SUCCESS)
return rc;
*(void**) base_ptr = (void *) win->base;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
index 97525f5..d0f687b 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -36,10 +36,12 @@ extern int mpidi_dynamic_tasking;
#define MPIDI_PAGESIZE ((MPI_Aint)pageSize)
#define MPIDI_PAGESIZE_MASK (~(MPIDI_PAGESIZE-1))
#define MPIDI_ROUND_UP_PAGESIZE(x) ((((MPI_Aint)x)+(~MPIDI_PAGESIZE_MASK)) & MPIDI_PAGESIZE_MASK)
+#define ALIGN_BOUNDARY 128 /* Align data structures to cache line */
+#define PAD_SIZE(s) (ALIGN_BOUNDARY - (sizeof(s) & (ALIGN_BOUNDARY-1)))
int CheckRankOnNode(MPID_Comm * comm_ptr,int *onNode ) {
- int rank,comm_size;
+ int rank,comm_size,i;
int mpi_errno=PAMI_SUCCESS;
@@ -47,9 +49,13 @@ int CheckRankOnNode(MPID_Comm * comm_ptr,int *onNode ) {
comm_size = comm_ptr->local_size;
rank = comm_ptr->rank;
- *onNode=0;
- if (comm_ptr->intranode_table[rank] != -1)
- *onNode=1;
+ *onNode=1;
+ for (i=0; i< comm_size; i++) {
+ if (comm_ptr->intranode_table[i] == -1) {
+ *onNode=0;
+ break;
+ }
+ }
if (*onNode== 0) {
MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_CONFLICT,
return mpi_errno, "**rmaconflict");
@@ -109,7 +115,7 @@ int GetPageSize(void *addr, ulong *pageSize)
while(fgets(Line,200,fp)) {
i++;
sscanf(Line,"%s %s %s %s \n",A1,A2,A3,A4);
- if (memcmp(A1,"KernelPageSize",14)==0) {
+ if ((found == 1) && (memcmp(A1,"KernelPageSize",14)==0)) {
j=atoi(A2);
if ((A3[0]=='k') || (A3[0]=='K'))
k=1024;
@@ -118,13 +124,29 @@ int GetPageSize(void *addr, ulong *pageSize)
else if ((A3[0]=='g') || (A3[0]=='G'))
k=0x40000000; /* 1 GB */
else {
- printf("ERROR unrecognized unit A3=%s\n",A3);
+ TRACE_ERR("ERROR unrecognized unit A3=%s\n",A3);
break;
}
*pageSize = (ulong)(j * k);
TRACE_ERR(" addr=%p pageSize=%ld %s(%d)\n", addr,*pageSize,__FILE__,__LINE__);
break;
}
+ if ((strlen(A2) == 4) && ((A2[0]=='r') || (A2[3]=='p'))) {
+ len = strlen(A1);
+ #ifndef REDHAT
+ t1=strtok(A1,search);
+ #else
+ t1=strtok(A1,"-");
+ #endif
+ t2 = A1+strlen(t1)+1;
+ sscanf(t1,"%p \n",&beg);
+ sscanf(t2,"%p \n",&end);
+ if (((ulong) addr >= (ulong)beg) && ((ulong)addr <= (ulong)end)) {
+ found=1;
+ TRACE_ERR("found addr=%p i=%d between beg=%p and end=%p in %s\n",
+ addr,i,beg,end,fileName);
+ }
+ }
}
fclose(fp);
if (*pageSize == 0) {
@@ -135,34 +157,28 @@ int GetPageSize(void *addr, ulong *pageSize)
return 0;
}
-#define MPIDI_PAGESIZE ((MPI_Aint)pageSize)
-#define MPIDI_PAGESIZE_MASK (~(MPIDI_PAGESIZE-1))
-#define MPIDI_ROUND_UP_PAGESIZE(x) ((((MPI_Aint)x)+(~MPIDI_PAGESIZE_MASK)) & MPIDI_PAGESIZE_MASK)
-
-
-
int
-MPID_getSharedSegment(MPI_Aint size,
+MPID_getSharedSegment(MPI_Aint size,
int disp_unit,
- MPID_Info * info,
MPID_Comm * comm_ptr,
- void **base_ptr,
- MPID_Win ** win_ptr)
+ void **base_ptr,
+ MPID_Win **win_ptr,
+ MPI_Aint *pSize,
+ int *noncontig)
{
int mpi_errno = MPI_SUCCESS;
- void **base_pp = (void **) base_ptr;
+ void **base_pp = base_ptr;
int i, k, comm_size, rank;
- int shm_id;
uint32_t shm_key;
int node_rank;
+ int shm_id;
MPI_Aint *node_sizes;
- void * base_addr;
MPI_Aint *tmp_buf;
int errflag = FALSE;
- int noncontig = FALSE;
- MPI_Aint pageSize, len,new_size;
+ MPI_Aint pageSize,pageSize2, len,new_size;
char *cp;
MPID_Win *win;
+ int padSize;
MPIDI_Win_info *winfo;
int shm_flag = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;
@@ -171,14 +187,14 @@ MPID_getSharedSegment(MPI_Aint size,
rank = win->comm_ptr->rank;
tmp_buf = MPIU_Malloc( 2*comm_size*sizeof(MPI_Aint));
- mpi_errno=CheckSpaceType(win_ptr,info,&noncontig);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
- GetPageSize((void *) win_ptr, &pageSize);
+ GetPageSize((void *) win_ptr, (ulong *) &pageSize);
+ *pSize = pageSize;
win->mpid.shm->segment_len = 0;
if (comm_size == 1) {
if (size > 0) {
- if (noncontig)
+ if (*noncontig)
new_size = MPIDI_ROUND_UP_PAGESIZE(size);
else
new_size = size;
@@ -196,6 +212,7 @@ MPID_getSharedSegment(MPI_Aint size,
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
win->mpid.shm->segment_len = new_size;
win->mpid.info[rank].base_addr = *base_pp;
+ win->base = *base_pp;
} else {
tmp_buf[rank] = (MPI_Aint) size;
mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
@@ -206,12 +223,13 @@ MPID_getSharedSegment(MPI_Aint size,
/* calculate total number of bytes needed */
for (i = 0; i < comm_size; ++i) {
len = tmp_buf[i];
- if (noncontig)
+ if (*noncontig)
/* Round up to next page size */
win->mpid.shm->segment_len += MPIDI_ROUND_UP_PAGESIZE(len);
else
win->mpid.shm->segment_len += len;
}
+ len = len + 128; /* needed for mutex_lock etc */
/* get shared segment */
shm_key=-1;
@@ -266,7 +284,13 @@ MPID_getSharedSegment(MPI_Aint size,
shm_id = shmget(shm_key, win->mpid.shm->segment_len, shm_flag);
MPIU_ERR_CHKANDJUMP((shm_id == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
win->mpid.shm->base_addr = (void *) shmat(shm_id,0,0);
- MPIU_ERR_CHKANDJUMP((base_addr == NULL), mpi_errno,MPI_ERR_BUFFER, "**bufnull");
+ MPIU_ERR_CHKANDJUMP((win->mpid.shm->base_addr == NULL), mpi_errno,MPI_ERR_BUFFER, "**bufnull");
+ GetPageSize((void *) win->mpid.shm->base_addr, &pageSize2);
+ MPID_assert(pageSize == pageSize2);
+ /* set mutex_lock address and initialize it */
+ win->mpid.shm->mutex_lock = (pthread_mutex_t *) win->mpid.shm->base_addr;
+ win->mpid.shm->shm_count=(int *)((MPI_Aint) win->mpid.shm->mutex_lock + (MPI_Aint) sizeof(pthread_mutex_t));
+ MPIDI_SHM_MUTEX_INIT(win);
win->mpid.shm->allocated = 1;
/* successfully created shm segment */
mpi_errno = MPIR_Bcast_impl((void *) &shm_key, sizeof(int), MPI_CHAR, 0, comm_ptr, &errflag);
@@ -278,35 +302,22 @@ MPID_getSharedSegment(MPI_Aint size,
if (shm_id != -1) { /* shm segment is available */
win->mpid.shm->base_addr = (void *) shmat(shm_id,0,0);
win->mpid.shm->allocated = 1;
- MPIU_ERR_CHKANDJUMP((base_addr == (void *) -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
+ MPIU_ERR_CHKANDJUMP((win->mpid.shm->base_addr == (void *) -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
} else { /* node leader failed, no need to try here */
MPIU_ERR_CHKANDJUMP((shm_id == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
}
+ win->mpid.shm->mutex_lock = (pthread_mutex_t *) win->mpid.shm->base_addr;
+ win->mpid.shm->shm_count=(int *)((MPI_Aint) win->mpid.shm->mutex_lock + (MPI_Aint) sizeof(pthread_mutex_t));
}
- mpi_errno = MPIR_Barrier_impl(comm_ptr, &errflag);
+ win->mpid.shm->shm_id = shm_id;
+ OPA_fetch_and_add_int((OPA_int_t *) win->mpid.shm->shm_count,1);
+ while(*win->mpid.shm->shm_count != comm_size) MPIDI_QUICKSLEEP; /* wait for all ranks complete shmat */
/* compute the base addresses of each process within the shared memory segment */
{
- win->base = win->mpid.shm->base_addr;
- winfo = &win->mpid.info[rank];
- winfo->win = win;
- winfo->disp_unit = disp_unit;
- win->mpid.info[0].base_addr = win->mpid.shm->base_addr;
- char *cur_base = (*win_ptr)->mpid.shm->base_addr;
- for (i = 1; i < comm_size; ++i) {
- size = tmp_buf[i];
- if (size) {
- if (noncontig)
- /* Round up to next page size */
- win->mpid.info[i].base_addr =(void *) ((MPI_Aint) cur_base + (MPI_Aint) MPIDI_ROUND_UP_PAGESIZE(size));
- else
- win->mpid.info[i].base_addr = (void *) ((MPI_Aint) cur_base + size);
- cur_base = win->mpid.info[i].base_addr;
- } else {
- win->mpid.info[i].base_addr = NULL;
- }
- }
- }
- *base_pp = win->mpid.info[rank].base_addr;
+ padSize=sizeof(pthread_mutex_t) + sizeof(OPA_int_t);
+ win->base = (void *) ((long) win->mpid.shm->base_addr + (long ) PAD_SIZE(padSize));
+ }
+ *base_pp = win->base;
}
fn_exit:
@@ -352,6 +363,11 @@ fn_fail:
* \param[out] win_ptr window object returned by the call (handle)
* \return MPI_SUCCESS, MPI_ERR_ARG, MPI_ERR_COMM, MPI_ERR_INFO. MPI_ERR_OTHER,
* MPI_ERR_SIZE
+ *
+ * win->mpid.shm->base_addr \* return address from shmat *\
+ * win->base \* address for data starts here == win->mpid.shm->base_addr *\
+ * \* + space for mutex_lock and shm_count *\
+ *
*/
int
MPID_Win_allocate_shared(MPI_Aint size,
@@ -365,35 +381,49 @@ MPID_Win_allocate_shared(MPI_Aint size,
void **baseP = base_ptr;
MPIDI_Win_info *winfo;
MPID_Win *win;
- int rank, comm_size;
- int onNode;
+ int rank, comm_size,i;
+ int onNode,noncontig=FALSE;
+ MPI_Aint pageSize=0;
+
mpi_errno =MPIDI_Win_init(size,disp_unit,win_ptr, info, comm_ptr, MPI_WIN_FLAVOR_SHARED, MPI_WIN_UNIFIED);
if (mpi_errno) MPIU_ERR_POP(mpi_errno);
win = *win_ptr;
mpi_errno=CheckRankOnNode(comm_ptr,&onNode);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
MPIU_ERR_CHKANDJUMP((onNode == 0), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
-
+ mpi_errno=CheckSpaceType(win_ptr,info,&noncontig);
rank = (*win_ptr)->comm_ptr->rank;
comm_size = (*win_ptr)->comm_ptr->local_size;
win->mpid.shm = MPIU_Malloc(sizeof(MPIDI_Win_shm_t));
win->mpid.shm->allocated=0;
MPID_assert(win->mpid.shm != NULL);
- MPID_getSharedSegment(size, disp_unit,info,comm_ptr,baseP, win_ptr);
+ MPID_getSharedSegment(size, disp_unit,comm_ptr,baseP, win_ptr,(ulong *)&pageSize,(int *)&noncontig);
- win->base = *baseP;
winfo = &win->mpid.info[rank];
winfo->win = win;
winfo->disp_unit = disp_unit;
- win->base = (void *) MPIU_PtrToAint(winfo->base_addr);
- winfo->base_addr = win->base;
-
- mpi_errno = MPIDI_Win_allgather(*baseP,size,win_ptr);
+ mpi_errno = MPIDI_Win_allgather(size,win_ptr);
if (mpi_errno != MPI_SUCCESS)
return mpi_errno;
-
- *(void**) base_ptr = (void *) win->base;
+ win->mpid.info[0].base_addr = win->base;
+ if (comm_size > 1) {
+ char *cur_base = (*win_ptr)->base;
+ for (i = 1; i < comm_size; ++i) {
+ if (size) {
+ if (noncontig)
+ /* Round up to next page size */
+ win->mpid.info[i].base_addr =(void *) ((MPI_Aint) cur_base + (MPI_Aint) MPIDI_ROUND_UP_PAGESIZE(size));
+ else
+ win->mpid.info[i].base_addr = (void *) ((MPI_Aint) cur_base + (MPI_Aint) size);
+ cur_base = win->mpid.info[i].base_addr;
+ } else {
+ win->mpid.info[i].base_addr = NULL;
+ }
+ }
+ }
+ *(void**) base_ptr = (void *) win->mpid.info[rank].base_addr;
mpi_errno = MPIR_Barrier_impl(comm_ptr, &mpi_errno);
diff --git a/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c b/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
index 8022bc7..3f7d925 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_compare_and_swap.c
@@ -112,7 +112,6 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
int disp_unit;
int len;
-#ifdef PENDING_SHM_WIN
if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
MPIDI_SHM_MUTEX_LOCK(win);
shm_locked = 1;
@@ -121,12 +120,9 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
disp_unit = win->disp_unit;
}
else {
-#endif
base = win->base;
disp_unit = win->disp_unit;
-#ifdef PENDING_SHM_WIN
}
-#endif
dest_addr = (char *) base + disp_unit * target_disp;
@@ -136,12 +132,10 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
if (MPIR_Compare_equal(compare_addr, dest_addr, datatype))
MPIU_Memcpy(dest_addr, origin_addr, len);
-#ifdef PENDING_SHM_WIN
if (shm_locked) {
MPIDI_SHM_MUTEX_UNLOCK(win);
shm_locked = 0;
}
-#endif
MPIU_Free(req);
}
else {
diff --git a/src/mpid/pamid/src/onesided/mpid_win_create.c b/src/mpid/pamid/src/onesided/mpid_win_create.c
index 38f072a..e3d2b9d 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_create.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_create.c
@@ -78,7 +78,6 @@ MPIDI_Win_init( MPI_Aint length,
MPID_assert(mpi_errno == 0);
}
MPID_assert(mpi_errno == 0);
- win->mpid.origin = MPIU_Calloc0(size, RMA_nOps_t);
/* Initialize the info (hint) flags per window */
@@ -106,7 +105,7 @@ MPIDI_Win_init( MPI_Aint length,
/* */
/***************************************************************************/
int
-MPIDI_Win_allgather(void *base, MPI_Aint size, MPID_Win **win_ptr )
+MPIDI_Win_allgather( MPI_Aint size, MPID_Win **win_ptr )
{
int mpi_errno = MPI_SUCCESS;
MPID_Win *win;
@@ -208,7 +207,7 @@ MPID_Win_create(void * base,
winfo->win = win;
winfo->disp_unit = disp_unit;
- rc= MPIDI_Win_allgather(base,size,win_ptr);
+ rc= MPIDI_Win_allgather(size,win_ptr);
if (rc != MPI_SUCCESS)
return rc;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_create_dynamic.c b/src/mpid/pamid/src/onesided/mpid_win_create_dynamic.c
index 5343362..74e10ca 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_create_dynamic.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_create_dynamic.c
@@ -57,7 +57,7 @@ MPID_Win_create_dynamic( MPID_Info * info,
winfo = &win->mpid.info[rank];
winfo->win = win;
- rc= MPIDI_Win_allgather(MPI_BOTTOM,0,win_ptr);
+ rc= MPIDI_Win_allgather(0,win_ptr);
if (rc != MPI_SUCCESS)
return rc;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
index 153ef88..9f47b9b 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
@@ -293,7 +293,6 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
int disp_unit;
int len, one;
-#ifdef PENDING_SHM_WIN
if (win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
MPIDI_SHM_MUTEX_LOCK(win);
shm_locked = 1;
@@ -302,12 +301,9 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
}
else {
-#endif
base = win->base;
disp_unit = win->disp_unit;
-#ifdef PENDING_SHM_WIN
}
-#endif
dest_addr = (char *) base + disp_unit * target_disp;
@@ -319,12 +315,10 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
(*uop)((void *) origin_addr, dest_addr, &one, &datatype);
-#ifdef PENDING_SHM_WIN
if (shm_locked) {
MPIDI_SHM_MUTEX_UNLOCK(win);
shm_locked = 0;
}
-#endif
MPIU_Free(req);
diff --git a/src/mpid/pamid/src/onesided/mpid_win_flush.c b/src/mpid/pamid/src/onesided/mpid_win_flush.c
index 4fb3dde..c2e7e29 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_flush.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_flush.c
@@ -52,12 +52,11 @@ MPID_Win_flush(int rank,
MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC,
return mpi_errno, "**rmasync");
}
- MPID_PROGRESS_WAIT_WHILE(win->mpid.origin[rank].nStarted != win->mpid.origin[rank].nCompleted);
-
sync = &win->mpid.sync;
- win->mpid.origin[rank].nStarted=0;
- win->mpid.origin[rank].nCompleted=0;
-
+ MPID_PROGRESS_WAIT_WHILE(sync->total != sync->complete);
+ sync->total = 0;
+ sync->started = 0;
+ sync->complete = 0;
return mpi_errno;
}
@@ -80,7 +79,6 @@ int
MPID_Win_flush_all(MPID_Win *win)
{
int mpi_errno = MPI_SUCCESS;
- int nTasks,i;
struct MPIDI_Win_sync* sync;
static char FCNAME[] = "MPID_Win_flush_all";
@@ -91,15 +89,11 @@ MPID_Win_flush_all(MPID_Win *win)
return mpi_errno, "**rmasync");
}
- sync = &win->mpid.sync;
- MPID_PROGRESS_WAIT_WHILE(sync->total != sync->complete);
- sync->total = 0;
- sync->started = 0;
- sync->complete = 0;
- for (i = 0; i < MPIR_Comm_size(win->comm_ptr); i++) {
- win->mpid.origin[i].nStarted=0;
- win->mpid.origin[i].nCompleted=0;
- }
+ sync = &win->mpid.sync;
+ MPID_PROGRESS_WAIT_WHILE(sync->total != sync->complete);
+ sync->total = 0;
+ sync->started = 0;
+ sync->complete = 0;
return mpi_errno;
}
@@ -114,6 +108,10 @@ MPID_Win_flush_all(MPID_Win *win)
* the cng process to the target rank on the given window. The user may
* reuse any buffers after this routine returns.
*
+ * It has been determined that the routine uses only counters for each window
+ * and not for each rank because the overhead of tracking each rank could be great
+ * if a window group contains a large number of ranks.
+ *
* \param[in] rank rank of target window
* \param[in] win window object
* \return MPI_SUCCESS, MPI_ERR_OTHER
@@ -123,6 +121,7 @@ int
MPID_Win_flush_local(int rank, MPID_Win *win)
{
int mpi_errno = MPI_SUCCESS;
+ struct MPIDI_Win_sync* sync;
static char FCNAME[] = "MPID_Win_flush_local";
if((win->mpid.sync.origin_epoch_type != MPID_EPOTYPE_LOCK) &&
@@ -131,9 +130,11 @@ MPID_Win_flush_local(int rank, MPID_Win *win)
MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC,
return mpi_errno, "**rmasync");
}
- MPID_PROGRESS_WAIT_WHILE(win->mpid.origin[rank].nStarted != win->mpid.origin[rank].nCompleted);
- win->mpid.origin[rank].nStarted=0;
- win->mpid.origin[rank].nCompleted=0;
+ sync = &win->mpid.sync;
+ MPID_PROGRESS_WAIT_WHILE(sync->total != sync->complete);
+ sync->total = 0;
+ sync->started = 0;
+ sync->complete = 0;
return mpi_errno;
}
@@ -155,8 +156,8 @@ int
MPID_Win_flush_local_all(MPID_Win *win)
{
int mpi_errno = MPI_SUCCESS;
+ struct MPIDI_Win_sync* sync;
static char FCNAME[] = "MPID_Win_flush";
- int size,i;
if((win->mpid.sync.origin_epoch_type != MPID_EPOTYPE_LOCK) &&
(win->mpid.sync.origin_epoch_type != MPID_EPOTYPE_LOCK_ALL))
@@ -164,14 +165,11 @@ MPID_Win_flush_local_all(MPID_Win *win)
MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC,
return mpi_errno, "**rmasync");
}
- size = MPIR_Comm_size(win->comm_ptr);
- for (i=0; i < size; i++) {
- MPID_PROGRESS_WAIT_WHILE(win->mpid.origin[i].nStarted != win->mpid.origin[i].nCompleted);
- }
- for (i=0; i < size; i++) {
- win->mpid.origin[i].nStarted=0;
- win->mpid.origin[i].nCompleted=0;
- }
+ sync = &win->mpid.sync;
+ MPID_PROGRESS_WAIT_WHILE(sync->total != sync->complete);
+ sync->total = 0;
+ sync->started = 0;
+ sync->complete = 0;
return mpi_errno;
}
diff --git a/src/mpid/pamid/src/onesided/mpid_win_free.c b/src/mpid/pamid/src/onesided/mpid_win_free.c
index 2ba83d9..c8a8867 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_free.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_free.c
@@ -20,16 +20,34 @@
* \brief ???
*/
#include "mpidi_onesided.h"
+#include <sys/shm.h>
+#include <sys/ipc.h>
+#include <sys/stat.h>
+
int MPIDI_SHM_Win_free(MPID_Win **win_ptr)
{
+ static char FCNAME[] = "MPID_SHM_Win_free";
+ int rc;
int mpi_errno = MPI_SUCCESS;
+
/* Free shared memory region */
/* free shm_base_addrs that's only used for shared memory windows */
- if ((*win_ptr)->mpid.shm->allocated)
- mpi_errno = shmdt((*win_ptr)->base);
+ if ((*win_ptr)->mpid.shm->allocated) {
+ OPA_fetch_and_add_int((OPA_int_t *) (*win_ptr)->mpid.shm->shm_count,-1);
+ while(*(*win_ptr)->mpid.shm->shm_count) MPIDI_QUICKSLEEP;
+ if ((*win_ptr)->comm_ptr->rank == 0) {
+ MPIDI_SHM_MUTEX_DESTROY(*win_ptr);
+ }
+ mpi_errno = shmdt((*win_ptr)->mpid.shm->base_addr);
+ if ((*win_ptr)->comm_ptr->rank == 0) {
+ rc=shmctl((*win_ptr)->mpid.shm->shm_id,IPC_RMID,NULL);
+ MPIU_ERR_CHKANDJUMP((rc == -1), errno,MPI_ERR_RMA_SHARED, "**shmctl");
+ }
+ }
MPIU_Free((*win_ptr)->mpid.shm);
(*win_ptr)->mpid.shm = NULL;
+ fn_fail:
return mpi_errno;
}
@@ -62,10 +80,12 @@ MPID_Win_free(MPID_Win **win_ptr)
mpi_errno = MPIR_Barrier_impl(win->comm_ptr, &errflag);
MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**mpi_bcast");
-
- if (win->create_flavor == MPI_WIN_FLAVOR_SHARED)
+
+ if (win->create_flavor == MPI_WIN_FLAVOR_SHARED)
mpi_errno=MPIDI_SHM_Win_free(win_ptr);
+
+
struct MPIDI_Win_info *winfo = &win->mpid.info[rank];
#ifdef USE_PAMI_RDMA
if (win->size != 0)
@@ -82,12 +102,10 @@ MPID_Win_free(MPID_Win **win_ptr)
MPID_assert(rc == PAMI_SUCCESS);
}
#endif
+
MPIU_Free(win->mpid.info);
- MPIU_Free(win->mpid.origin);
if (win->mpid.work.msgQ)
MPIU_Free(win->mpid.work.msgQ);
- if (win->create_flavor == MPI_WIN_FLAVOR_SHARED)
- MPIU_Free(win->base);
MPIR_Comm_release(win->comm_ptr, 0);
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get.c b/src/mpid/pamid/src/onesided/mpid_win_get.c
index 0d5b6a2..b56e2b2 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get.c
@@ -258,13 +258,11 @@ MPID_Get(void *origin_addr,
MPIU_Free(req);
return MPI_SUCCESS;
}
- win->mpid.origin[target_rank].nStarted++;
/* If the get is a local operation, do it here */
if (target_rank == win->comm_ptr->rank)
{
size_t offset = req->offset;
- win->mpid.origin[target_rank].nCompleted++;
if(req->req_handle)
MPID_cc_set(req->req_handle->cc_ptr, 0);
else
diff --git a/src/mpid/pamid/src/onesided/mpid_win_lock.c b/src/mpid/pamid/src/onesided/mpid_win_lock.c
index 259ab75..5871ddc 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_lock.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_lock.c
@@ -91,8 +91,10 @@ MPIDI_WinLockReq_proc(pami_context_t context,
struct MPIDI_Win_lock* lock = MPIU_Calloc0(1, struct MPIDI_Win_lock);
if (info->type == MPIDI_WIN_MSGTYPE_LOCKREQ)
lock->mtype = MPIDI_REQUEST_LOCK;
- else if (info->type == MPIDI_WIN_MSGTYPE_LOCKALLREQ)
+ else if (info->type == MPIDI_WIN_MSGTYPE_LOCKALLREQ) {
lock->mtype = MPIDI_REQUEST_LOCKALL;
+ lock->flagAddr = (void *) info->flagAddr;
+ }
lock->rank = info->rank;
lock->type = info->data.lock.type;
@@ -206,9 +208,10 @@ MPID_Win_unlock(int rank,
}
if (rank == MPI_PROC_NULL) goto fn_exit;
struct MPIDI_Win_sync* sync = &win->mpid.sync;
- MPID_PROGRESS_WAIT_DO_WHILE(win->mpid.origin[rank].nStarted != win->mpid.origin[rank].nCompleted);
- win->mpid.origin[rank].nCompleted=0;
- win->mpid.origin[rank].nStarted=0;
+ MPID_PROGRESS_WAIT_DO_WHILE(sync->total != sync->complete);
+ sync->total = 0;
+ sync->started = 0;
+ sync->complete = 0;
MPIDI_WinLock_info info = {
.done = 0,
diff --git a/src/mpid/pamid/src/onesided/mpid_win_lock_all.c b/src/mpid/pamid/src/onesided/mpid_win_lock_all.c
index 74248e4..5fdf67f 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_lock_all.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_lock_all.c
@@ -29,6 +29,7 @@ MPIDI_WinLockAllReq_post(pami_context_t context,
MPIDI_WinLock_info* info = (MPIDI_WinLock_info*)_info;
MPIDI_Win_control_t msg = {
.type = MPIDI_WIN_MSGTYPE_LOCKALLREQ,
+ .flagAddr = info,
.data = {
.lock = {
.type = info->lock_type,
@@ -42,19 +43,17 @@ MPIDI_WinLockAllReq_post(pami_context_t context,
static pami_result_t
-MPIDI_WinUnlockAll_post(pami_context_t context,
+MPIDI_WinUnlockAllReq_post(pami_context_t context,
void * _info)
{
MPID_Win *win;
MPIDI_WinLock_info* info = (MPIDI_WinLock_info*)_info;
MPIDI_Win_control_t msg = {
.type = MPIDI_WIN_MSGTYPE_UNLOCKALL,
+ .flagAddr = info,
};
win=info->win;
MPIDI_WinCtrlSend(context, &msg, info->peer, info->win);
- /* current request is the last one in the group */
- if (win->mpid.sync.lock.remote.allLocked == 1)
- info->done = 1;
return PAMI_SUCCESS;
}
@@ -88,6 +87,8 @@ MPID_Win_lock_all(int assert,
int mpi_errno = MPI_SUCCESS;
int i,size;
MPIDI_WinLock_info *lockQ;
+ char *cp=NULL;
+ int nMask,index;
struct MPIDI_Win_sync_lock* slock = &win->mpid.sync.lock;
static char FCNAME[] = "MPID_Win_lock_all";
@@ -97,19 +98,47 @@ MPID_Win_lock_all(int assert,
return mpi_errno, "**rmasync");
}
size = (MPIR_Comm_size(win->comm_ptr));
+ win->mpid.max_ctrlsends = MAX_NUM_CTRLSEND;
+ nMask= win->mpid.max_ctrlsends - 1;
+ if (cp=getenv("MP_MAX_NUM_CTRLSEND")) {
+ win->mpid.max_ctrlsends = atoi(cp);
+ }
+ nMask=(win->mpid.max_ctrlsends - 1);
if (!win->mpid.work.msgQ) {
- win->mpid.work.msgQ = (void *) MPIU_Calloc0(size, MPIDI_WinLock_info);
+ if (size < (win->mpid.max_ctrlsends)) {
+ win->mpid.work.msgQ = (void *) MPIU_Calloc0(size, MPIDI_WinLock_info);
+ } else {
+ win->mpid.work.msgQ = (void *) MPIU_Calloc0((win->mpid.max_ctrlsends), MPIDI_WinLock_info);
+ }
MPID_assert(win->mpid.work.msgQ != NULL);
- win->mpid.work.count=0;
+ win->mpid.work.count=0;
}
lockQ = (MPIDI_WinLock_info *) win->mpid.work.msgQ;
- for (i = 0; i < size; i++) {
- lockQ[i].done=0;
- lockQ[i].peer=i;
- lockQ[i].win=win;
- lockQ[i].lock_type=MPI_LOCK_SHARED;
- MPIDI_Context_post(MPIDI_Context[0], &lockQ[i].work, MPIDI_WinLockAllReq_post, &lockQ[i]);
- }
+ if (size < win->mpid.max_ctrlsends) {
+ for (i = 0; i < size; i++) {
+ lockQ[i].done=0;
+ lockQ[i].peer=i;
+ lockQ[i].win=win;
+ lockQ[i].lock_type=MPI_LOCK_SHARED;
+ MPIDI_Context_post(MPIDI_Context[0], &lockQ[i].work, MPIDI_WinLockAllReq_post, &lockQ[i]);
+ }
+ } else {
+ for (i = 0; i < size; i++) {
+ if (i < win->mpid.max_ctrlsends)
+ index=i;
+ else {
+ index = i & nMask;
+ if (!lockQ[index].done) {
+ MPID_PROGRESS_WAIT_WHILE(lockQ[index].done == 0);
+ }
+ }
+ lockQ[index].done=0;
+ lockQ[index].peer=i;
+ lockQ[index].win=win;
+ lockQ[index].lock_type=MPI_LOCK_SHARED;
+ MPIDI_Context_post(MPIDI_Context[0], &lockQ[index].work, MPIDI_WinLockAllReq_post, &lockQ[index]);
+ }
+ }
/* wait for the lock is granted for all tasks in the window */
MPID_PROGRESS_WAIT_WHILE(size != slock->remote.allLocked);
@@ -123,8 +152,9 @@ int
MPID_Win_unlock_all(MPID_Win *win)
{
int mpi_errno = MPI_SUCCESS;
- int i;
+ int i,size;
MPIDI_WinLock_info *lockQ;
+ int nMask,index;
struct MPIDI_Win_sync* sync;
static char FCNAME[] = "MPID_Win_unlock_all";
@@ -138,17 +168,32 @@ MPID_Win_unlock_all(MPID_Win *win)
sync->total = 0;
sync->started = 0;
sync->complete = 0;
- for (i = 0; i < MPIR_Comm_size(win->comm_ptr); i++) {
- win->mpid.origin[i].nStarted=0;
- win->mpid.origin[i].nCompleted=0;
- }
MPID_assert(win->mpid.work.msgQ != NULL);
lockQ = (MPIDI_WinLock_info *) win->mpid.work.msgQ;
- for (i = 0; i < MPIR_Comm_size(win->comm_ptr); i++) {
- lockQ[i].done=0;
- lockQ[i].peer=i;
- lockQ[i].win=win;
- MPIDI_Context_post(MPIDI_Context[0], &lockQ[i].work, MPIDI_WinUnlockAll_post, &lockQ[i]);
+ size = MPIR_Comm_size(win->comm_ptr);
+ nMask = (win->mpid.max_ctrlsends - 1);
+ if (size < win->mpid.max_ctrlsends) {
+ for (i = 0; i < size; i++) {
+ lockQ[i].done=0;
+ lockQ[i].peer=i;
+ lockQ[i].win=win;
+ MPIDI_Context_post(MPIDI_Context[0], &lockQ[i].work, MPIDI_WinUnlockAllReq_post, &lockQ[i]);
+ }
+ } else {
+ for (i = 0; i < size; i++) {
+ if (i < win->mpid.max_ctrlsends)
+ index=i;
+ else {
+ index = (i & nMask);
+ if (!lockQ[index].done) {
+ MPID_PROGRESS_WAIT_WHILE(lockQ[index].done == 0);
+ }
+ }
+ lockQ[index].done=0;
+ lockQ[index].peer=i;
+ lockQ[index].win=win;
+ MPIDI_Context_post(MPIDI_Context[0], &lockQ[index].work, MPIDI_WinUnlockAllReq_post, &lockQ[index]);
+ }
}
MPID_PROGRESS_WAIT_WHILE(sync->lock.remote.allLocked);
diff --git a/src/mpid/pamid/src/onesided/mpid_win_put.c b/src/mpid/pamid/src/onesided/mpid_win_put.c
index 6e8d38b..67f9d84 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_put.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_put.c
@@ -274,13 +274,11 @@ MPID_Put(void *origin_addr,
return MPI_SUCCESS;
}
- win->mpid.origin[target_rank].nStarted++;
/* If the get is a local operation, do it here */
if (target_rank == win->comm_ptr->rank)
{
size_t offset = req->offset;
- win->mpid.origin[target_rank].nCompleted++;
if(req->req_handle)
MPID_cc_set(req->req_handle->cc_ptr, 0);
else
diff --git a/src/mpid/pamid/src/onesided/mpid_win_shared_query.c b/src/mpid/pamid/src/onesided/mpid_win_shared_query.c
index fb304ee..21e70fd 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_shared_query.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_shared_query.c
@@ -40,8 +40,10 @@ MPID_Win_shared_query(MPID_Win *win, int rank, MPI_Aint *size,
int *disp_unit, void *base_ptr)
{
int mpi_errno = MPI_SUCCESS;
-
- *(void**) base_ptr = (void *) win->base;
+ static char FCNAME[] = "MPID_Win_shared_query";
+ MPIU_ERR_CHKANDSTMT((win->create_flavor != MPI_WIN_FLAVOR_SHARED), mpi_errno,
+ MPI_ERR_RMA_FLAVOR, return mpi_errno, "**rmaflavor");
+ *((void **) base_ptr) = (void *) win->base;
*size = win->size;
*disp_unit = win->disp_unit;
diff --git a/src/mpid/pamid/src/onesided/mpidi_onesided.h b/src/mpid/pamid/src/onesided/mpidi_onesided.h
index 57aabca..6969351 100644
--- a/src/mpid/pamid/src/onesided/mpidi_onesided.h
+++ b/src/mpid/pamid/src/onesided/mpidi_onesided.h
@@ -20,6 +20,7 @@
* \brief ???
*/
#include <mpidimpl.h>
+#include "opa_primitives.h"
#ifndef __src_onesided_mpidi_onesided_h__
#define __src_onesided_mpidi_onesided_h__
@@ -33,6 +34,54 @@ pami_send_immediate_t zero_send_immediate_parms;
pami_recv_t zero_recv_parms;
pami_rmw_t zero_rmw_parms;
+#define MPIDI_QUICKSLEEP usleep(1);
+#define MAX_NUM_CTRLSEND 1024 /* no more than 1024 outstanding control sends */
+
+
+#define MPIDI_SHM_MUTEX_LOCK(win) \
+ do { \
+ pthread_mutex_t *shm_mutex = win->mpid.shm->mutex_lock; \
+ int rval = pthread_mutex_lock(shm_mutex); \
+ MPIU_ERR_CHKANDJUMP1(rval, mpi_errno, MPI_ERR_OTHER, "**pthread_lock", \
+ "**pthread_lock %s", strerror(rval)); \
+ } while (0)
+
+#define MPIDI_SHM_MUTEX_UNLOCK(win) \
+ do { \
+ pthread_mutex_t *shm_mutex = win->mpid.shm->mutex_lock; \
+ int rval = pthread_mutex_unlock(shm_mutex); \
+ MPIU_ERR_CHKANDJUMP1(rval, mpi_errno, MPI_ERR_OTHER, "**pthread_unlock", \
+ "**pthread_unlock %s", strerror(rval)); \
+ } while (0)
+
+#define MPIDI_SHM_MUTEX_INIT(win) \
+ do { \
+ int rval=0; \
+ pthread_mutexattr_t attr; \
+ pthread_mutex_t *shm_mutex = win->mpid.shm->mutex_lock; \
+ \
+ rval = pthread_mutexattr_init(&attr); \
+ MPIU_ERR_CHKANDJUMP1(rval, mpi_errno, MPI_ERR_OTHER, "**pthread_mutex", \
+ "**pthread_mutex %s", strerror(rval)); \
+ rval = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); \
+ MPIU_ERR_CHKANDJUMP1(rval, mpi_errno, MPI_ERR_OTHER, "**pthread_mutex", \
+ "**pthread_mutex %s", strerror(rval)); \
+ rval = pthread_mutex_init(shm_mutex, &attr); \
+ MPIU_ERR_CHKANDJUMP1(rval, mpi_errno, MPI_ERR_OTHER, "**pthread_mutex", \
+ "**pthread_mutex %s", strerror(rval)); \
+ rval = pthread_mutexattr_destroy(&attr); \
+ MPIU_ERR_CHKANDJUMP1(rval, mpi_errno, MPI_ERR_OTHER, "**pthread_mutex", \
+ "**pthread_mutex %s", strerror(rval)); \
+ } while (0);
+
+#define MPIDI_SHM_MUTEX_DESTROY(win) \
+ do { \
+ pthread_mutex_t *shm_mutex = (win)->mpid.shm->mutex_lock; \
+ int rval = pthread_mutex_destroy(shm_mutex); \
+ MPIU_ERR_CHKANDJUMP1(rval, mpi_errno, MPI_ERR_OTHER, "**pthread_mutex", \
+ "**pthread_mutex %s", strerror(rval)); \
+ } while (0);
+
/**
* \brief One-sided Message Types
*/
@@ -75,6 +124,7 @@ typedef struct
MPIDI_Win_msgtype_t type;
MPID_Win *win;
int rank; /* MPI rank */
+ void *flagAddr;
union
{
struct
@@ -104,6 +154,7 @@ typedef struct MPIDI_WinLock_info
unsigned peer;
int lock_type;
struct MPID_Win * win;
+ void *flagAddr;
volatile unsigned done;
pami_work_t work;
} MPIDI_WinLock_info;
@@ -280,8 +331,7 @@ MPIDI_Win_init( MPI_Aint length,
int create_flavor,
int model);
int
-MPIDI_Win_allgather(void *base,
- MPI_Aint size,
+MPIDI_Win_allgather(MPI_Aint size,
MPID_Win **win_ptr);
void
diff --git a/src/mpid/pamid/src/onesided/mpidi_win_control.c b/src/mpid/pamid/src/onesided/mpidi_win_control.c
index 8c537ff..56016be 100644
--- a/src/mpid/pamid/src/onesided/mpidi_win_control.c
+++ b/src/mpid/pamid/src/onesided/mpidi_win_control.c
@@ -29,6 +29,7 @@ MPIDI_WinCtrlSend(pami_context_t context,
MPID_Win *win)
{
pami_task_t taskid;
+ MPIDI_WinLock_info *winLock;
control->win = win->mpid.info[rank].win;
control->rank = win->comm_ptr->rank;
taskid=MPID_VCR_GET_LPID(win->comm_ptr->vcr,rank);
@@ -72,6 +73,10 @@ MPIDI_WinCtrlSend(pami_context_t context,
};
rc = PAMI_Send_immediate(context, ¶ms);
}
+ if ((control->type == MPIDI_WIN_MSGTYPE_LOCKALLREQ) || (control->type == MPIDI_WIN_MSGTYPE_UNLOCKALL)) {
+ winLock = (MPIDI_WinLock_info *) control->flagAddr;
+ winLock->done = 1;
+ }
MPID_assert(rc == PAMI_SUCCESS);
}
http://git.mpich.org/mpich.git/commitdiff/c6d910c783380440d1946ab366e5f2496eaed042
commit c6d910c783380440d1946ab366e5f2496eaed042
Author: Su Huang <suhuang at us.ibm.com>
Date: Wed Oct 16 14:27:49 2013 -0400
PAMID: MPID_Win_allocate_shared and MPID_Win_shared_query support
added MPID_Win_allocate_shared and MPID_Win_shared_query support and
fixes on passing incorrect rank to MPIDI_WinCtrlSend in MPIDI_WinPost_post
and MPIDI_WinComplete_post
minor fixes for 'win allocate shared'
- compiles for bgq
- UNRESOLVED: undefined shm key on bgq, or any other time the required
environment variables are not set
- UNRESOLVED: multiple calls to 'win allocate shared' will use the same
shm key
(ibm) F189033
Signed-off-by: Michael Blocksome <blocksom at us.ibm.com>
diff --git a/src/mpid/pamid/include/mpidi_datatypes.h b/src/mpid/pamid/include/mpidi_datatypes.h
index 203961e..2d98dcf 100644
--- a/src/mpid/pamid/include/mpidi_datatypes.h
+++ b/src/mpid/pamid/include/mpidi_datatypes.h
@@ -445,6 +445,16 @@ typedef struct MPIDI_Win_info
pami_memregion_t memregion; /**< Memory region descriptor for each node */
uint32_t memregion_used;
} MPIDI_Win_info;
+
+typedef struct MPIDI_Win_shm_t
+{
+ int allocated; /* flag: TRUE iff this window has a shared memory
+ region associated with it */
+ void *base_addr; /* base address of shared memory region */
+ MPI_Aint segment_len; /* size of shared memory region */
+ uint32_t shm_key; /* shared memory key */
+} MPIDI_Win_shm_t;
+
/**
* \brief Structure of PAMI extensions to MPID_Win structure
*/
@@ -453,7 +463,8 @@ struct MPIDI_Win
struct MPIDI_Win_info *info; /**< allocated array of collective info */
MPIDI_Win_info_args info_args;
void ** shm_base_addrs; /* base address shared by all process in comm */
- workQ_t work;
+ MPIDI_Win_shm_t *shm; /* shared memory info */
+ workQ_t work;
RMA_nOps_t *origin;
struct MPIDI_Win_sync
{
diff --git a/src/mpid/pamid/src/misc/mpid_unimpl.c b/src/mpid/pamid/src/misc/mpid_unimpl.c
index 44bda50..1009566 100644
--- a/src/mpid/pamid/src/misc/mpid_unimpl.c
+++ b/src/mpid/pamid/src/misc/mpid_unimpl.c
@@ -90,17 +90,3 @@ int MPID_Comm_group_failed(MPID_Comm *comm_ptr, MPID_Group **failed_group_ptr)
MPID_abort();
return 0;
}
-
-int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPID_Info *info_ptr, MPID_Comm *comm_ptr,
- void **base_ptr, MPID_Win **win_ptr)
-{
- MPID_abort();
- return 0;
-}
-
-int MPID_Win_shared_query(MPID_Win *win, int rank, MPI_Aint *size, int *disp_unit,
- void *baseptr)
-{
- MPID_abort();
- return 0;
-}
diff --git a/src/mpid/pamid/src/onesided/Makefile.mk b/src/mpid/pamid/src/onesided/Makefile.mk
index cb526db..9a6cd37 100644
--- a/src/mpid/pamid/src/onesided/Makefile.mk
+++ b/src/mpid/pamid/src/onesided/Makefile.mk
@@ -36,7 +36,9 @@ lib_lib at MPILIBNAME@_la_SOURCES += \
src/mpid/pamid/src/onesided/mpid_win_lock_all.c \
src/mpid/pamid/src/onesided/mpid_win_pscw.c \
src/mpid/pamid/src/onesided/mpid_win_put.c \
+ src/mpid/pamid/src/onesided/mpid_win_shared_query.c \
src/mpid/pamid/src/onesided/mpid_win_create_dynamic.c \
+ src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c \
src/mpid/pamid/src/onesided/mpid_win_flush.c \
src/mpid/pamid/src/onesided/mpid_win_allocate.c \
src/mpid/pamid/src/onesided/mpid_win_sync.c \
diff --git a/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
new file mode 100644
index 0000000..97525f5
--- /dev/null
+++ b/src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
@@ -0,0 +1,409 @@
+/* begin_generated_IBM_copyright_prolog */
+/* */
+/* This is an automatically generated copyright prolog. */
+/* After initializing, DO NOT MODIFY OR MOVE */
+/* --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM */
+/* Blue Gene/Q 5765-PER 5765-PRP */
+/* */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved */
+/* US Government Users Restricted Rights - */
+/* Use, duplication, or disclosure restricted */
+/* by GSA ADP Schedule Contract with IBM Corp. */
+/* */
+/* --------------------------------------------------------------- */
+/* */
+/* end_generated_IBM_copyright_prolog */
+/* (C)Copyright IBM Corp. 2007, 2011 */
+/**
+ * \file src/onesided/mpid_win_allocate_shared.c
+ * \brief
+ */
+#include "mpidi_onesided.h"
+#include <sys/shm.h>
+#include <sys/ipc.h>
+#include <sys/stat.h>
+
+#undef FUNCNAME
+#define FUNCNAME MPID_Win_allocate_shared
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+
+#define SHM_KEY_TAIL 0xfdbc /* If this value is changed, change */
+ /* in PMD (mp_pmd.c) as well. */
+
+extern int mpidi_dynamic_tasking;
+#define MPIDI_PAGESIZE ((MPI_Aint)pageSize)
+#define MPIDI_PAGESIZE_MASK (~(MPIDI_PAGESIZE-1))
+#define MPIDI_ROUND_UP_PAGESIZE(x) ((((MPI_Aint)x)+(~MPIDI_PAGESIZE_MASK)) & MPIDI_PAGESIZE_MASK)
+
+
+int CheckRankOnNode(MPID_Comm * comm_ptr,int *onNode ) {
+ int rank,comm_size;
+ int mpi_errno=PAMI_SUCCESS;
+
+
+ rank=comm_ptr->rank;
+ comm_size = comm_ptr->local_size;
+ rank = comm_ptr->rank;
+
+ *onNode=0;
+ if (comm_ptr->intranode_table[rank] != -1)
+ *onNode=1;
+ if (*onNode== 0) {
+ MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_CONFLICT,
+ return mpi_errno, "**rmaconflict");
+ }
+
+ return mpi_errno;
+}
+
+int CheckSpaceType(MPID_Win **win_ptr, MPID_Info *info,int *noncontig) {
+ int mpi_errno=MPI_SUCCESS;
+ /* Check if we are allowed to allocate space non-contiguously */
+ if (info != NULL) {
+ int alloc_shared_nctg_flag = 0;
+ char alloc_shared_nctg_value[MPI_MAX_INFO_VAL+1];
+ MPIR_Info_get_impl(info, "alloc_shared_noncontig", MPI_MAX_INFO_VAL,
+ alloc_shared_nctg_value, &alloc_shared_nctg_flag);
+ if ((alloc_shared_nctg_flag == 1)) {
+ if (!strncmp(alloc_shared_nctg_value, "true", strlen("true")))
+ (*win_ptr)->mpid.info_args.alloc_shared_noncontig = 1;
+ if (!strncmp(alloc_shared_nctg_value, "false", strlen("false")))
+ (*win_ptr)->mpid.info_args.alloc_shared_noncontig = 0;
+ }
+ }
+ *noncontig = (*win_ptr)->mpid.info_args.alloc_shared_noncontig;
+ return mpi_errno;
+}
+
+int GetPageSize(void *addr, ulong *pageSize)
+{
+ pid_t pid;
+ FILE *fp;
+ char Line[201],A1[50],A2[50];
+ char fileName[100];
+ char A3[50],A4[50];
+ int i=0;
+ char *t1,*t2;
+ int len;
+ #ifndef REDHAT
+ char a='-';
+ char *search = &a;
+ #endif
+ void *beg, *end;
+ int found=0;
+ int ps;
+ int j,k;
+
+ *pageSize = 0;
+ pid = getpid();
+ sprintf(fileName,"/proc/%d/smaps",pid);
+ /* linux-2.6.29 or greater, KernelPageSize in /proc/pid/smaps includes 4K, 16 MB */
+ TRACE_ERR("fileName = %s addr=%p\n",fileName,addr);
+ fp = fopen(fileName,"r");
+ if (fp == NULL) {
+ TRACE_ERR("fileName = %s open failed errno=%d\n",fileName,errno);
+ return errno;
+ }
+ while(fgets(Line,200,fp)) {
+ i++;
+ sscanf(Line,"%s %s %s %s \n",A1,A2,A3,A4);
+ if (memcmp(A1,"KernelPageSize",14)==0) {
+ j=atoi(A2);
+ if ((A3[0]=='k') || (A3[0]=='K'))
+ k=1024;
+ else if ((A3[0]=='m') || (A3[0]=='M'))
+ k=1048576;
+ else if ((A3[0]=='g') || (A3[0]=='G'))
+ k=0x40000000; /* 1 GB */
+ else {
+ printf("ERROR unrecognized unit A3=%s\n",A3);
+ break;
+ }
+ *pageSize = (ulong)(j * k);
+ TRACE_ERR(" addr=%p pageSize=%ld %s(%d)\n", addr,*pageSize,__FILE__,__LINE__);
+ break;
+ }
+ }
+ fclose(fp);
+ if (*pageSize == 0) {
+ ps = getpagesize();
+ *pageSize = (ulong) ps;
+ TRACE_ERR("LinuxPageSize %p not in %s getpagesize=%ld\n", addr,fileName,*pageSize);
+ }
+ return 0;
+}
+
+#define MPIDI_PAGESIZE ((MPI_Aint)pageSize)
+#define MPIDI_PAGESIZE_MASK (~(MPIDI_PAGESIZE-1))
+#define MPIDI_ROUND_UP_PAGESIZE(x) ((((MPI_Aint)x)+(~MPIDI_PAGESIZE_MASK)) & MPIDI_PAGESIZE_MASK)
+
+
+
+int
+MPID_getSharedSegment(MPI_Aint size,
+ int disp_unit,
+ MPID_Info * info,
+ MPID_Comm * comm_ptr,
+ void **base_ptr,
+ MPID_Win ** win_ptr)
+{
+ int mpi_errno = MPI_SUCCESS;
+ void **base_pp = (void **) base_ptr;
+ int i, k, comm_size, rank;
+ int shm_id;
+ uint32_t shm_key;
+ int node_rank;
+ MPI_Aint *node_sizes;
+ void * base_addr;
+ MPI_Aint *tmp_buf;
+ int errflag = FALSE;
+ int noncontig = FALSE;
+ MPI_Aint pageSize, len,new_size;
+ char *cp;
+ MPID_Win *win;
+ MPIDI_Win_info *winfo;
+ int shm_flag = IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR;
+
+ win = *win_ptr;
+ comm_size = win->comm_ptr->local_size;
+ rank = win->comm_ptr->rank;
+ tmp_buf = MPIU_Malloc( 2*comm_size*sizeof(MPI_Aint));
+
+ mpi_errno=CheckSpaceType(win_ptr,info,&noncontig);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+ GetPageSize((void *) win_ptr, &pageSize);
+ win->mpid.shm->segment_len = 0;
+ if (comm_size == 1) {
+ if (size > 0) {
+ if (noncontig)
+ new_size = MPIDI_ROUND_UP_PAGESIZE(size);
+ else
+ new_size = size;
+ *base_pp = MPIU_Malloc(new_size);
+ #ifndef MPIDI_NO_ASSERT
+ MPID_assert(*base_pp != NULL);
+ #else
+ MPIU_ERR_CHKANDJUMP((*base_pp == NULL), mpi_errno, MPI_ERR_BUFFER, "**bufnull");
+ #endif
+ } else if (size == 0) {
+ *base_pp = NULL;
+ } else {
+ MPIU_ERR_CHKANDSTMT(size >=0 , mpi_errno, MPI_ERR_SIZE,return mpi_errno, "**rmasize");
+ }
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ win->mpid.shm->segment_len = new_size;
+ win->mpid.info[rank].base_addr = *base_pp;
+ } else {
+ tmp_buf[rank] = (MPI_Aint) size;
+ mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
+ tmp_buf, 1 * sizeof(MPI_Aint), MPI_BYTE,
+ (*win_ptr)->comm_ptr, &errflag);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+ /* calculate total number of bytes needed */
+ for (i = 0; i < comm_size; ++i) {
+ len = tmp_buf[i];
+ if (noncontig)
+ /* Round up to next page size */
+ win->mpid.shm->segment_len += MPIDI_ROUND_UP_PAGESIZE(len);
+ else
+ win->mpid.shm->segment_len += len;
+ }
+ /* get shared segment */
+
+ shm_key=-1;
+ if (rank == 0) {
+ #ifdef DYNAMIC_TASKING
+ /* generate an appropriate key */
+ if (!mpidi_dynamic_tasking) {
+ cp = getenv("MP_I_PMD_PID");
+ if (cp) {
+ shm_key = atoi(cp);
+ shm_key = shm_key & 0x07ffffff;
+ shm_key = shm_key | 0x80000000;
+ } else {
+ cp = getenv("MP_PARTITION");
+ if (cp ) {
+ shm_key = atol(cp);
+ shm_key = (shm_key << 16) + SHM_KEY_TAIL;
+ } else {
+ TRACE_ERR("ERROR MP_PARTITION not set \n");
+ }
+ }
+ } else {
+ cp = getenv("MP_I_KEY_RANGE");
+ if (cp) {
+ sscanf(cp, "0x%x", &shm_key);
+ shm_key = shm_key | 0x80;
+ } else {
+ TRACE_ERR("ERROR MP_I_KEY_RANGE not set \n");
+ }
+ }
+ #else
+ cp = getenv("MP_I_PMD_PID");
+ if (cp) {
+ shm_key = atoi(cp);
+ shm_key = shm_key & 0x07ffffff;
+ shm_key = shm_key | 0x80000000;
+ } else {
+ cp = getenv("MP_PARTITION");
+ if (cp ) {
+ shm_key = atol(cp);
+#ifdef __PE__
+ shm_key = (shm_key << 16) + SHMCC_KEY_TAIL;
+#else
+ shm_key = (shm_key << 16);
+#endif
+ } else {
+ TRACE_ERR("ERROR MP_PARTITION not set \n");
+ }
+ }
+ #endif
+ MPID_assert(shm_key != -1);
+ shm_id = shmget(shm_key, win->mpid.shm->segment_len, shm_flag);
+ MPIU_ERR_CHKANDJUMP((shm_id == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
+ win->mpid.shm->base_addr = (void *) shmat(shm_id,0,0);
+ MPIU_ERR_CHKANDJUMP((base_addr == NULL), mpi_errno,MPI_ERR_BUFFER, "**bufnull");
+ win->mpid.shm->allocated = 1;
+ /* successfully created shm segment */
+ mpi_errno = MPIR_Bcast_impl((void *) &shm_key, sizeof(int), MPI_CHAR, 0, comm_ptr, &errflag);
+ } else { /* task other than task 0 */
+ mpi_errno = MPIR_Bcast_impl((void *) &shm_key, sizeof(int), MPI_CHAR, 0, comm_ptr, &errflag);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
+ MPID_assert(shm_key != -1);
+ shm_id = shmget(shm_key, 0, 0);
+ if (shm_id != -1) { /* shm segment is available */
+ win->mpid.shm->base_addr = (void *) shmat(shm_id,0,0);
+ win->mpid.shm->allocated = 1;
+ MPIU_ERR_CHKANDJUMP((base_addr == (void *) -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
+ } else { /* node leader failed, no need to try here */
+ MPIU_ERR_CHKANDJUMP((shm_id == -1), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
+ }
+ }
+ mpi_errno = MPIR_Barrier_impl(comm_ptr, &errflag);
+ /* compute the base addresses of each process within the shared memory segment */
+ {
+ win->base = win->mpid.shm->base_addr;
+ winfo = &win->mpid.info[rank];
+ winfo->win = win;
+ winfo->disp_unit = disp_unit;
+ win->mpid.info[0].base_addr = win->mpid.shm->base_addr;
+ char *cur_base = (*win_ptr)->mpid.shm->base_addr;
+ for (i = 1; i < comm_size; ++i) {
+ size = tmp_buf[i];
+ if (size) {
+ if (noncontig)
+ /* Round up to next page size */
+ win->mpid.info[i].base_addr =(void *) ((MPI_Aint) cur_base + (MPI_Aint) MPIDI_ROUND_UP_PAGESIZE(size));
+ else
+ win->mpid.info[i].base_addr = (void *) ((MPI_Aint) cur_base + size);
+ cur_base = win->mpid.info[i].base_addr;
+ } else {
+ win->mpid.info[i].base_addr = NULL;
+ }
+ }
+ }
+ *base_pp = win->mpid.info[rank].base_addr;
+ }
+
+fn_exit:
+ MPIU_Free(tmp_buf);
+ return mpi_errno;
+ /* --BEGIN ERROR HANDLING-- */
+fn_fail:
+ goto fn_exit;
+ /* --END ERROR HANDLING-- */
+
+}
+
+/**
+ * \brief MPI-PAMI glue for MPI_Win_allocate_shared function
+ *
+ * Create a window object. Allocates a MPID_Win object and initializes it,
+ * then allocates the collective info array, initalizes our entry, and
+ * performs an Allgather to distribute/collect the rest of the array entries.
+ * On each process, it allocates memory of at least size bytes that is shared
+ * among all processes in comm, and returns a pointer to the locally allocated
+ * segment in base_ptr that can be used for load/store accesses on the calling
+ * process. The locally allocated memory can be the target of load/store accessses
+ * by remote processes; the base pointers for other processes can be queried using
+ * the function 'MPI_Win_shared_query'.
+ *
+ * The call also returns a window object that can be used by all processes in comm
+ * to perform RMA operations. The size argument may be different at each process
+ * and size = 0 is valid. It is the user''s responsibility to ensure that the
+ * communicator comm represents a group of processes that can create a shared
+ * memory segment that can be accessed by all processes in the group. The
+ * allocated memory is contiguous across process ranks unless the info key
+ * alloc_shared_noncontig is specified. Contiguous across process ranks means that
+ * the first address in the memory segment of process i is consecutive with the
+ * last address in the memory segment of process i - 1. This may enable the user
+ * to calculate remote address offsets with local information only.
+ *
+ * Input Parameters:
+ * \param[in] size size of window in bytes (nonnegative integer)
+ * \param[in] disp_unit local unit size for displacements, in bytes (positive integer)
+ * \param[in] info info argument (handle))
+ * \param[in] comm_ptr intra-Communicator (handle)
+ * \param[out] base_ptr address of local allocated window segment
+ * \param[out] win_ptr window object returned by the call (handle)
+ * \return MPI_SUCCESS, MPI_ERR_ARG, MPI_ERR_COMM, MPI_ERR_INFO. MPI_ERR_OTHER,
+ * MPI_ERR_SIZE
+ */
+int
+MPID_Win_allocate_shared(MPI_Aint size,
+ int disp_unit,
+ MPID_Info * info,
+ MPID_Comm * comm_ptr,
+ void **base_ptr,
+ MPID_Win ** win_ptr)
+{
+ int mpi_errno = MPI_SUCCESS;
+ void **baseP = base_ptr;
+ MPIDI_Win_info *winfo;
+ MPID_Win *win;
+ int rank, comm_size;
+ int onNode;
+
+
+ mpi_errno =MPIDI_Win_init(size,disp_unit,win_ptr, info, comm_ptr, MPI_WIN_FLAVOR_SHARED, MPI_WIN_UNIFIED);
+ if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+ win = *win_ptr;
+ mpi_errno=CheckRankOnNode(comm_ptr,&onNode);
+ MPIU_ERR_CHKANDJUMP((onNode == 0), mpi_errno, MPI_ERR_RMA_SHARED, "**rmashared");
+
+ rank = (*win_ptr)->comm_ptr->rank;
+ comm_size = (*win_ptr)->comm_ptr->local_size;
+ win->mpid.shm = MPIU_Malloc(sizeof(MPIDI_Win_shm_t));
+ win->mpid.shm->allocated=0;
+ MPID_assert(win->mpid.shm != NULL);
+ MPID_getSharedSegment(size, disp_unit,info,comm_ptr,baseP, win_ptr);
+
+ win->base = *baseP;
+ winfo = &win->mpid.info[rank];
+ winfo->win = win;
+ winfo->disp_unit = disp_unit;
+ win->base = (void *) MPIU_PtrToAint(winfo->base_addr);
+ winfo->base_addr = win->base;
+
+ mpi_errno = MPIDI_Win_allgather(*baseP,size,win_ptr);
+ if (mpi_errno != MPI_SUCCESS)
+ return mpi_errno;
+
+ *(void**) base_ptr = (void *) win->base;
+
+ mpi_errno = MPIR_Barrier_impl(comm_ptr, &mpi_errno);
+
+fn_exit:
+ return mpi_errno;
+ /* --BEGIN ERROR HANDLING-- */
+fn_fail:
+ MPIU_Free(win->mpid.shm);
+ goto fn_exit;
+ /* --END ERROR HANDLING-- */
+
+}
+
diff --git a/src/mpid/pamid/src/onesided/mpid_win_free.c b/src/mpid/pamid/src/onesided/mpid_win_free.c
index 5155ce8..2ba83d9 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_free.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_free.c
@@ -21,6 +21,17 @@
*/
#include "mpidi_onesided.h"
+int MPIDI_SHM_Win_free(MPID_Win **win_ptr)
+{
+ int mpi_errno = MPI_SUCCESS;
+ /* Free shared memory region */
+ /* free shm_base_addrs that's only used for shared memory windows */
+ if ((*win_ptr)->mpid.shm->allocated)
+ mpi_errno = shmdt((*win_ptr)->base);
+ MPIU_Free((*win_ptr)->mpid.shm);
+ (*win_ptr)->mpid.shm = NULL;
+ return mpi_errno;
+}
/**
* \brief MPI-PAMI glue for MPI_Win_free function
@@ -41,6 +52,7 @@ MPID_Win_free(MPID_Win **win_ptr)
MPID_Win *win = *win_ptr;
size_t rank = win->comm_ptr->rank;
+ int errflag = FALSE;
if(win->mpid.sync.origin_epoch_type != win->mpid.sync.target_epoch_type ||
(win->mpid.sync.origin_epoch_type != MPID_EPOTYPE_NONE &&
@@ -48,9 +60,11 @@ MPID_Win_free(MPID_Win **win_ptr)
MPIU_ERR_SETANDSTMT(mpi_errno, MPI_ERR_RMA_SYNC, return mpi_errno, "**rmasync");
}
- mpi_errno = MPIR_Barrier_impl(win->comm_ptr, &mpi_errno);
- if (mpi_errno != MPI_SUCCESS)
- return mpi_errno;
+ mpi_errno = MPIR_Barrier_impl(win->comm_ptr, &errflag);
+ MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**mpi_bcast");
+
+ if (win->create_flavor == MPI_WIN_FLAVOR_SHARED)
+ mpi_errno=MPIDI_SHM_Win_free(win_ptr);
struct MPIDI_Win_info *winfo = &win->mpid.info[rank];
#ifdef USE_PAMI_RDMA
@@ -68,9 +82,12 @@ MPID_Win_free(MPID_Win **win_ptr)
MPID_assert(rc == PAMI_SUCCESS);
}
#endif
-
MPIU_Free(win->mpid.info);
MPIU_Free(win->mpid.origin);
+ if (win->mpid.work.msgQ)
+ MPIU_Free(win->mpid.work.msgQ);
+ if (win->create_flavor == MPI_WIN_FLAVOR_SHARED)
+ MPIU_Free(win->base);
MPIR_Comm_release(win->comm_ptr, 0);
diff --git a/src/mpid/pamid/src/onesided/mpid_win_pscw.c b/src/mpid/pamid/src/onesided/mpid_win_pscw.c
index 1ee4b4a..ff22460 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_pscw.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_pscw.c
@@ -36,7 +36,7 @@ MPIDI_WinPost_post(pami_context_t context,
void * _info)
{
MPIDI_WinPSCW_info * info = (MPIDI_WinPSCW_info*)_info;
- unsigned peer, index;
+ unsigned peer, index, pid,i;
MPID_Group *group = info->win->mpid.sync.pw.group;
MPID_assert(group != NULL);
MPIDI_Win_control_t msg = {
@@ -44,7 +44,13 @@ MPIDI_WinPost_post(pami_context_t context,
};
for (index=0; index < group->size; ++index) {
- peer = group->lrank_to_lpid[index].lrank;
+ pid = group->lrank_to_lpid[index].lpid;
+ for (i=0;i < ((info->win)->comm_ptr->local_size); i++) {
+ if ((info->win)->comm_ptr->local_group->lrank_to_lpid[i].lpid == pid) {
+ peer = ((info->win)->comm_ptr->local_group->lrank_to_lpid[i].lrank);
+ break;
+ }
+ }
MPIDI_WinCtrlSend(context, &msg, peer, info->win);
}
@@ -67,7 +73,7 @@ MPIDI_WinComplete_post(pami_context_t context,
void * _info)
{
MPIDI_WinPSCW_info * info = (MPIDI_WinPSCW_info*)_info;
- unsigned peer, index;
+ unsigned peer, index,pid,i;
MPID_Group *group = info->win->mpid.sync.sc.group;
MPID_assert(group != NULL);
MPIDI_Win_control_t msg = {
@@ -75,8 +81,14 @@ MPIDI_WinComplete_post(pami_context_t context,
};
for (index=0; index < group->size; ++index) {
- peer = group->lrank_to_lpid[index].lrank;
- MPIDI_WinCtrlSend(context, &msg, peer, info->win);
+ pid = group->lrank_to_lpid[index].lpid;
+ for (i=0;i < ((info->win)->comm_ptr->local_size); i++) {
+ if ((info->win)->comm_ptr->local_group->lrank_to_lpid[i].lpid == pid) {
+ peer = ((info->win)->comm_ptr->local_group->lrank_to_lpid[i].lrank);
+ break;
+ }
+ }
+ MPIDI_WinCtrlSend(context, &msg, peer, info->win);
}
info->done = 1;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_shared_query.c b/src/mpid/pamid/src/onesided/mpid_win_shared_query.c
new file mode 100644
index 0000000..fb304ee
--- /dev/null
+++ b/src/mpid/pamid/src/onesided/mpid_win_shared_query.c
@@ -0,0 +1,50 @@
+/* begin_generated_IBM_copyright_prolog */
+/* */
+/* This is an automatically generated copyright prolog. */
+/* After initializing, DO NOT MODIFY OR MOVE */
+/* --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM */
+/* Blue Gene/Q 5765-PER 5765-PRP */
+/* */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved */
+/* US Government Users Restricted Rights - */
+/* Use, duplication, or disclosure restricted */
+/* by GSA ADP Schedule Contract with IBM Corp. */
+/* */
+/* --------------------------------------------------------------- */
+/* */
+/* end_generated_IBM_copyright_prolog */
+/* (C)Copyright IBM Corp. 2007, 2011 */
+/**
+ * \file src/onesided/mpid_win_shared_query.c
+ * \brief queries the process-local address for remote memory segments
+ * created with MPI_Win_allocate_shared.
+ */
+#include "mpidi_onesided.h"
+
+/**
+ * \brief MPI-PAMI glue for MPI_Win_shared_query function
+ *
+ * Query the size and base pointer for a patch of a shared memory window
+ *
+ * \param[in] win shared memory window object
+ * \param[in] rank rank in the group of window win or MPI_PROC_NULL
+ * \param[out] size size of the window segment (non-negative integer)
+ * \param[out] disp_unit local unit size for displacements, in bytes
+ * \param[out] base_ptr address for load/store access to window segment
+ * \return MPI_SUCCESS, MPI_ERR_OTHER, or error returned from
+ */
+
+int
+MPID_Win_shared_query(MPID_Win *win, int rank, MPI_Aint *size,
+ int *disp_unit, void *base_ptr)
+{
+ int mpi_errno = MPI_SUCCESS;
+
+ *(void**) base_ptr = (void *) win->base;
+ *size = win->size;
+ *disp_unit = win->disp_unit;
+
+ return mpi_errno;
+}
+
-----------------------------------------------------------------------
Summary of changes:
src/mpid/pamid/include/mpidi_datatypes.h | 27 +-
.../pamid/src/dyntask/mpid_comm_spawn_multiple.c | 5 +-
src/mpid/pamid/src/dyntask/mpidi_port.c | 1 -
src/mpid/pamid/src/misc/mpid_unimpl.c | 14 -
src/mpid/pamid/src/mpid_finalize.c | 7 +
src/mpid/pamid/src/mpidi_util.c | 2 +-
src/mpid/pamid/src/onesided/Makefile.mk | 2 +
src/mpid/pamid/src/onesided/mpid_1s.c | 10 +-
src/mpid/pamid/src/onesided/mpid_win_accumulate.c | 1 -
src/mpid/pamid/src/onesided/mpid_win_allocate.c | 2 +-
.../pamid/src/onesided/mpid_win_allocate_shared.c | 439 ++++++++++++++++++++
.../pamid/src/onesided/mpid_win_compare_and_swap.c | 6 -
src/mpid/pamid/src/onesided/mpid_win_create.c | 5 +-
.../pamid/src/onesided/mpid_win_create_dynamic.c | 2 +-
.../pamid/src/onesided/mpid_win_fetch_and_op.c | 6 -
src/mpid/pamid/src/onesided/mpid_win_flush.c | 52 ++--
src/mpid/pamid/src/onesided/mpid_win_free.c | 43 ++-
src/mpid/pamid/src/onesided/mpid_win_get.c | 2 -
.../pamid/src/onesided/mpid_win_get_accumulate.c | 135 +++----
src/mpid/pamid/src/onesided/mpid_win_lock.c | 11 +-
src/mpid/pamid/src/onesided/mpid_win_lock_all.c | 91 +++-
src/mpid/pamid/src/onesided/mpid_win_pscw.c | 22 +-
src/mpid/pamid/src/onesided/mpid_win_put.c | 2 -
.../{mpid_win_attach.c => mpid_win_shared_query.c} | 46 +--
src/mpid/pamid/src/onesided/mpidi_onesided.h | 70 +++-
src/mpid/pamid/src/onesided/mpidi_win_control.c | 5 +
26 files changed, 783 insertions(+), 225 deletions(-)
create mode 100644 src/mpid/pamid/src/onesided/mpid_win_allocate_shared.c
copy src/mpid/pamid/src/onesided/{mpid_win_attach.c => mpid_win_shared_query.c} (56%)
hooks/post-receive
--
MPICH primary repository
More information about the commits
mailing list