[mpich-commits] [mpich] MPICH primary repository branch, mpich-master, created. v3.2a2-163-gd9c15cf
Service Account
noreply at mpich.org
Fri Feb 13 14:58:26 CST 2015
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, mpich-master has been created
at d9c15cf3a93674c129d94ecb5b2e2b13bc186ff9 (commit)
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/d9c15cf3a93674c129d94ecb5b2e2b13bc186ff9
commit d9c15cf3a93674c129d94ecb5b2e2b13bc186ff9
Author: Sameh Sharkawi <sssharka at us.ibm.com>
Date: Wed Jan 21 13:30:50 2015 -0500
PAMID: Initial CUDA support
This is an initial limited implementation for CUDA support. This is not
performance optimized and only for testing.
(ibm) D202477
Signed-off-by: Su Huang <suhuang at us.ibm.com>
diff --git a/src/mpid/pamid/include/mpidi_datatypes.h b/src/mpid/pamid/include/mpidi_datatypes.h
index 7c6e566..1324187 100644
--- a/src/mpid/pamid/include/mpidi_datatypes.h
+++ b/src/mpid/pamid/include/mpidi_datatypes.h
@@ -38,6 +38,11 @@
#include "opa_primitives.h"
+#if CUDA_AWARE_SUPPORT
+#include <cuda_runtime_api.h>
+#endif
+
+
#if (MPIU_HANDLE_ALLOCATION_METHOD == MPIU_HANDLE_ALLOCATION_THREAD_LOCAL) && defined(__BGQ__)
struct MPID_Request;
typedef struct
@@ -105,6 +110,11 @@ typedef struct
#if QUEUE_BINARY_SEARCH_SUPPORT
unsigned queue_binary_search_support_on;
#endif
+
+#if CUDA_AWARE_SUPPORT
+ unsigned cuda_aware_support_on;
+#endif
+
unsigned verbose; /**< The current level of verbosity for end-of-job stats. */
unsigned statistics; /**< The current level of stats collection. */
unsigned rma_pending; /**< The max num outstanding requests during an RMA op */
diff --git a/src/mpid/pamid/include/mpidi_platform.h b/src/mpid/pamid/include/mpidi_platform.h
index e5ac1d8..cd95e72 100644
--- a/src/mpid/pamid/include/mpidi_platform.h
+++ b/src/mpid/pamid/include/mpidi_platform.h
@@ -86,6 +86,7 @@ typedef int32_t MPID_Node_id_t;
#define PAMIX_IS_LOCAL_TASK_SHIFT (6)
#define MPIDI_SMP_DETECT_DEFAULT 1
#define TOKEN_FLOW_CONTROL 0
+#define CUDA_AWARE_SUPPORT 0
/*
* Enable both the 'internal vs application' and the 'local vs remote'
@@ -142,6 +143,7 @@ static const char _ibm_release_version_[] = "V1R2M0";
#define MPIDI_NO_ASSERT 1
#define TOKEN_FLOW_CONTROL 1
#define DYNAMIC_TASKING 1
+#define CUDA_AWARE_SUPPORT 1
/* 'is local task' extension and limits */
#define PAMIX_IS_LOCAL_TASK
diff --git a/src/mpid/pamid/include/mpidi_prototypes.h b/src/mpid/pamid/include/mpidi_prototypes.h
index d833637..083699e 100644
--- a/src/mpid/pamid/include/mpidi_prototypes.h
+++ b/src/mpid/pamid/include/mpidi_prototypes.h
@@ -266,6 +266,7 @@ pami_result_t MPIDI_Register_algorithms_ext(void *cookie,
size_t *num_algorithms);
int MPIDI_collsel_pami_tune_parse_params(int argc, char ** argv);
void MPIDI_collsel_pami_tune_cleanup();
+inline bool MPIDI_cuda_is_device_buf(const void* ptr);
void MPIDI_Coll_Comm_create (MPID_Comm *comm);
void MPIDI_Coll_Comm_destroy(MPID_Comm *comm);
void MPIDI_Comm_coll_query (MPID_Comm *comm);
diff --git a/src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c b/src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c
index 5920817..75d2a9c 100644
--- a/src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c
+++ b/src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c
@@ -117,6 +117,22 @@ int MPIDO_Allreduce(const void *sendbuf,
fprintf(stderr,"Using MPICH allreduce type %u.\n",
selected_type);
MPIDI_Update_last_algorithm(comm_ptr, "ALLREDUCE_MPICH");
+#if CUDA_AWARE_SUPPORT
+ if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(sendbuf))
+ {
+ MPI_Aint dt_extent;
+ MPID_Datatype_get_extent_macro(dt, dt_extent);
+ char *buf = MPIU_Malloc(dt_extent * count);
+ cudaError_t cudaerr = cudaMemcpy(buf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
+ if (cudaSuccess != cudaerr) {
+ fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(cudaerr));
+ }
+ int cuda_res = MPIR_Allreduce(buf, recvbuf, count, dt, op, comm_ptr, mpierrno);
+ MPIU_Free(buf);
+ return cuda_res;
+ }
+ else
+#endif
return MPIR_Allreduce(sendbuf, recvbuf, count, dt, op, comm_ptr, mpierrno);
}
diff --git a/src/mpid/pamid/src/coll/reduce/mpido_reduce.c b/src/mpid/pamid/src/coll/reduce/mpido_reduce.c
index d3ac54b..2aeab7b 100644
--- a/src/mpid/pamid/src/coll/reduce/mpido_reduce.c
+++ b/src/mpid/pamid/src/coll/reduce/mpido_reduce.c
@@ -117,6 +117,22 @@ int MPIDO_Reduce(const void *sendbuf,
{
if(unlikely(verbose))
fprintf(stderr,"Using MPICH reduce algorithm\n");
+#if CUDA_AWARE_SUPPORT
+ if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(sendbuf))
+ {
+ MPI_Aint dt_extent;
+ MPID_Datatype_get_extent_macro(datatype, dt_extent);
+ char *buf = MPIU_Malloc(dt_extent * count);
+ cudaError_t cudaerr = cudaMemcpy(buf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
+ if (cudaSuccess != cudaerr) {
+ fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(cudaerr));
+ }
+ int cuda_res = MPIR_Reduce(buf, recvbuf, count, datatype, op, root, comm_ptr, mpierrno);
+ MPIU_Free(buf);
+ return cuda_res;
+ }
+ else
+#endif
return MPIR_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, mpierrno);
}
diff --git a/src/mpid/pamid/src/coll/scan/mpido_scan.c b/src/mpid/pamid/src/coll/scan/mpido_scan.c
index 6f33c02..c0100c7 100644
--- a/src/mpid/pamid/src/coll/scan/mpido_scan.c
+++ b/src/mpid/pamid/src/coll/scan/mpido_scan.c
@@ -135,6 +135,26 @@ int MPIDO_Doscan(const void *sendbuf, void *recvbuf,
{
if(unlikely(verbose))
fprintf(stderr,"Using MPICH scan algorithm (exflag %d)\n",exflag);
+#if CUDA_AWARE_SUPPORT
+ if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(sendbuf))
+ {
+ MPI_Aint dt_extent;
+ MPID_Datatype_get_extent_macro(datatype, dt_extent);
+ char *buf = MPIU_Malloc(dt_extent * count);
+ cudaError_t cudaerr = cudaMemcpy(buf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
+ if (cudaSuccess != cudaerr) {
+ fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(cudaerr));
+ }
+ int cuda_res;
+ if(exflag)
+ cuda_res = MPIR_Exscan(buf, recvbuf, count, datatype, op, comm_ptr, mpierrno);
+ else
+ cuda_res = MPIR_Scan(buf, recvbuf, count, datatype, op, comm_ptr, mpierrno);
+ MPIU_Free(buf);
+ return cuda_res;
+ }
+ else
+#endif
if(exflag)
return MPIR_Exscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, mpierrno);
else
diff --git a/src/mpid/pamid/src/mpid_buffer.c b/src/mpid/pamid/src/mpid_buffer.c
index 884b92c..0e2ad5a 100644
--- a/src/mpid/pamid/src/mpid_buffer.c
+++ b/src/mpid/pamid/src/mpid_buffer.c
@@ -57,10 +57,12 @@ void MPIDI_Buffer_copy(
MPID_Datatype * sdt_ptr;
MPID_Datatype * rdt_ptr;
+ MPI_Aint sdt_extent;
+ MPI_Aint rdt_extent;
+
*smpi_errno = MPI_SUCCESS;
*rmpi_errno = MPI_SUCCESS;
-
/* printf("bufcopy: src count=%d dt=%d\n", scount, sdt); */
/* printf("bufcopy: dst count=%d dt=%d\n", rcount, rdt); */
@@ -83,11 +85,52 @@ void MPIDI_Buffer_copy(
if (sdt_contig && rdt_contig)
{
+#if CUDA_AWARE_SUPPORT
+ if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rbuf))
+ {
+ cudaError_t cudaerr = cudaMemcpy(rbuf + rdt_true_lb, sbuf + sdt_true_lb, sdata_sz, cudaMemcpyHostToDevice);
+ }
+ else
+#endif
memcpy((char*)rbuf + rdt_true_lb, (const char *)sbuf + sdt_true_lb, sdata_sz);
*rsz = sdata_sz;
}
else if (sdt_contig)
{
+#if CUDA_AWARE_SUPPORT
+ // This will need to be done in two steps:
+ // 1 - Allocate a temp buffer which is the same size as user buffer and unpack in it.
+ // 2 - Copy unpacked data into user buffer from temp buffer.
+ if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rbuf))
+ {
+ MPID_Datatype_get_extent_macro(rdt, rdt_extent);
+ char *buf = MPIU_Malloc(rdt_extent * rcount);
+ memset(buf, 0, rdt_extent * rcount);
+ MPID_Segment seg;
+ DLOOP_Offset last;
+
+ MPID_Segment_init(buf, rcount, rdt, &seg, 0);
+ last = sdata_sz;
+ MPID_Segment_unpack(&seg, 0, &last, (char*)sbuf + sdt_true_lb);
+ /* --BEGIN ERROR HANDLING-- */
+ if (last != sdata_sz)
+ {
+ *rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, __FUNCTION__, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
+ }
+ /* --END ERROR HANDLING-- */
+
+ *rsz = last;
+
+
+ cudaError_t cudaerr = cudaMemcpy(rbuf + rdt_true_lb, buf, rdt_extent * rcount, cudaMemcpyHostToDevice);
+
+ MPIU_Free(buf);
+
+ goto fn_exit;
+
+ }
+#endif
+
MPID_Segment seg;
DLOOP_Offset last;
diff --git a/src/mpid/pamid/src/mpid_init.c b/src/mpid/pamid/src/mpid_init.c
index 52f0b7c..e768119 100644
--- a/src/mpid/pamid/src/mpid_init.c
+++ b/src/mpid/pamid/src/mpid_init.c
@@ -119,6 +119,9 @@ MPIDI_Process_t MPIDI_Process = {
#ifdef QUEUE_BINARY_SEARCH_SUPPORT
.queue_binary_search_support_on = 0,
#endif
+#if CUDA_AWARE_SUPPORT
+ .cuda_aware_support_on = 0,
+#endif
.rma_pending = 1000,
.shmem_pt2pt = 1,
.smp_detect = MPIDI_SMP_DETECT_DEFAULT,
diff --git a/src/mpid/pamid/src/mpidi_env.c b/src/mpid/pamid/src/mpidi_env.c
index dd1912f..c931858 100644
--- a/src/mpid/pamid/src/mpidi_env.c
+++ b/src/mpid/pamid/src/mpidi_env.c
@@ -1139,6 +1139,12 @@ MPIDI_Env_setup(int rank, int requested)
char* names[] = {"MP_S_USE_QUEUE_BINARY_SEARCH_SUPPORT", NULL};
ENV_Char(names, &MPIDI_Process.queue_binary_search_support_on);
#endif
+
+#if CUDA_AWARE_SUPPORT
+ char* names[] = {"MP_CUDA_AWARE", NULL};
+ ENV_Char(names, &MPIDI_Process.cuda_aware_support_on);
+#endif
+
/* Exit if any deprecated environment variables were specified. */
if (found_deprecated_env_var)
{
diff --git a/src/mpid/pamid/src/mpidi_util.c b/src/mpid/pamid/src/mpidi_util.c
index 774e786..896dd86 100644
--- a/src/mpid/pamid/src/mpidi_util.c
+++ b/src/mpid/pamid/src/mpidi_util.c
@@ -1921,6 +1921,34 @@ void MPIDI_collsel_pami_tune_cleanup()
MPIDI_collsel_free_advisor_params(&MPIDI_Collsel_advisor_params);
}
+
+
+/**********************************************************/
+/* CUDA Utilities */
+/**********************************************************/
+
+inline bool MPIDI_cuda_is_device_buf(const void* ptr)
+{
+ bool result = false;
+#if CUDA_AWARE_SUPPORT
+ struct cudaPointerAttributes cuda_attr;
+ cudaError_t e= cudaPointerGetAttributes ( & cuda_attr, ptr);
+
+ if (e != cudaSuccess)
+ result = false;
+ else if (cuda_attr.memoryType == cudaMemoryTypeDevice)
+ result = true;
+ else
+ result = false;
+#endif
+ return result;
+}
+
+
+/**********************************************************/
+/* End CUDA Utilities */
+/**********************************************************/
+
#if defined(MPID_USE_NODE_IDS)
#undef FUNCNAME
#define FUNCNAME MPID_Get_node_id
diff --git a/src/mpid/pamid/src/pt2pt/mpidi_callback_short.c b/src/mpid/pamid/src/pt2pt/mpidi_callback_short.c
index 705d623..1746c45 100644
--- a/src/mpid/pamid/src/pt2pt/mpidi_callback_short.c
+++ b/src/mpid/pamid/src/pt2pt/mpidi_callback_short.c
@@ -195,7 +195,16 @@ MPIDI_RecvShortCB(pami_context_t context,
void* rcvbuf = rreq->mpid.userbuf;
if (sndlen > 0)
- memcpy(rcvbuf, sndbuf, sndlen);
+ {
+#if CUDA_AWARE_SUPPORT
+ if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
+ {
+ cudaError_t cudaerr = cudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
+ }
+ else
+#endif
+ memcpy(rcvbuf, sndbuf, sndlen);
+ }
TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,sndlen);
TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.comp_in_HH);
TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
diff --git a/src/mpid/pamid/src/pt2pt/mpidi_callback_util.c b/src/mpid/pamid/src/pt2pt/mpidi_callback_util.c
index fa3b48b..c91e7da 100644
--- a/src/mpid/pamid/src/pt2pt/mpidi_callback_util.c
+++ b/src/mpid/pamid/src/pt2pt/mpidi_callback_util.c
@@ -175,7 +175,13 @@ MPIDI_Callback_process_userdefined_dt(pami_context_t context,
MPID_assert(rreq->mpid.uebuf == NULL);
MPID_assert(rreq->mpid.uebuflen == 0);
void* rcvbuf = rreq->mpid.userbuf + dt_true_lb;;
-
+#if CUDA_AWARE_SUPPORT
+ if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
+ {
+ cudaError_t cudaerr = cudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
+ }
+ else
+#endif
memcpy(rcvbuf, sndbuf, sndlen);
MPIDI_Request_complete(rreq);
return;
diff --git a/src/mpid/pamid/src/pt2pt/mpidi_rendezvous.c b/src/mpid/pamid/src/pt2pt/mpidi_rendezvous.c
index ec4569b..d7f0a0c 100644
--- a/src/mpid/pamid/src/pt2pt/mpidi_rendezvous.c
+++ b/src/mpid/pamid/src/pt2pt/mpidi_rendezvous.c
@@ -107,6 +107,16 @@ MPIDI_RendezvousTransfer(pami_context_t context,
pami_endpoint_t dest;
MPIDI_Context_endpoint(rreq, &dest);
+#if CUDA_AWARE_SUPPORT
+ if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
+ {
+ MPIDI_RendezvousTransfer_use_pami_get(context,dest,rcvbuf,rreq);
+ }
+ else
+ {
+#endif
+
+
#ifdef USE_PAMI_RDMA
size_t rcvlen_out;
rc = PAMI_Memregion_create(context,
@@ -153,6 +163,10 @@ MPIDI_RendezvousTransfer(pami_context_t context,
}
#endif
+#if CUDA_AWARE_SUPPORT
+ }
+#endif
+
return PAMI_SUCCESS;
}
diff --git a/src/mpid/pamid/src/pt2pt/mpidi_sendmsg.c b/src/mpid/pamid/src/pt2pt/mpidi_sendmsg.c
index 2086e43..65cf773 100644
--- a/src/mpid/pamid/src/pt2pt/mpidi_sendmsg.c
+++ b/src/mpid/pamid/src/pt2pt/mpidi_sendmsg.c
@@ -345,6 +345,26 @@ MPIDI_SendMsg_process_userdefined_dt(MPID_Request * sreq,
*/
else
{
+ char *buf = NULL;
+#if CUDA_AWARE_SUPPORT
+ // This will need to be done in two steps:
+ // 1 - Allocate a temp buffer which is the same size as user buffer and copy in it.
+ // 2 - Pack data into ue buffer from temp buffer.
+ int on_device = MPIDI_cuda_is_device_buf(sreq->mpid.userbuf);
+ if(MPIDI_Process.cuda_aware_support_on && on_device)
+ {
+ MPI_Aint dt_extent;
+ MPID_Datatype_get_extent_macro(sreq->mpid.datatype, dt_extent);
+ buf = MPIU_Malloc(dt_extent * sreq->mpid.userbufcount);
+
+ cudaError_t cudaerr = cudaMemcpy(buf, sreq->mpid.userbuf, dt_extent * sreq->mpid.userbufcount, cudaMemcpyDeviceToHost);
+ if (cudaSuccess != cudaerr) {
+ fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(cudaerr));
+ }
+
+ }
+#endif
+
MPID_Segment segment;
if(data_sz != 0) {
@@ -359,13 +379,23 @@ MPIDI_SendMsg_process_userdefined_dt(MPID_Request * sreq,
sreq->mpid.uebuf_malloc = mpiuMalloc;
DLOOP_Offset last = data_sz;
- MPID_Segment_init(sreq->mpid.userbuf,
+#if CUDA_AWARE_SUPPORT
+ if(!MPIDI_Process.cuda_aware_support_on || !on_device)
+ buf = sreq->mpid.userbuf;
+#endif
+ MPID_assert(buf != NULL);
+
+ MPID_Segment_init(buf,
sreq->mpid.userbufcount,
sreq->mpid.datatype,
&segment,
0);
MPID_Segment_pack(&segment, 0, &last, sndbuf);
MPID_assert(last == data_sz);
+#if CUDA_AWARE_SUPPORT
+ if(MPIDI_Process.cuda_aware_support_on && on_device)
+ MPIU_Free(buf);
+#endif
} else {
sndbuf = NULL;
}
-----------------------------------------------------------------------
hooks/post-receive
--
MPICH primary repository
More information about the commits
mailing list