[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2b1-26-g94e8a80

Service Account noreply at mpich.org
Thu Apr 2 10:35:34 CDT 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  94e8a8058e775a224367d2eeffcd73c905d7e071 (commit)
      from  1c38bd3e927329e308c9838061e9383eebe2010b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/94e8a8058e775a224367d2eeffcd73c905d7e071

commit 94e8a8058e775a224367d2eeffcd73c905d7e071
Author: Sameh Sharkawi <sssharka at us.ibm.com>
Date:   Wed Apr 1 15:08:11 2015 -0400

    PAMID: Dynamically loading libcuda instead of static linking to libmpi.so
    
         This allows the systems that don't have GPUs installed and no libcuda
         to use the libmpi.so w/o getting unable to load lib errors
    
    (ibm) D203056
    
    Signed-off-by: Tsai-Yang (Alan) Jea <tjea at us.ibm.com>
    Signed-off-by: Sameh Sharkawi <sssharka at us.ibm.com>

diff --git a/src/mpid/pamid/include/mpidi_datatypes.h b/src/mpid/pamid/include/mpidi_datatypes.h
index 1324187..14e8dc8 100644
--- a/src/mpid/pamid/include/mpidi_datatypes.h
+++ b/src/mpid/pamid/include/mpidi_datatypes.h
@@ -38,9 +38,6 @@
 
 #include "opa_primitives.h"
 
-#if CUDA_AWARE_SUPPORT
-#include <cuda_runtime_api.h>
-#endif
 
 
 #if (MPIU_HANDLE_ALLOCATION_METHOD == MPIU_HANDLE_ALLOCATION_THREAD_LOCAL) && defined(__BGQ__)
diff --git a/src/mpid/pamid/include/mpidi_prototypes.h b/src/mpid/pamid/include/mpidi_prototypes.h
index 1986c30..f2e60ae 100644
--- a/src/mpid/pamid/include/mpidi_prototypes.h
+++ b/src/mpid/pamid/include/mpidi_prototypes.h
@@ -28,6 +28,10 @@
 #ifndef __include_mpidi_prototypes_h__
 #define __include_mpidi_prototypes_h__
 
+#if CUDA_AWARE_SUPPORT
+#include <cuda_runtime_api.h>
+#endif
+
 
 /**
  * \addtogroup MPID_RECVQ
@@ -266,6 +270,12 @@ pami_result_t MPIDI_Register_algorithms_ext(void                 *cookie,
                                             size_t               *num_algorithms);
 int MPIDI_collsel_pami_tune_parse_params(int argc, char ** argv);
 void MPIDI_collsel_pami_tune_cleanup();
+#if CUDA_AWARE_SUPPORT
+int CudaMemcpy( void* dst, const void* src, size_t count, int kind );
+int CudaPointerGetAttributes( struct cudaPointerAttributes* attributes, const void* ptr );
+const char * CudaGetErrorString( int error);
+#endif
+inline bool MPIDI_enable_cuda();
 inline bool MPIDI_cuda_is_device_buf(const void* ptr);
 void MPIDI_Coll_Comm_create (MPID_Comm *comm);
 void MPIDI_Coll_Comm_destroy(MPID_Comm *comm);
diff --git a/src/mpid/pamid/include/mpidi_util.h b/src/mpid/pamid/include/mpidi_util.h
index 5428c10..5a010c4 100644
--- a/src/mpid/pamid/include/mpidi_util.h
+++ b/src/mpid/pamid/include/mpidi_util.h
@@ -157,6 +157,13 @@ extern void set_mpich_env(int *,int*);
 extern void MPIDI_open_pe_extension();
 extern void MPIDI_close_pe_extension();
 extern MPIDI_Statistics_write(FILE *);
+
+#if CUDA_AWARE_SUPPORT
+int (*pamidCudaMemcpy)( void* dst, const void* src, size_t count, int kind );
+int (*pamidCudaPointerGetAttributes)( struct cudaPointerAttributes* attributes, const void* ptr );
+const char* (*pamidCudaGetErrorString)( int error );
+extern void * pamidCudaPtr;
+#endif
 /*************************************************************
  *    MPIDI_STATISTICS
  *************************************************************/
diff --git a/src/mpid/pamid/src/coll/allgather/mpido_allgather.c b/src/mpid/pamid/src/coll/allgather/mpido_allgather.c
index 0861fa9..429cf32 100644
--- a/src/mpid/pamid/src/coll/allgather/mpido_allgather.c
+++ b/src/mpid/pamid/src/coll/allgather/mpido_allgather.c
@@ -369,9 +369,9 @@ MPIDO_Allgather(const void *sendbuf,
        if(is_send_dev_buf)
        {
          scbuf = MPIU_Malloc(sdt_extent * sendcount);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr)
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -386,9 +386,9 @@ MPIDO_Allgather(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, rdt_extent * recvcount, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, rdt_extent * recvcount, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
        return cuda_res;
diff --git a/src/mpid/pamid/src/coll/allgatherv/mpido_allgatherv.c b/src/mpid/pamid/src/coll/allgatherv/mpido_allgatherv.c
index 37a767f..7c3121a 100644
--- a/src/mpid/pamid/src/coll/allgatherv/mpido_allgatherv.c
+++ b/src/mpid/pamid/src/coll/allgatherv/mpido_allgatherv.c
@@ -383,9 +383,9 @@ MPIDO_Allgatherv(const void *sendbuf,
        if(is_send_dev_buf)
        {
          scbuf = MPIU_Malloc(sdt_extent * sendcount);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr)
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -413,9 +413,9 @@ MPIDO_Allgatherv(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, rtotal_buf, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, rtotal_buf, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
        return cuda_res;
diff --git a/src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c b/src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c
index d720279..e7b9222 100644
--- a/src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c
+++ b/src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c
@@ -129,9 +129,9 @@ int MPIDO_Allreduce(const void *sendbuf,
        if(is_send_dev_buf)
        {
          scbuf = MPIU_Malloc(dt_extent * count);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr) 
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -146,9 +146,9 @@ int MPIDO_Allreduce(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, dt_extent * count, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, dt_extent * count, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
        return cuda_res;
diff --git a/src/mpid/pamid/src/coll/alltoall/mpido_alltoall.c b/src/mpid/pamid/src/coll/alltoall/mpido_alltoall.c
index 42b34d1..fedb2c8 100644
--- a/src/mpid/pamid/src/coll/alltoall/mpido_alltoall.c
+++ b/src/mpid/pamid/src/coll/alltoall/mpido_alltoall.c
@@ -104,9 +104,9 @@ int MPIDO_Alltoall(const void *sendbuf,
        if(is_send_dev_buf)
        {
          scbuf = MPIU_Malloc(sdt_extent * sendcount);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr)
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -121,9 +121,9 @@ int MPIDO_Alltoall(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, recvcount * rdt_extent, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, recvcount * rdt_extent, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
        return cuda_res;
diff --git a/src/mpid/pamid/src/coll/gather/mpido_gather.c b/src/mpid/pamid/src/coll/gather/mpido_gather.c
index 8676f02..cbe9672 100644
--- a/src/mpid/pamid/src/coll/gather/mpido_gather.c
+++ b/src/mpid/pamid/src/coll/gather/mpido_gather.c
@@ -204,9 +204,9 @@ int MPIDO_Gather(const void *sendbuf,
        if(is_send_dev_buf)
        {
          scbuf = MPIU_Malloc(sdt_extent * sendcount);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr)
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -221,9 +221,9 @@ int MPIDO_Gather(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, rdt_extent * recvcount, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, rdt_extent * recvcount, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
        return cuda_res;
diff --git a/src/mpid/pamid/src/coll/gatherv/mpido_gatherv.c b/src/mpid/pamid/src/coll/gatherv/mpido_gatherv.c
index b906c5e..834a4e4 100644
--- a/src/mpid/pamid/src/coll/gatherv/mpido_gatherv.c
+++ b/src/mpid/pamid/src/coll/gatherv/mpido_gatherv.c
@@ -97,9 +97,9 @@ int MPIDO_Gatherv(const void *sendbuf,
        if(is_send_dev_buf)
        {
          scbuf = MPIU_Malloc(sdt_extent * sendcount);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr)
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -127,9 +127,9 @@ int MPIDO_Gatherv(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, rtotal_buf, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, rtotal_buf, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
        return cuda_res;
diff --git a/src/mpid/pamid/src/coll/red_scat/mpido_red_scat.c b/src/mpid/pamid/src/coll/red_scat/mpido_red_scat.c
index d80bbda..4dd18c6 100644
--- a/src/mpid/pamid/src/coll/red_scat/mpido_red_scat.c
+++ b/src/mpid/pamid/src/coll/red_scat/mpido_red_scat.c
@@ -63,9 +63,9 @@ int MPIDO_Reduce_scatter(const void *sendbuf,
        if(is_send_dev_buf)
        {
          scbuf = MPIU_Malloc(dt_extent * total_buf);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, dt_extent * total_buf, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, dt_extent * total_buf, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr) 
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -82,9 +82,9 @@ int MPIDO_Reduce_scatter(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
        {
-         cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, dt_extent * total_buf, cudaMemcpyHostToDevice);
+         cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, dt_extent * total_buf, cudaMemcpyHostToDevice);
          if (cudaSuccess != cudaerr)
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
          MPIU_Free(rcbuf);
        }
        return cuda_res;
@@ -129,9 +129,9 @@ int MPIDO_Reduce_scatter_block(const void *sendbuf,
        if(is_send_dev_buf)
        {
          scbuf = MPIU_Malloc(dt_extent * recvcount * size);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, dt_extent * recvcount * size, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, dt_extent * recvcount * size, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr) 
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -152,9 +152,9 @@ int MPIDO_Reduce_scatter_block(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
        {
-         cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, dt_extent * recvcount * size, cudaMemcpyHostToDevice);
+         cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, dt_extent * recvcount * size, cudaMemcpyHostToDevice);
          if (cudaSuccess != cudaerr)
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
          MPIU_Free(rcbuf);
        }
        return cuda_res;
diff --git a/src/mpid/pamid/src/coll/reduce/mpido_reduce.c b/src/mpid/pamid/src/coll/reduce/mpido_reduce.c
index 8e83765..0c27f07 100644
--- a/src/mpid/pamid/src/coll/reduce/mpido_reduce.c
+++ b/src/mpid/pamid/src/coll/reduce/mpido_reduce.c
@@ -129,9 +129,9 @@ int MPIDO_Reduce(const void *sendbuf,
          if(is_send_dev_buf)
          {
            scbuf = MPIU_Malloc(dt_extent * count);
-           cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
+           cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
            if (cudaSuccess != cudaerr) 
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
          }
          else
            scbuf = sendbuf;
@@ -146,9 +146,9 @@ int MPIDO_Reduce(const void *sendbuf,
          if(is_send_dev_buf)MPIU_Free(scbuf);
          if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, dt_extent * count, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, dt_extent * count, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
          return cuda_res;
diff --git a/src/mpid/pamid/src/coll/scan/mpido_scan.c b/src/mpid/pamid/src/coll/scan/mpido_scan.c
index 8bdce61..3fbb5f3 100644
--- a/src/mpid/pamid/src/coll/scan/mpido_scan.c
+++ b/src/mpid/pamid/src/coll/scan/mpido_scan.c
@@ -147,9 +147,9 @@ int MPIDO_Doscan(const void *sendbuf, void *recvbuf,
          if(is_send_dev_buf)
          {
            scbuf = MPIU_Malloc(dt_extent * count);
-           cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
+           cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
            if (cudaSuccess != cudaerr) 
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
          }
          else
            scbuf = sendbuf;
@@ -168,9 +168,9 @@ int MPIDO_Doscan(const void *sendbuf, void *recvbuf,
          if(is_send_dev_buf)MPIU_Free(scbuf);
          if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, dt_extent * count, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, dt_extent * count, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
          return cuda_res;
diff --git a/src/mpid/pamid/src/coll/scatter/mpido_scatter.c b/src/mpid/pamid/src/coll/scatter/mpido_scatter.c
index 0fcf07c..6ab1479 100644
--- a/src/mpid/pamid/src/coll/scatter/mpido_scatter.c
+++ b/src/mpid/pamid/src/coll/scatter/mpido_scatter.c
@@ -152,9 +152,9 @@ int MPIDO_Scatter(const void *sendbuf,
        if(is_send_dev_buf)
        {
          scbuf = MPIU_Malloc(sdt_extent * sendcount);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, sdt_extent * sendcount, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr)
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -169,9 +169,9 @@ int MPIDO_Scatter(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, rdt_extent * recvcount, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, rdt_extent * recvcount, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
        return cuda_res;
diff --git a/src/mpid/pamid/src/coll/scatterv/mpido_scatterv.c b/src/mpid/pamid/src/coll/scatterv/mpido_scatterv.c
index 5dad933..1487dc1 100644
--- a/src/mpid/pamid/src/coll/scatterv/mpido_scatterv.c
+++ b/src/mpid/pamid/src/coll/scatterv/mpido_scatterv.c
@@ -272,9 +272,9 @@ int MPIDO_Scatterv(const void *sendbuf,
          }
          stotal_buf = (highest_displs+highest_sendcount)*sdt_extent;
          scbuf = MPIU_Malloc(stotal_buf);
-         cudaError_t cudaerr = cudaMemcpy(scbuf, sendbuf, stotal_buf, cudaMemcpyDeviceToHost);
+         cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, stotal_buf, cudaMemcpyDeviceToHost);
          if (cudaSuccess != cudaerr)
-           fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+           fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
        }
        else
          scbuf = sendbuf;
@@ -289,9 +289,9 @@ int MPIDO_Scatterv(const void *sendbuf,
        if(is_send_dev_buf)MPIU_Free(scbuf);
        if(is_recv_dev_buf)
          {
-           cudaError_t cudaerr = cudaMemcpy(recvbuf, rcbuf, recvcount * rdt_extent, cudaMemcpyHostToDevice);
+           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, recvcount * rdt_extent, cudaMemcpyHostToDevice);
            if (cudaSuccess != cudaerr)
-             fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaerr));
+             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
            MPIU_Free(rcbuf);
          }
        return cuda_res;
diff --git a/src/mpid/pamid/src/mpid_buffer.c b/src/mpid/pamid/src/mpid_buffer.c
index 0e2ad5a..2a4407b 100644
--- a/src/mpid/pamid/src/mpid_buffer.c
+++ b/src/mpid/pamid/src/mpid_buffer.c
@@ -88,7 +88,7 @@ void MPIDI_Buffer_copy(
 #if CUDA_AWARE_SUPPORT
       if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rbuf))
       {
-        cudaError_t cudaerr = cudaMemcpy(rbuf + rdt_true_lb, sbuf + sdt_true_lb, sdata_sz, cudaMemcpyHostToDevice);
+        cudaError_t cudaerr = CudaMemcpy(rbuf + rdt_true_lb, sbuf + sdt_true_lb, sdata_sz, cudaMemcpyHostToDevice);
       }
       else
 #endif
@@ -122,7 +122,7 @@ void MPIDI_Buffer_copy(
        *rsz = last;
 
         
-        cudaError_t cudaerr = cudaMemcpy(rbuf + rdt_true_lb, buf, rdt_extent * rcount, cudaMemcpyHostToDevice);
+        cudaError_t cudaerr = CudaMemcpy(rbuf + rdt_true_lb, buf, rdt_extent * rcount, cudaMemcpyHostToDevice);
 
         MPIU_Free(buf);
 
diff --git a/src/mpid/pamid/src/mpidi_env.c b/src/mpid/pamid/src/mpidi_env.c
index c931858..3eb1aac 100644
--- a/src/mpid/pamid/src/mpidi_env.c
+++ b/src/mpid/pamid/src/mpidi_env.c
@@ -1143,6 +1143,14 @@ MPIDI_Env_setup(int rank, int requested)
 #if CUDA_AWARE_SUPPORT
     char* names[] = {"MP_CUDA_AWARE", NULL};
     ENV_Char(names, &MPIDI_Process.cuda_aware_support_on);
+    if(MPIDI_Process.cuda_aware_support_on && MPIDI_enable_cuda() == false)
+    {
+      MPIDI_Process.cuda_aware_support_on = false;
+      if(rank == 0)
+      {
+        fprintf(stderr, "Error loading libcudart\n");fflush(stderr);sleep(1);exit(1);
+      }
+    }
 #endif
 
   /* Exit if any deprecated environment variables were specified. */
diff --git a/src/mpid/pamid/src/mpidi_util.c b/src/mpid/pamid/src/mpidi_util.c
index dd25865..fa0b5f4 100644
--- a/src/mpid/pamid/src/mpidi_util.c
+++ b/src/mpid/pamid/src/mpidi_util.c
@@ -24,7 +24,7 @@
 /*
  * \brief
  */
-
+#include <dlfcn.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -43,6 +43,9 @@
 
 #define PAMI_ASYNC_EXT_ATTR 2000
 
+#if CUDA_AWARE_SUPPORT
+void * pamidCudaPtr = NULL;
+#endif
 #if (MPIDI_PRINTENV || MPIDI_STATISTICS || MPIDI_BANNER)
 MPIDI_printenv_t  *mpich_env=NULL;
 extern char* mp_euilib;
@@ -1913,13 +1916,63 @@ void MPIDI_collsel_pami_tune_cleanup()
 /**********************************************************/
 /*                 CUDA Utilities                         */
 /**********************************************************/
+#if CUDA_AWARE_SUPPORT
+int CudaMemcpy(void* dst, const void* src, size_t count, int kind)
+{
+  return (*pamidCudaMemcpy)(dst, src, count, kind);
+}
+int CudaPointerGetAttributes(struct cudaPointerAttributes* attributes, const void* ptr)
+{
+  return  (*pamidCudaPointerGetAttributes)(attributes, ptr);
+}
+const char*  CudaGetErrorString( int error)
+{
+  return (*pamidCudaGetErrorString)(error);
+}
+#endif
+
+inline bool MPIDI_enable_cuda()
+{
+  bool result = false;
+#if CUDA_AWARE_SUPPORT
+  pamidCudaPtr = dlopen("libcudart.so", RTLD_NOW|RTLD_GLOBAL);
+  if(pamidCudaPtr == NULL)
+  {
+    TRACE_ERR("failed to open libcudart.so error=%s\n", dlerror());
+    return result;
+  }
+  else
+  {
+    pamidCudaMemcpy =  (int (*)())dlsym(pamidCudaPtr, "cudaMemcpy");
+    if(pamidCudaMemcpy == NULL)
+    {
+      dlclose(pamidCudaPtr);
+      return result;
+    }
+    pamidCudaPointerGetAttributes = (int (*)())dlsym(pamidCudaPtr, "cudaPointerGetAttributes");
+    if(pamidCudaPointerGetAttributes == NULL)
+    {
+      dlclose(pamidCudaPtr);
+      return result;
+    }
+    pamidCudaGetErrorString = (const char* (*)())dlsym(pamidCudaPtr, "cudaGetErrorString");
+    if(pamidCudaGetErrorString == NULL)
+    {
+      dlclose(pamidCudaPtr);
+      return result;
+    }
+    result = true;
+  }
+#endif
+  return result;
+}
 
 inline bool MPIDI_cuda_is_device_buf(const void* ptr)
 {
     bool result = false;
 #if CUDA_AWARE_SUPPORT
     struct cudaPointerAttributes cuda_attr;
-    cudaError_t e= cudaPointerGetAttributes  ( & cuda_attr, ptr);
+    cudaError_t e= CudaPointerGetAttributes  ( & cuda_attr, ptr);
 
     if (e != cudaSuccess)
         result = false;
diff --git a/src/mpid/pamid/src/pt2pt/mpidi_callback_short.c b/src/mpid/pamid/src/pt2pt/mpidi_callback_short.c
index 1746c45..063fe1d 100644
--- a/src/mpid/pamid/src/pt2pt/mpidi_callback_short.c
+++ b/src/mpid/pamid/src/pt2pt/mpidi_callback_short.c
@@ -199,7 +199,7 @@ MPIDI_RecvShortCB(pami_context_t    context,
 #if CUDA_AWARE_SUPPORT
     if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
     {
-      cudaError_t cudaerr = cudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
+      cudaError_t cudaerr = CudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
     }
     else
 #endif
diff --git a/src/mpid/pamid/src/pt2pt/mpidi_callback_util.c b/src/mpid/pamid/src/pt2pt/mpidi_callback_util.c
index c91e7da..8bc98d1 100644
--- a/src/mpid/pamid/src/pt2pt/mpidi_callback_util.c
+++ b/src/mpid/pamid/src/pt2pt/mpidi_callback_util.c
@@ -178,7 +178,7 @@ MPIDI_Callback_process_userdefined_dt(pami_context_t      context,
 #if CUDA_AWARE_SUPPORT
     if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
     {
-      cudaError_t cudaerr = cudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
+      cudaError_t cudaerr = CudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
     }
     else
 #endif
diff --git a/src/mpid/pamid/src/pt2pt/mpidi_sendmsg.c b/src/mpid/pamid/src/pt2pt/mpidi_sendmsg.c
index 65cf773..0987cfd 100644
--- a/src/mpid/pamid/src/pt2pt/mpidi_sendmsg.c
+++ b/src/mpid/pamid/src/pt2pt/mpidi_sendmsg.c
@@ -357,9 +357,9 @@ MPIDI_SendMsg_process_userdefined_dt(MPID_Request      * sreq,
         MPID_Datatype_get_extent_macro(sreq->mpid.datatype, dt_extent);
         buf =  MPIU_Malloc(dt_extent * sreq->mpid.userbufcount);
 
-        cudaError_t cudaerr = cudaMemcpy(buf, sreq->mpid.userbuf, dt_extent * sreq->mpid.userbufcount, cudaMemcpyDeviceToHost);
+        cudaError_t cudaerr = CudaMemcpy(buf, sreq->mpid.userbuf, dt_extent * sreq->mpid.userbufcount, cudaMemcpyDeviceToHost);
         if (cudaSuccess != cudaerr) {
-          fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(cudaerr));
+          fprintf(stderr, "cudaMalloc failed: %s\n", CudaGetErrorString(cudaerr));
         }
 
       }

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/pamid/include/mpidi_datatypes.h           |    3 -
 src/mpid/pamid/include/mpidi_prototypes.h          |   10 ++++
 src/mpid/pamid/include/mpidi_util.h                |    7 +++
 .../pamid/src/coll/allgather/mpido_allgather.c     |    8 ++--
 .../pamid/src/coll/allgatherv/mpido_allgatherv.c   |    8 ++--
 .../pamid/src/coll/allreduce/mpido_allreduce.c     |    8 ++--
 src/mpid/pamid/src/coll/alltoall/mpido_alltoall.c  |    8 ++--
 src/mpid/pamid/src/coll/gather/mpido_gather.c      |    8 ++--
 src/mpid/pamid/src/coll/gatherv/mpido_gatherv.c    |    8 ++--
 src/mpid/pamid/src/coll/red_scat/mpido_red_scat.c  |   16 +++---
 src/mpid/pamid/src/coll/reduce/mpido_reduce.c      |    8 ++--
 src/mpid/pamid/src/coll/scan/mpido_scan.c          |    8 ++--
 src/mpid/pamid/src/coll/scatter/mpido_scatter.c    |    8 ++--
 src/mpid/pamid/src/coll/scatterv/mpido_scatterv.c  |    8 ++--
 src/mpid/pamid/src/mpid_buffer.c                   |    4 +-
 src/mpid/pamid/src/mpidi_env.c                     |    8 +++
 src/mpid/pamid/src/mpidi_util.c                    |   57 +++++++++++++++++++-
 src/mpid/pamid/src/pt2pt/mpidi_callback_short.c    |    2 +-
 src/mpid/pamid/src/pt2pt/mpidi_callback_util.c     |    2 +-
 src/mpid/pamid/src/pt2pt/mpidi_sendmsg.c           |    4 +-
 20 files changed, 134 insertions(+), 59 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list