[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.0.4-193-g4959541

mysql vizuser noreply at mpich.org
Wed May 15 12:00:37 CDT 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  49595414949ac4099b3a7ce6d6bc41772b5111cf (commit)
       via  57f5b7b17decc5dfb447186b02fc1f35495618da (commit)
       via  2eefd3a26e6de5e0b57fa6164ec3f5fdfb56c4fe (commit)
      from  635969e76f66b3039f8150bc582f41e324864662 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/49595414949ac4099b3a7ce6d6bc41772b5111cf

commit 49595414949ac4099b3a7ce6d6bc41772b5111cf
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Tue May 14 20:34:46 2013 -0500

    Remove C99-ism from IBM contributed code.
    
    Reviewed by Mike Blocksome @ IBM.

diff --git a/src/mpi/coll/iallgather.c b/src/mpi/coll/iallgather.c
index 0a0858e..8313e78 100644
--- a/src/mpi/coll/iallgather.c
+++ b/src/mpi/coll/iallgather.c
@@ -589,6 +589,8 @@ int MPIR_Iallgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendty
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -604,9 +606,6 @@ int MPIR_Iallgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendty
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/iallgatherv.c b/src/mpi/coll/iallgatherv.c
index acffde4..40f6f67 100644
--- a/src/mpi/coll/iallgatherv.c
+++ b/src/mpi/coll/iallgatherv.c
@@ -691,6 +691,8 @@ int MPIR_Iallgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendt
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -706,9 +708,6 @@ int MPIR_Iallgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendt
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/iallreduce.c b/src/mpi/coll/iallreduce.c
index 0a0147c..cefa272 100644
--- a/src/mpi/coll/iallreduce.c
+++ b/src/mpi/coll/iallreduce.c
@@ -647,6 +647,8 @@ int MPIR_Iallreduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Data
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -662,9 +664,6 @@ int MPIR_Iallreduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Data
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ialltoall.c b/src/mpi/coll/ialltoall.c
index af7484c..bf1ba73 100644
--- a/src/mpi/coll/ialltoall.c
+++ b/src/mpi/coll/ialltoall.c
@@ -519,6 +519,8 @@ int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -534,9 +536,6 @@ int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ialltoallv.c b/src/mpi/coll/ialltoallv.c
index d7063b4..0d72e01 100644
--- a/src/mpi/coll/ialltoallv.c
+++ b/src/mpi/coll/ialltoallv.c
@@ -246,6 +246,8 @@ int MPIR_Ialltoallv_impl(const void *sendbuf, const int sendcounts[], const int
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -261,10 +263,6 @@ int MPIR_Ialltoallv_impl(const void *sendbuf, const int sendcounts[], const int
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ialltoallw.c b/src/mpi/coll/ialltoallw.c
index eb7eab2..1b3e6af 100644
--- a/src/mpi/coll/ialltoallw.c
+++ b/src/mpi/coll/ialltoallw.c
@@ -252,6 +252,8 @@ int MPIR_Ialltoallw_impl(const void *sendbuf, const int sendcounts[], const int
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -267,9 +269,6 @@ int MPIR_Ialltoallw_impl(const void *sendbuf, const int sendcounts[], const int
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ibarrier.c b/src/mpi/coll/ibarrier.c
index fe78e3a..eecf07d 100644
--- a/src/mpi/coll/ibarrier.c
+++ b/src/mpi/coll/ibarrier.c
@@ -171,6 +171,8 @@ int MPIR_Ibarrier_impl(MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -186,9 +188,6 @@ int MPIR_Ibarrier_impl(MPID_Comm *comm_ptr, MPI_Request *request)
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ibcast.c b/src/mpi/coll/ibcast.c
index 7070e24..d619407 100644
--- a/src/mpi/coll/ibcast.c
+++ b/src/mpi/coll/ibcast.c
@@ -857,6 +857,8 @@ int MPIR_Ibcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, M
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -872,9 +874,6 @@ int MPIR_Ibcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, M
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/iexscan.c b/src/mpi/coll/iexscan.c
index d962980..6393861 100644
--- a/src/mpi/coll/iexscan.c
+++ b/src/mpi/coll/iexscan.c
@@ -184,6 +184,8 @@ int MPIR_Iexscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -199,9 +201,6 @@ int MPIR_Iexscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/igather.c b/src/mpi/coll/igather.c
index 63fdccb..36efa80 100644
--- a/src/mpi/coll/igather.c
+++ b/src/mpi/coll/igather.c
@@ -511,6 +511,8 @@ int MPIR_Igather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -528,9 +530,6 @@ int MPIR_Igather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/igatherv.c b/src/mpi/coll/igatherv.c
index 300925c..f87d163 100644
--- a/src/mpi/coll/igatherv.c
+++ b/src/mpi/coll/igatherv.c
@@ -110,6 +110,8 @@ int MPIR_Igatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -127,9 +129,6 @@ int MPIR_Igatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ired_scat.c b/src/mpi/coll/ired_scat.c
index dbafea1..dc59220 100644
--- a/src/mpi/coll/ired_scat.c
+++ b/src/mpi/coll/ired_scat.c
@@ -1009,6 +1009,8 @@ int MPIR_Ireduce_scatter_impl(const void *sendbuf, void *recvbuf, const int recv
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -1026,9 +1028,6 @@ int MPIR_Ireduce_scatter_impl(const void *sendbuf, void *recvbuf, const int recv
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ired_scat_block.c b/src/mpi/coll/ired_scat_block.c
index d8c2b65..8d363d2 100644
--- a/src/mpi/coll/ired_scat_block.c
+++ b/src/mpi/coll/ired_scat_block.c
@@ -911,6 +911,8 @@ int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recv
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -928,10 +930,6 @@ int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recv
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ireduce.c b/src/mpi/coll/ireduce.c
index 138b1f4..cedd331 100644
--- a/src/mpi/coll/ireduce.c
+++ b/src/mpi/coll/ireduce.c
@@ -762,6 +762,8 @@ int MPIR_Ireduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -779,9 +781,6 @@ int MPIR_Ireduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/iscan.c b/src/mpi/coll/iscan.c
index 695bf11..a8c242e 100644
--- a/src/mpi/coll/iscan.c
+++ b/src/mpi/coll/iscan.c
@@ -318,6 +318,8 @@ int MPIR_Iscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -335,9 +337,6 @@ int MPIR_Iscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/iscatter.c b/src/mpi/coll/iscatter.c
index 8a00336..4ce45d6 100644
--- a/src/mpi/coll/iscatter.c
+++ b/src/mpi/coll/iscatter.c
@@ -546,6 +546,8 @@ int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -563,9 +565,6 @@ int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/iscatterv.c b/src/mpi/coll/iscatterv.c
index 90a04b8..4e86fcd 100644
--- a/src/mpi/coll/iscatterv.c
+++ b/src/mpi/coll/iscatterv.c
@@ -117,6 +117,8 @@ int MPIR_Iscatterv_impl(const void *sendbuf, const int sendcounts[], const int d
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_Request *reqp = NULL;
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
@@ -134,9 +136,6 @@ int MPIR_Iscatterv_impl(const void *sendbuf, const int sendcounts[], const int d
         /* --END USEREXTENSION-- */
     }
 
-    int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
-
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);

http://git.mpich.org/mpich.git/commitdiff/57f5b7b17decc5dfb447186b02fc1f35495618da

commit 57f5b7b17decc5dfb447186b02fc1f35495618da
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Tue May 14 11:51:43 2013 -0500

    Naming updates to IBM's NBC patch.
    
    We now provide two types of function pointers: _sched and _req.  The
    _sched functions just add instructions to the schedule, but no request
    is created.  The _req functions return an MPID request.
    
    Reviewed by Mike Blocksome @ IBM.

diff --git a/src/include/mpiimpl.h b/src/include/mpiimpl.h
index 5c6f1d8..d98ba9f 100644
--- a/src/include/mpiimpl.h
+++ b/src/include/mpiimpl.h
@@ -1975,95 +1975,95 @@ typedef struct MPID_Collops {
                            MPID_Comm *, int *);
 
     /* MPI-3 nonblocking collectives */
-    int (*Ibarrier)(MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ibarrier_optimized)(MPID_Comm *comm_ptr, MPID_Request **request);
-    int (*Ibcast)(void *buffer, int count, MPI_Datatype datatype, int root,
+    int (*Ibarrier_sched)(MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ibarrier_req)(MPID_Comm *comm_ptr, MPID_Request **request);
+    int (*Ibcast_sched)(void *buffer, int count, MPI_Datatype datatype, int root,
                   MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ibcast_optimized)(void *buffer, int count, MPI_Datatype datatype, int root,
+    int (*Ibcast_req)(void *buffer, int count, MPI_Datatype datatype, int root,
                             MPID_Comm *comm_ptr, MPID_Request **request);
-    int (*Igather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Igather_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                    int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
                    MPID_Sched_t s);
-    int (*Igather_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Igather_req)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                              int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
                              MPID_Request **request);
-    int (*Igatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Igatherv_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                     const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root,
                     MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Igatherv_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Igatherv_req)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                               const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root,
                               MPID_Comm *comm_ptr, MPID_Request **request);
-    int (*Iscatter)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Iscatter_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                     int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
                     MPID_Sched_t s);
-    int (*Iscatter_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Iscatter_req)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                               int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
                               MPID_Request **request);
-    int (*Iscatterv)(const void *sendbuf, const int *sendcounts, const int *displs,
+    int (*Iscatterv_sched)(const void *sendbuf, const int *sendcounts, const int *displs,
                      MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
                      int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Iscatterv_optimized)(const void *sendbuf, const int *sendcounts, const int *displs,
+    int (*Iscatterv_req)(const void *sendbuf, const int *sendcounts, const int *displs,
                                MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                int root, MPID_Comm *comm_ptr, MPID_Request **request);
-    int (*Iallgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Iallgather_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                       int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
                       MPID_Sched_t s);
-    int (*Iallgather_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Iallgather_req)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                                 int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
                                 MPID_Request **request);
-    int (*Iallgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Iallgatherv_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                        const int *recvcounts, const int *displs, MPI_Datatype recvtype,
                        MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Iallgatherv_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Iallgatherv_req)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                                  const int *recvcounts, const int *displs, MPI_Datatype recvtype,
                                  MPID_Comm *comm_ptr, MPID_Request ** request);
-    int (*Ialltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Ialltoall_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                      int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
                      MPID_Sched_t s);
-    int (*Ialltoall_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+    int (*Ialltoall_req)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                                int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
                                MPID_Request **request);
-    int (*Ialltoallv)(const void *sendbuf, const int *sendcounts, const int *sdispls,
+    int (*Ialltoallv_sched)(const void *sendbuf, const int *sendcounts, const int *sdispls,
                       MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
                       const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
                       MPID_Sched_t s);
-    int (*Ialltoallv_optimized)(const void *sendbuf, const int *sendcounts, const int *sdispls,
+    int (*Ialltoallv_req)(const void *sendbuf, const int *sendcounts, const int *sdispls,
                                 MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
                                 const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
                                 MPID_Request **request);
-    int (*Ialltoallw)(const void *sendbuf, const int *sendcounts, const int *sdispls,
+    int (*Ialltoallw_sched)(const void *sendbuf, const int *sendcounts, const int *sdispls,
                       const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
                       const int *rdispls, const MPI_Datatype *recvtypes,
                       MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ialltoallw_optimized)(const void *sendbuf, const int *sendcounts, const int *sdispls,
+    int (*Ialltoallw_req)(const void *sendbuf, const int *sendcounts, const int *sdispls,
                                 const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
                                 const int *rdispls, const MPI_Datatype *recvtypes,
                                 MPID_Comm *comm_ptr, MPID_Request **request);
-    int (*Ireduce)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+    int (*Ireduce_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                    int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ireduce_optimized)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+    int (*Ireduce_req)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                    int root, MPID_Comm *comm_ptr, MPID_Request **request);
-    int (*Iallreduce)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+    int (*Iallreduce_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
                       MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Iallreduce_optimized)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+    int (*Iallreduce_req)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
                                 MPI_Op op, MPID_Comm *comm_ptr, MPID_Request **request);
-    int (*Ireduce_scatter)(const void *sendbuf, void *recvbuf, const int *recvcounts,
+    int (*Ireduce_scatter_sched)(const void *sendbuf, void *recvbuf, const int *recvcounts,
                            MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ireduce_scatter_optimized)(const void *sendbuf, void *recvbuf, const int *recvcounts,
+    int (*Ireduce_scatter_req)(const void *sendbuf, void *recvbuf, const int *recvcounts,
                                      MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Request **request);
-    int (*Ireduce_scatter_block)(const void *sendbuf, void *recvbuf, int recvcount,
+    int (*Ireduce_scatter_block_sched)(const void *sendbuf, void *recvbuf, int recvcount,
                                  MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
                                  MPID_Sched_t s);
-    int (*Ireduce_scatter_block_optimized)(const void *sendbuf, void *recvbuf, int recvcount,
+    int (*Ireduce_scatter_block_req)(const void *sendbuf, void *recvbuf, int recvcount,
                                            MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
                                            MPID_Request **request);
-    int (*Iscan)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+    int (*Iscan_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                  MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Iscan_optimized)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+    int (*Iscan_req)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                            MPID_Comm *comm_ptr, MPID_Request **request);
-    int (*Iexscan)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+    int (*Iexscan_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                    MPID_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Iexscan_optimized)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+    int (*Iexscan_req)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                              MPID_Comm *comm_ptr, MPID_Request **request);
 
     struct MPID_Collops *prev_coll_fns; /* when overriding this table, set this to point to the old table */
diff --git a/src/mpi/coll/iallgather.c b/src/mpi/coll/iallgather.c
index 095c5c4..0a0858e 100644
--- a/src/mpi/coll/iallgather.c
+++ b/src/mpi/coll/iallgather.c
@@ -521,8 +521,8 @@ int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendt
     newcomm_ptr = comm_ptr->local_comm;
 
     if (sendcount != 0) {
-        MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Igather);
-        mpi_errno = newcomm_ptr->coll_fns->Igather(sendbuf, sendcount, sendtype,
+        MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Igather_sched);
+        mpi_errno = newcomm_ptr->coll_fns->Igather_sched(sendbuf, sendcount, sendtype,
                                                    tmp_buf, sendcount, sendtype,
                                                    0, newcomm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -531,12 +531,12 @@ int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendt
 
     /* first broadcast from left to right group, then from right to
        left group */
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ibcast);
+    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ibcast_sched);
     if (comm_ptr->is_low_group) {
         /* bcast to right*/
         if (sendcount != 0) {
             root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-            mpi_errno = comm_ptr->coll_fns->Ibcast(tmp_buf, sendcount*local_size,
+            mpi_errno = comm_ptr->coll_fns->Ibcast_sched(tmp_buf, sendcount*local_size,
                                                    sendtype, root, comm_ptr, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
@@ -546,7 +546,7 @@ int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendt
         /* receive bcast from right */
         if (recvcount != 0) {
             root = 0;
-            mpi_errno = comm_ptr->coll_fns->Ibcast(recvbuf, recvcount*remote_size,
+            mpi_errno = comm_ptr->coll_fns->Ibcast_sched(recvbuf, recvcount*remote_size,
                                                    recvtype, root, comm_ptr, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
@@ -556,7 +556,7 @@ int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendt
         /* receive bcast from left */
         if (recvcount != 0) {
             root = 0;
-            mpi_errno = comm_ptr->coll_fns->Ibcast(recvbuf, recvcount*remote_size,
+            mpi_errno = comm_ptr->coll_fns->Ibcast_sched(recvbuf, recvcount*remote_size,
                                                    recvtype, root, comm_ptr, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
@@ -566,7 +566,7 @@ int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendt
         /* bcast to left */
         if (sendcount != 0) {
             root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-            mpi_errno = comm_ptr->coll_fns->Ibcast(tmp_buf, sendcount*local_size,
+            mpi_errno = comm_ptr->coll_fns->Ibcast_sched(tmp_buf, sendcount*local_size,
                                                    sendtype, root, comm_ptr, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
@@ -593,9 +593,9 @@ int MPIR_Iallgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendty
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Iallgather_optimized != NULL) {
+    if (comm_ptr->coll_fns->Iallgather_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Iallgather_optimized(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,comm_ptr, &reqp);
+        mpi_errno = comm_ptr->coll_fns->Iallgather_req(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,comm_ptr, &reqp);
         if (reqp) {
             *request = reqp->handle;
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -613,8 +613,8 @@ int MPIR_Iallgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendty
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Iallgather != NULL);
-    mpi_errno = comm_ptr->coll_fns->Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Iallgather_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Iallgather_sched(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/iallgatherv.c b/src/mpi/coll/iallgatherv.c
index 37b2841..acffde4 100644
--- a/src/mpi/coll/iallgatherv.c
+++ b/src/mpi/coll/iallgatherv.c
@@ -617,20 +617,20 @@ int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype send
     remote_size = comm_ptr->remote_size;
     rank = comm_ptr->rank;
 
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Igatherv);
+    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Igatherv_sched);
 
     /* first do an intercommunicator gatherv from left to right group,
        then from right to left group */
     if (comm_ptr->is_low_group) {
         /* gatherv from right group */
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = comm_ptr->coll_fns->Igatherv(sendbuf, sendcount, sendtype, recvbuf,
+        mpi_errno = comm_ptr->coll_fns->Igatherv_sched(sendbuf, sendcount, sendtype, recvbuf,
                                                  recvcounts, displs, recvtype, root,
                                                  comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         /* gatherv to right group */
         root = 0;
-        mpi_errno = comm_ptr->coll_fns->Igatherv(sendbuf, sendcount, sendtype, recvbuf,
+        mpi_errno = comm_ptr->coll_fns->Igatherv_sched(sendbuf, sendcount, sendtype, recvbuf,
                                                  recvcounts, displs, recvtype, root,
                                                  comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -638,13 +638,13 @@ int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype send
     else {
         /* gatherv to left group  */
         root = 0;
-        mpi_errno = comm_ptr->coll_fns->Igatherv(sendbuf, sendcount, sendtype, recvbuf,
+        mpi_errno = comm_ptr->coll_fns->Igatherv_sched(sendbuf, sendcount, sendtype, recvbuf,
                                                  recvcounts, displs, recvtype, root,
                                                  comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         /* gatherv from left group */
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = comm_ptr->coll_fns->Igatherv(sendbuf, sendcount, sendtype, recvbuf,
+        mpi_errno = comm_ptr->coll_fns->Igatherv_sched(sendbuf, sendcount, sendtype, recvbuf,
                                                  recvcounts, displs, recvtype, root,
                                                  comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -662,7 +662,7 @@ int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype send
     }
 
     newcomm_ptr = comm_ptr->local_comm;
-    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Ibcast);
+    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Ibcast_sched);
 
     mpi_errno = MPIR_Type_indexed_impl(remote_size, recvcounts, displs, recvtype, &newtype);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -670,7 +670,7 @@ int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype send
     mpi_errno = MPIR_Type_commit_impl(&newtype);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    mpi_errno = newcomm_ptr->coll_fns->Ibcast(recvbuf, 1, newtype, 0, newcomm_ptr, s);
+    mpi_errno = newcomm_ptr->coll_fns->Ibcast_sched(recvbuf, 1, newtype, 0, newcomm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIR_Type_free_impl(&newtype);
@@ -695,9 +695,9 @@ int MPIR_Iallgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendt
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Iallgatherv_optimized != NULL) {
+    if (comm_ptr->coll_fns->Iallgatherv_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Iallgatherv_optimized(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm_ptr, &reqp);
+        mpi_errno = comm_ptr->coll_fns->Iallgatherv_req(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm_ptr, &reqp);
         if (reqp) {
             *request = reqp->handle;
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -715,8 +715,8 @@ int MPIR_Iallgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendt
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Iallgatherv != NULL);
-    mpi_errno = comm_ptr->coll_fns->Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Iallgatherv_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Iallgatherv_sched(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/iallreduce.c b/src/mpi/coll/iallreduce.c
index f51bee1..0a0147c 100644
--- a/src/mpi/coll/iallreduce.c
+++ b/src/mpi/coll/iallreduce.c
@@ -549,8 +549,8 @@ int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Dat
     }
     lcomm_ptr = comm_ptr->local_comm;
 
-    MPIU_Assert(lcomm_ptr->coll_fns && lcomm_ptr->coll_fns->Ibcast);
-    mpi_errno = lcomm_ptr->coll_fns->Ibcast(recvbuf, count, datatype, 0, lcomm_ptr, s);
+    MPIU_Assert(lcomm_ptr->coll_fns && lcomm_ptr->coll_fns->Ibcast_sched);
+    mpi_errno = lcomm_ptr->coll_fns->Ibcast_sched(recvbuf, count, datatype, 0, lcomm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
 fn_exit:
@@ -594,15 +594,15 @@ int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datat
         /* take care of the MPI_IN_PLACE case. For reduce,
            MPI_IN_PLACE is specified only on the root;
            for allreduce it is specified on all processes. */
-        MPIU_Assert(nc->coll_fns && nc->coll_fns->Ireduce);
+        MPIU_Assert(nc->coll_fns && nc->coll_fns->Ireduce_sched);
 
         if ((sendbuf == MPI_IN_PLACE) && (comm_ptr->node_comm->rank != 0)) {
             /* IN_PLACE and not root of reduce. Data supplied to this
                allreduce is in recvbuf. Pass that as the sendbuf to reduce. */
-            mpi_errno = nc->coll_fns->Ireduce(recvbuf, NULL, count, datatype, op, 0, nc, s);
+            mpi_errno = nc->coll_fns->Ireduce_sched(recvbuf, NULL, count, datatype, op, 0, nc, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         } else {
-            mpi_errno = nc->coll_fns->Ireduce(sendbuf, recvbuf, count, datatype, op, 0, nc, s);
+            mpi_errno = nc->coll_fns->Ireduce_sched(sendbuf, recvbuf, count, datatype, op, 0, nc, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
         MPID_SCHED_BARRIER(s);
@@ -618,16 +618,16 @@ int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datat
 
     /* now do an IN_PLACE allreduce among the local roots of all nodes */
     if (nrc != NULL) {
-        MPIU_Assert(nrc->coll_fns && nrc->coll_fns->Iallreduce);
-        mpi_errno = nrc->coll_fns->Iallreduce(MPI_IN_PLACE, recvbuf, count, datatype, op, nrc, s);
+        MPIU_Assert(nrc->coll_fns && nrc->coll_fns->Iallreduce_sched);
+        mpi_errno = nrc->coll_fns->Iallreduce_sched(MPI_IN_PLACE, recvbuf, count, datatype, op, nrc, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         MPID_SCHED_BARRIER(s);
     }
 
     /* now broadcast the result among local processes */
     if (comm_ptr->node_comm != NULL) {
-        MPIU_Assert(nc->coll_fns && nc->coll_fns->Ibcast);
-        mpi_errno = nc->coll_fns->Ibcast(recvbuf, count, datatype, 0, nc, s);
+        MPIU_Assert(nc->coll_fns && nc->coll_fns->Ibcast_sched);
+        mpi_errno = nc->coll_fns->Ibcast_sched(recvbuf, count, datatype, 0, nc, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         MPID_SCHED_BARRIER(s);
     }
@@ -651,9 +651,9 @@ int MPIR_Iallreduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Data
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Iallreduce_optimized != NULL) {
+    if (comm_ptr->coll_fns->Iallreduce_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Iallreduce_optimized(sendbuf, recvbuf, count, datatype, op, comm_ptr, &reqp);
+        mpi_errno = comm_ptr->coll_fns->Iallreduce_req(sendbuf, recvbuf, count, datatype, op, comm_ptr, &reqp);
         if (reqp) {
             *request = reqp->handle;
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -671,8 +671,8 @@ int MPIR_Iallreduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Data
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Iallreduce != NULL);
-    mpi_errno = comm_ptr->coll_fns->Iallreduce(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Iallreduce_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Iallreduce_sched(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/ialltoall.c b/src/mpi/coll/ialltoall.c
index 593ae79..af7484c 100644
--- a/src/mpi/coll/ialltoall.c
+++ b/src/mpi/coll/ialltoall.c
@@ -523,9 +523,9 @@ int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Ialltoall_optimized != NULL) {
+    if (comm_ptr->coll_fns->Ialltoall_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Ialltoall_optimized(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, &reqp);
+        mpi_errno = comm_ptr->coll_fns->Ialltoall_req(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, &reqp);
         if (reqp) {
             *request = reqp->handle;
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -543,8 +543,8 @@ int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ialltoall != NULL);
-    mpi_errno = comm_ptr->coll_fns->Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Ialltoall_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Ialltoall_sched(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/ialltoallv.c b/src/mpi/coll/ialltoallv.c
index b73e211..d7063b4 100644
--- a/src/mpi/coll/ialltoallv.c
+++ b/src/mpi/coll/ialltoallv.c
@@ -250,9 +250,9 @@ int MPIR_Ialltoallv_impl(const void *sendbuf, const int sendcounts[], const int
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Ialltoallv_optimized != NULL) {
+    if (comm_ptr->coll_fns->Ialltoallv_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Ialltoallv_optimized(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm_ptr, &reqp);
+        mpi_errno = comm_ptr->coll_fns->Ialltoallv_req(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm_ptr, &reqp);
         if (reqp) {
             *request = reqp->handle;
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -271,8 +271,8 @@ int MPIR_Ialltoallv_impl(const void *sendbuf, const int sendcounts[], const int
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ialltoallv != NULL);
-    mpi_errno = comm_ptr->coll_fns->Ialltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Ialltoallv_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Ialltoallv_sched(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/ialltoallw.c b/src/mpi/coll/ialltoallw.c
index 9409b44..eb7eab2 100644
--- a/src/mpi/coll/ialltoallw.c
+++ b/src/mpi/coll/ialltoallw.c
@@ -256,9 +256,9 @@ int MPIR_Ialltoallw_impl(const void *sendbuf, const int sendcounts[], const int
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Ialltoallw_optimized != NULL) {
+    if (comm_ptr->coll_fns->Ialltoallw_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Ialltoallw_optimized(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, &reqp);
+        mpi_errno = comm_ptr->coll_fns->Ialltoallw_req(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, &reqp);
         if (reqp) {
             *request = reqp->handle;
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -276,8 +276,8 @@ int MPIR_Ialltoallw_impl(const void *sendbuf, const int sendcounts[], const int
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ialltoallw != NULL);
-    mpi_errno = comm_ptr->coll_fns->Ialltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Ialltoallw_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Ialltoallw_sched(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/ibarrier.c b/src/mpi/coll/ibarrier.c
index 3bf12da..fe78e3a 100644
--- a/src/mpi/coll/ibarrier.c
+++ b/src/mpi/coll/ibarrier.c
@@ -112,8 +112,8 @@ int MPIR_Ibarrier_inter(MPID_Comm *comm_ptr, MPID_Sched_t s)
     }
 
     /* do a barrier on the local intracommunicator */
-    MPIU_Assert(comm_ptr->local_comm->coll_fns && comm_ptr->local_comm->coll_fns->Ibarrier);
-    mpi_errno = comm_ptr->local_comm->coll_fns->Ibarrier(comm_ptr->local_comm, s);
+    MPIU_Assert(comm_ptr->local_comm->coll_fns && comm_ptr->local_comm->coll_fns->Ibarrier_sched);
+    mpi_errno = comm_ptr->local_comm->coll_fns->Ibarrier_sched(comm_ptr->local_comm, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPID_SCHED_BARRIER(s);
@@ -128,30 +128,30 @@ int MPIR_Ibarrier_inter(MPID_Comm *comm_ptr, MPID_Sched_t s)
 
     /* first broadcast from left to right group, then from right to
        left group */
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ibcast);
+    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ibcast_sched);
     if (comm_ptr->is_low_group) {
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = comm_ptr->coll_fns->Ibcast(buf, 1, MPI_BYTE, root, comm_ptr, s);
+        mpi_errno = comm_ptr->coll_fns->Ibcast_sched(buf, 1, MPI_BYTE, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
         MPID_SCHED_BARRIER(s);
 
         /* receive bcast from right */
         root = 0;
-        mpi_errno = comm_ptr->coll_fns->Ibcast(buf, 1, MPI_BYTE, root, comm_ptr, s);
+        mpi_errno = comm_ptr->coll_fns->Ibcast_sched(buf, 1, MPI_BYTE, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     else {
         /* receive bcast from left */
         root = 0;
-        mpi_errno = comm_ptr->coll_fns->Ibcast(buf, 1, MPI_BYTE, root, comm_ptr, s);
+        mpi_errno = comm_ptr->coll_fns->Ibcast_sched(buf, 1, MPI_BYTE, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
         MPID_SCHED_BARRIER(s);
 
         /* bcast to left */
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = comm_ptr->coll_fns->Ibcast(buf, 1, MPI_BYTE, root, comm_ptr, s);
+        mpi_errno = comm_ptr->coll_fns->Ibcast_sched(buf, 1, MPI_BYTE, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -175,9 +175,9 @@ int MPIR_Ibarrier_impl(MPID_Comm *comm_ptr, MPI_Request *request)
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Ibarrier_optimized != NULL) {
+    if (comm_ptr->coll_fns->Ibarrier_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Ibarrier_optimized(comm_ptr, &reqp);
+        mpi_errno = comm_ptr->coll_fns->Ibarrier_req(comm_ptr, &reqp);
         if (reqp) {
             *request = reqp->handle;
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -194,8 +194,8 @@ int MPIR_Ibarrier_impl(MPID_Comm *comm_ptr, MPI_Request *request)
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns->Ibarrier != NULL);
-    mpi_errno = comm_ptr->coll_fns->Ibarrier(comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Ibarrier_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Ibarrier_sched(comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/ibcast.c b/src/mpi/coll/ibcast.c
index 57ddd5d..7070e24 100644
--- a/src/mpi/coll/ibcast.c
+++ b/src/mpi/coll/ibcast.c
@@ -291,8 +291,8 @@ fn_fail:
 */
 /* It would be nice to just call:
  * ----8<----
- * comm_ptr->coll_fns->Iscatter(...);
- * comm_ptr->coll_fns->Iallgather(...);
+ * comm_ptr->coll_fns->Iscatter_sched(...);
+ * comm_ptr->coll_fns->Iallgather_sched(...);
  * ----8<----
  *
  * But that results in inefficient additional memory allocation and copies
@@ -685,11 +685,11 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
     MPIU_Assert(is_homogeneous); /* we don't handle the hetero case yet */
     if (comm_ptr->node_comm) {
         MPIU_Assert(comm_ptr->node_comm->coll_fns);
-        MPIU_Assert(comm_ptr->node_comm->coll_fns->Ibcast);
+        MPIU_Assert(comm_ptr->node_comm->coll_fns->Ibcast_sched);
     }
     if (comm_ptr->node_roots_comm) {
         MPIU_Assert(comm_ptr->node_roots_comm->coll_fns);
-        MPIU_Assert(comm_ptr->node_roots_comm->coll_fns->Ibcast);
+        MPIU_Assert(comm_ptr->node_roots_comm->coll_fns->Ibcast_sched);
     }
 
     /* MPI_Type_size() might not give the accurate size of the packed
@@ -724,7 +724,7 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
     /* perform the internode broadcast */
     if (comm_ptr->node_roots_comm != NULL)
     {
-        mpi_errno = comm_ptr->node_roots_comm->coll_fns->Ibcast(buffer, count, datatype,
+        mpi_errno = comm_ptr->node_roots_comm->coll_fns->Ibcast_sched(buffer, count, datatype,
                                                                 MPIU_Get_internode_rank(comm_ptr, root),
                                                                 comm_ptr->node_roots_comm, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -735,7 +735,7 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
     /* perform the intranode broadcast on all except for the root's node */
     if (comm_ptr->node_comm != NULL)
     {
-        mpi_errno = comm_ptr->node_comm->coll_fns->Ibcast(buffer, count, datatype, 0, comm_ptr->node_comm, s);
+        mpi_errno = comm_ptr->node_comm->coll_fns->Ibcast_sched(buffer, count, datatype, 0, comm_ptr->node_comm, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -838,8 +838,8 @@ int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root,
 
         /* now do the usual broadcast on this intracommunicator
            with rank 0 as root. */
-        MPIU_Assert(comm_ptr->local_comm->coll_fns && comm_ptr->local_comm->coll_fns->Ibcast);
-        mpi_errno = comm_ptr->local_comm->coll_fns->Ibcast(buffer, count, datatype, root, comm_ptr->local_comm, s);
+        MPIU_Assert(comm_ptr->local_comm->coll_fns && comm_ptr->local_comm->coll_fns->Ibcast_sched);
+        mpi_errno = comm_ptr->local_comm->coll_fns->Ibcast_sched(buffer, count, datatype, root, comm_ptr->local_comm, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
 
@@ -861,9 +861,9 @@ int MPIR_Ibcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, M
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Ibcast_optimized != NULL) {
+    if (comm_ptr->coll_fns->Ibcast_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Ibcast_optimized(buffer, count, datatype, root, comm_ptr, &reqp);
+        mpi_errno = comm_ptr->coll_fns->Ibcast_req(buffer, count, datatype, root, comm_ptr, &reqp);
         if (reqp) {
             *request = reqp->handle;
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -880,8 +880,8 @@ int MPIR_Ibcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, M
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns->Ibcast != NULL);
-    mpi_errno = comm_ptr->coll_fns->Ibcast(buffer, count, datatype, root, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Ibcast_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Ibcast_sched(buffer, count, datatype, root, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/iexscan.c b/src/mpi/coll/iexscan.c
index c28c2c8..d962980 100644
--- a/src/mpi/coll/iexscan.c
+++ b/src/mpi/coll/iexscan.c
@@ -188,9 +188,9 @@ int MPIR_Iexscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Iexscan_optimized != NULL) {
+    if (comm_ptr->coll_fns->Iexscan_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Iexscan_optimized(sendbuf, recvbuf, count, datatype, op, comm_ptr, &reqp);
+        mpi_errno = comm_ptr->coll_fns->Iexscan_req(sendbuf, recvbuf, count, datatype, op, comm_ptr, &reqp);
         if (reqp) {
             *request = reqp->handle;
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -208,8 +208,8 @@ int MPIR_Iexscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Iexscan != NULL);
-    mpi_errno = comm_ptr->coll_fns->Iexscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Iexscan_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Iexscan_sched(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/igather.c b/src/mpi/coll/igather.c
index 3e1b51d..63fdccb 100644
--- a/src/mpi/coll/igather.c
+++ b/src/mpi/coll/igather.c
@@ -464,8 +464,8 @@ int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
             newcomm_ptr = comm_ptr->local_comm;
 
             /* now do the a local gather on this intracommunicator */
-            MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Igather);
-            mpi_errno = newcomm_ptr->coll_fns->Igather(sendbuf, sendcount, sendtype,
+            MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Igather_sched);
+            mpi_errno = newcomm_ptr->coll_fns->Igather_sched(sendbuf, sendcount, sendtype,
                                                        tmp_buf, sendcount, sendtype, 0,
                                                        newcomm_ptr, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -515,9 +515,9 @@ int MPIR_Igather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Igather_optimized != NULL) {
+    if (comm_ptr->coll_fns->Igather_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Igather_optimized(sendbuf, sendcount, sendtype,
+        mpi_errno = comm_ptr->coll_fns->Igather_req(sendbuf, sendcount, sendtype,
                                                           recvbuf, recvcount, recvtype,
                                                           root, comm_ptr, &reqp);
         if (reqp) {
@@ -537,8 +537,8 @@ int MPIR_Igather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Igather != NULL);
-    mpi_errno = comm_ptr->coll_fns->Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Igather_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Igather_sched(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/igatherv.c b/src/mpi/coll/igatherv.c
index 388c012..300925c 100644
--- a/src/mpi/coll/igatherv.c
+++ b/src/mpi/coll/igatherv.c
@@ -114,9 +114,9 @@ int MPIR_Igatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Igatherv_optimized != NULL) {
+    if (comm_ptr->coll_fns->Igatherv_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Igatherv_optimized(sendbuf, sendcount, sendtype,
+        mpi_errno = comm_ptr->coll_fns->Igatherv_req(sendbuf, sendcount, sendtype,
                                                            recvbuf, recvcounts, displs, recvtype,
                                                            root, comm_ptr, &reqp);
         if (reqp) {
@@ -136,8 +136,8 @@ int MPIR_Igatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Igatherv != NULL);
-    mpi_errno = comm_ptr->coll_fns->Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Igatherv_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Igatherv_sched(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/ired_scat.c b/src/mpi/coll/ired_scat.c
index 83a16aa..dbafea1 100644
--- a/src/mpi/coll/ired_scat.c
+++ b/src/mpi/coll/ired_scat.c
@@ -942,11 +942,11 @@ int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int rec
 
     /* first do a reduce from right group to rank 0 in left group,
        then from left group to rank 0 in right group*/
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ireduce);
+    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ireduce_sched);
     if (comm_ptr->is_low_group) {
         /* reduce from right group to rank 0*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, tmp_buf, total_count,
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -955,14 +955,14 @@ int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int rec
 
         /* reduce to rank 0 of right group */
         root = 0;
-        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, tmp_buf, total_count,
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     else {
         /* reduce to rank 0 of right group */
         root = 0;
-        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, tmp_buf, total_count,
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -971,7 +971,7 @@ int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int rec
 
         /* reduce from right group to rank 0*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, tmp_buf, total_count,
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
@@ -985,8 +985,8 @@ int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int rec
 
     newcomm_ptr = comm_ptr->local_comm;
 
-    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Iscatterv);
-    mpi_errno = newcomm_ptr->coll_fns->Iscatterv(tmp_buf, recvcounts, disps, datatype,
+    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Iscatterv_sched);
+    mpi_errno = newcomm_ptr->coll_fns->Iscatterv_sched(tmp_buf, recvcounts, disps, datatype,
                                                  recvbuf, recvcounts[rank], datatype, 0,
                                                  newcomm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -1013,9 +1013,9 @@ int MPIR_Ireduce_scatter_impl(const void *sendbuf, void *recvbuf, const int recv
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Ireduce_scatter_optimized != NULL) {
+    if (comm_ptr->coll_fns->Ireduce_scatter_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_optimized(sendbuf, recvbuf, recvcounts,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_req(sendbuf, recvbuf, recvcounts,
                                                                   datatype, op,
                                                                   comm_ptr, &reqp);
         if (reqp) {
@@ -1035,8 +1035,8 @@ int MPIR_Ireduce_scatter_impl(const void *sendbuf, void *recvbuf, const int recv
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ireduce_scatter != NULL);
-    mpi_errno = comm_ptr->coll_fns->Ireduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Ireduce_scatter_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_sched(sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/ired_scat_block.c b/src/mpi/coll/ired_scat_block.c
index 06d2dbc..d8c2b65 100644
--- a/src/mpi/coll/ired_scat_block.c
+++ b/src/mpi/coll/ired_scat_block.c
@@ -846,11 +846,11 @@ int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int rec
 
     /* first do a reduce from right group to rank 0 in left group,
        then from left group to rank 0 in right group*/
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ireduce);
+    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ireduce_sched);
     if (comm_ptr->is_low_group) {
         /* reduce from right group to rank 0*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, tmp_buf, total_count,
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -859,14 +859,14 @@ int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int rec
 
         /* reduce to rank 0 of right group */
         root = 0;
-        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, tmp_buf, total_count,
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
     else {
         /* reduce to rank 0 of right group */
         root = 0;
-        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, tmp_buf, total_count,
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -875,7 +875,7 @@ int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int rec
 
         /* reduce from right group to rank 0*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
-        mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, tmp_buf, total_count,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, tmp_buf, total_count,
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
@@ -889,8 +889,8 @@ int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int rec
 
     newcomm_ptr = comm_ptr->local_comm;
 
-    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Iscatter);
-    mpi_errno = newcomm_ptr->coll_fns->Iscatter(tmp_buf, recvcount, datatype,
+    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Iscatter_sched);
+    mpi_errno = newcomm_ptr->coll_fns->Iscatter_sched(tmp_buf, recvcount, datatype,
                                                 recvbuf, recvcount, datatype, 0,
                                                 newcomm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -915,9 +915,9 @@ int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recv
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Ireduce_scatter_block_optimized != NULL) {
+    if (comm_ptr->coll_fns->Ireduce_scatter_block_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_block_optimized(sendbuf, recvbuf, recvcount,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_block_req(sendbuf, recvbuf, recvcount,
                                                                         datatype, op,
                                                                         comm_ptr, &reqp);
         if (reqp) {
@@ -938,8 +938,8 @@ int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recv
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ireduce_scatter_block != NULL);
-    mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Ireduce_scatter_block_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_block_sched(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/ireduce.c b/src/mpi/coll/ireduce.c
index cec9b50..138b1f4 100644
--- a/src/mpi/coll/ireduce.c
+++ b/src/mpi/coll/ireduce.c
@@ -612,20 +612,20 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
 
     /* do the intranode reduce on all nodes other than the root's node */
     if (nc != NULL && MPIU_Get_intranode_rank(comm_ptr, root) == -1) {
-        MPIU_Assert(nc->coll_fns && nc->coll_fns->Ireduce);
-        mpi_errno = nc->coll_fns->Ireduce(sendbuf, tmp_buf, count, datatype, op, 0, nc, s);
+        MPIU_Assert(nc->coll_fns && nc->coll_fns->Ireduce_sched);
+        mpi_errno = nc->coll_fns->Ireduce_sched(sendbuf, tmp_buf, count, datatype, op, 0, nc, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         MPID_SCHED_BARRIER(s);
     }
 
     /* do the internode reduce to the root's node */
     if (nrc != NULL) {
-        MPIU_Assert(nrc->coll_fns && nrc->coll_fns->Ireduce);
+        MPIU_Assert(nrc->coll_fns && nrc->coll_fns->Ireduce_sched);
         if (nrc->rank != MPIU_Get_internode_rank(comm_ptr, root)) {
             /* I am not on root's node.  Use tmp_buf if we
                participated in the first reduce, otherwise use sendbuf */
             const void *buf = (nc == NULL ? sendbuf : tmp_buf);
-            mpi_errno = nrc->coll_fns->Ireduce(buf, NULL, count, datatype,
+            mpi_errno = nrc->coll_fns->Ireduce_sched(buf, NULL, count, datatype,
                                                op, MPIU_Get_internode_rank(comm_ptr, root),
                                                nrc, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -636,7 +636,7 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
                 /* I am not the root though. I don't have a valid recvbuf.
                    Use tmp_buf as recvbuf. */
 
-                mpi_errno = nrc->coll_fns->Ireduce(sendbuf, tmp_buf, count, datatype,
+                mpi_errno = nrc->coll_fns->Ireduce_sched(sendbuf, tmp_buf, count, datatype,
                                                    op, MPIU_Get_internode_rank(comm_ptr, root),
                                                    nrc, s);
                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -648,7 +648,7 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
             else {
                 /* I am the root. in_place is automatically handled. */
 
-                mpi_errno = nrc->coll_fns->Ireduce(sendbuf, recvbuf, count, datatype,
+                mpi_errno = nrc->coll_fns->Ireduce_sched(sendbuf, recvbuf, count, datatype,
                                                    op, MPIU_Get_internode_rank(comm_ptr, root),
                                                    nrc, s);
                 if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -662,7 +662,7 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
 
     /* do the intranode reduce on the root's node */
     if (nc != NULL && MPIU_Get_intranode_rank(comm_ptr, root) != -1) {
-        mpi_errno = nc->coll_fns->Ireduce(sendbuf, recvbuf, count, datatype,
+        mpi_errno = nc->coll_fns->Ireduce_sched(sendbuf, recvbuf, count, datatype,
                                           op, MPIU_Get_intranode_rank(comm_ptr, root),
                                           nc, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -766,9 +766,9 @@ int MPIR_Ireduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Ireduce_optimized != NULL) {
+    if (comm_ptr->coll_fns->Ireduce_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Ireduce_optimized(sendbuf, recvbuf, count,
+        mpi_errno = comm_ptr->coll_fns->Ireduce_req(sendbuf, recvbuf, count,
                                                           datatype, op, root,
                                                           comm_ptr, &reqp);
         if (reqp) {
@@ -788,8 +788,8 @@ int MPIR_Ireduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ireduce != NULL);
-    mpi_errno = comm_ptr->coll_fns->Ireduce(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Ireduce_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/iscan.c b/src/mpi/coll/iscan.c
index a71f278..695bf11 100644
--- a/src/mpi/coll/iscan.c
+++ b/src/mpi/coll/iscan.c
@@ -195,10 +195,10 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
     node_comm = comm_ptr->node_comm;
     roots_comm = comm_ptr->node_roots_comm;
     if (node_comm) {
-        MPIU_Assert(node_comm->coll_fns && node_comm->coll_fns->Iscan && node_comm->coll_fns->Ibcast);
+        MPIU_Assert(node_comm->coll_fns && node_comm->coll_fns->Iscan_sched && node_comm->coll_fns->Ibcast_sched);
     }
     if (roots_comm) {
-        MPIU_Assert(roots_comm->coll_fns && roots_comm->coll_fns->Iscan);
+        MPIU_Assert(roots_comm->coll_fns && roots_comm->coll_fns->Iscan_sched);
     }
 
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
@@ -226,7 +226,7 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
     /* perform intranode scan to get temporary result in recvbuf. if there is only
        one process, just copy the raw data. */
     if (node_comm != NULL) {
-        mpi_errno = node_comm->coll_fns->Iscan(sendbuf, recvbuf, count, datatype, op, node_comm, s);
+        mpi_errno = node_comm->coll_fns->Iscan_sched(sendbuf, recvbuf, count, datatype, op, node_comm, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         MPID_SCHED_BARRIER(s);
     }
@@ -265,7 +265,7 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
         int roots_rank = MPIU_Get_internode_rank(comm_ptr, rank);
         MPIU_Assert(roots_rank == roots_comm->rank);
 
-        mpi_errno = roots_comm->coll_fns->Iscan(localfulldata, prefulldata, count, datatype, op, roots_comm, s);
+        mpi_errno = roots_comm->coll_fns->Iscan_sched(localfulldata, prefulldata, count, datatype, op, roots_comm, s);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         MPID_SCHED_BARRIER(s);
 
@@ -292,7 +292,7 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
          * "prefulldata" from another leader into "tempbuf" */
 
         if (node_comm != NULL) {
-            mpi_errno = node_comm->coll_fns->Ibcast(tempbuf, count, datatype, 0, node_comm, s);
+            mpi_errno = node_comm->coll_fns->Ibcast_sched(tempbuf, count, datatype, 0, node_comm, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
             MPID_SCHED_BARRIER(s);
         }
@@ -322,9 +322,9 @@ int MPIR_Iscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Iscan_optimized != NULL) {
+    if (comm_ptr->coll_fns->Iscan_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Iscan_optimized(sendbuf, recvbuf, count,
+        mpi_errno = comm_ptr->coll_fns->Iscan_req(sendbuf, recvbuf, count,
                                                         datatype, op,
                                                         comm_ptr, &reqp);
         if (reqp) {
@@ -343,8 +343,8 @@ int MPIR_Iscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns->Iscan != NULL);
-    mpi_errno = comm_ptr->coll_fns->Iscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Iscan_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Iscan_sched(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/iscatter.c b/src/mpi/coll/iscatter.c
index 66a1e98..8a00336 100644
--- a/src/mpi/coll/iscatter.c
+++ b/src/mpi/coll/iscatter.c
@@ -503,8 +503,8 @@ int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
 
             /* now do the usual scatter on this intracommunicator */
             MPIU_Assert(newcomm_ptr->coll_fns != NULL);
-            MPIU_Assert(newcomm_ptr->coll_fns->Iscatter != NULL);
-            mpi_errno = newcomm_ptr->coll_fns->Iscatter(tmp_buf, recvcount, recvtype,
+            MPIU_Assert(newcomm_ptr->coll_fns->Iscatter_sched != NULL);
+            mpi_errno = newcomm_ptr->coll_fns->Iscatter_sched(tmp_buf, recvcount, recvtype,
                                                         recvbuf, recvcount, recvtype,
                                                         0, newcomm_ptr, s);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -550,9 +550,9 @@ int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Iscatter_optimized != NULL) {
+    if (comm_ptr->coll_fns->Iscatter_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Iscatter_optimized(sendbuf, sendcount, sendtype,
+        mpi_errno = comm_ptr->coll_fns->Iscatter_req(sendbuf, sendcount, sendtype,
                                                            recvbuf, recvcount, recvtype,
                                                            root, comm_ptr, &reqp);
         if (reqp) {
@@ -571,8 +571,8 @@ int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns->Iscatter != NULL);
-    mpi_errno = comm_ptr->coll_fns->Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Iscatter_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Iscatter_sched(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/coll/iscatterv.c b/src/mpi/coll/iscatterv.c
index 0b12958..90a04b8 100644
--- a/src/mpi/coll/iscatterv.c
+++ b/src/mpi/coll/iscatterv.c
@@ -121,9 +121,9 @@ int MPIR_Iscatterv_impl(const void *sendbuf, const int sendcounts[], const int d
     *request = MPI_REQUEST_NULL;
 
     MPIU_Assert(comm_ptr->coll_fns != NULL);
-    if (comm_ptr->coll_fns->Iscatterv_optimized != NULL) {
+    if (comm_ptr->coll_fns->Iscatterv_req != NULL) {
         /* --BEGIN USEREXTENSION-- */
-        mpi_errno = comm_ptr->coll_fns->Iscatterv_optimized(sendbuf, sendcounts, displs, sendtype,
+        mpi_errno = comm_ptr->coll_fns->Iscatterv_req(sendbuf, sendcounts, displs, sendtype,
                                                            recvbuf, recvcount, recvtype,
                                                            root, comm_ptr, &reqp);
         if (reqp) {
@@ -142,8 +142,8 @@ int MPIR_Iscatterv_impl(const void *sendbuf, const int sendcounts[], const int d
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns->Iscatterv != NULL);
-    mpi_errno = comm_ptr->coll_fns->Iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
+    MPIU_Assert(comm_ptr->coll_fns->Iscatterv_sched != NULL);
+    mpi_errno = comm_ptr->coll_fns->Iscatterv_sched(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
     mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
diff --git a/src/mpi/comm/commutil.c b/src/mpi/comm/commutil.c
index 95fcb6f..9982694 100644
--- a/src/mpi/comm/commutil.c
+++ b/src/mpi/comm/commutil.c
@@ -261,23 +261,23 @@ static int init_default_collops(void)
         ops->ref_count = 1; /* force existence until finalize time */
 
         /* intracomm default defaults... */
-        ops->Ibcast = &MPIR_Ibcast_intra;
-        ops->Ibarrier = &MPIR_Ibarrier_intra;
-        ops->Ireduce = &MPIR_Ireduce_intra;
-        ops->Ialltoall = &MPIR_Ialltoall_intra;
-        ops->Ialltoallv = &MPIR_Ialltoallv_intra;
-        ops->Ialltoallw = &MPIR_Ialltoallw_intra;
-        ops->Iallreduce = &MPIR_Iallreduce_intra;
-        ops->Igather = &MPIR_Igather_intra;
-        ops->Igatherv = &MPIR_Igatherv;
-        ops->Iscatter = &MPIR_Iscatter_intra;
-        ops->Iscatterv = &MPIR_Iscatterv;
-        ops->Ireduce_scatter = &MPIR_Ireduce_scatter_intra;
-        ops->Ireduce_scatter_block = &MPIR_Ireduce_scatter_block_intra;
-        ops->Iallgather = &MPIR_Iallgather_intra;
-        ops->Iallgatherv = &MPIR_Iallgatherv_intra;
-        ops->Iscan = &MPIR_Iscan_rec_dbl;
-        ops->Iexscan = &MPIR_Iexscan;
+        ops->Ibcast_sched = &MPIR_Ibcast_intra;
+        ops->Ibarrier_sched = &MPIR_Ibarrier_intra;
+        ops->Ireduce_sched = &MPIR_Ireduce_intra;
+        ops->Ialltoall_sched = &MPIR_Ialltoall_intra;
+        ops->Ialltoallv_sched = &MPIR_Ialltoallv_intra;
+        ops->Ialltoallw_sched = &MPIR_Ialltoallw_intra;
+        ops->Iallreduce_sched = &MPIR_Iallreduce_intra;
+        ops->Igather_sched = &MPIR_Igather_intra;
+        ops->Igatherv_sched = &MPIR_Igatherv;
+        ops->Iscatter_sched = &MPIR_Iscatter_intra;
+        ops->Iscatterv_sched = &MPIR_Iscatterv;
+        ops->Ireduce_scatter_sched = &MPIR_Ireduce_scatter_intra;
+        ops->Ireduce_scatter_block_sched = &MPIR_Ireduce_scatter_block_intra;
+        ops->Iallgather_sched = &MPIR_Iallgather_intra;
+        ops->Iallgatherv_sched = &MPIR_Iallgatherv_intra;
+        ops->Iscan_sched = &MPIR_Iscan_rec_dbl;
+        ops->Iexscan_sched = &MPIR_Iexscan;
         ops->Neighbor_allgather   = &MPIR_Neighbor_allgather_default;
         ops->Neighbor_allgatherv  = &MPIR_Neighbor_allgatherv_default;
         ops->Neighbor_alltoall    = &MPIR_Neighbor_alltoall_default;
@@ -294,10 +294,10 @@ static int init_default_collops(void)
             case MPID_HIERARCHY_FLAT:
                 break;
             case MPID_HIERARCHY_PARENT:
-                ops->Ibcast = &MPIR_Ibcast_SMP;
-                ops->Iscan = &MPIR_Iscan_SMP;
-                ops->Iallreduce = &MPIR_Iallreduce_SMP;
-                ops->Ireduce = &MPIR_Ireduce_SMP;
+                ops->Ibcast_sched = &MPIR_Ibcast_SMP;
+                ops->Iscan_sched = &MPIR_Iscan_SMP;
+                ops->Iallreduce_sched = &MPIR_Iallreduce_SMP;
+                ops->Ireduce_sched = &MPIR_Ireduce_SMP;
                 break;
             case MPID_HIERARCHY_NODE:
                 break;
@@ -323,21 +323,21 @@ static int init_default_collops(void)
         ops->ref_count = 1; /* force existence until finalize time */
 
         /* intercomm defaults */
-        ops->Ibcast = &MPIR_Ibcast_inter;
-        ops->Ibarrier = &MPIR_Ibarrier_inter;
-        ops->Ireduce = &MPIR_Ireduce_inter;
-        ops->Ialltoall = &MPIR_Ialltoall_inter;
-        ops->Ialltoallv = &MPIR_Ialltoallv_inter;
-        ops->Ialltoallw = &MPIR_Ialltoallw_inter;
-        ops->Iallreduce = &MPIR_Iallreduce_inter;
-        ops->Igather = &MPIR_Igather_inter;
-        ops->Igatherv = &MPIR_Igatherv;
-        ops->Iscatter = &MPIR_Iscatter_inter;
-        ops->Iscatterv = &MPIR_Iscatterv;
-        ops->Ireduce_scatter = &MPIR_Ireduce_scatter_inter;
-        ops->Ireduce_scatter_block = &MPIR_Ireduce_scatter_block_inter;
-        ops->Iallgather = &MPIR_Iallgather_inter;
-        ops->Iallgatherv = &MPIR_Iallgatherv_inter;
+        ops->Ibcast_sched = &MPIR_Ibcast_inter;
+        ops->Ibarrier_sched = &MPIR_Ibarrier_inter;
+        ops->Ireduce_sched = &MPIR_Ireduce_inter;
+        ops->Ialltoall_sched = &MPIR_Ialltoall_inter;
+        ops->Ialltoallv_sched = &MPIR_Ialltoallv_inter;
+        ops->Ialltoallw_sched = &MPIR_Ialltoallw_inter;
+        ops->Iallreduce_sched = &MPIR_Iallreduce_inter;
+        ops->Igather_sched = &MPIR_Igather_inter;
+        ops->Igatherv_sched = &MPIR_Igatherv;
+        ops->Iscatter_sched = &MPIR_Iscatter_inter;
+        ops->Iscatterv_sched = &MPIR_Iscatterv;
+        ops->Ireduce_scatter_sched = &MPIR_Ireduce_scatter_inter;
+        ops->Ireduce_scatter_block_sched = &MPIR_Ireduce_scatter_block_inter;
+        ops->Iallgather_sched = &MPIR_Iallgather_inter;
+        ops->Iallgatherv_sched = &MPIR_Iallgatherv_inter;
         /* scan and exscan are not valid for intercommunicators, leave them NULL */
         /* Ineighbor_all* routines are not valid for intercommunicators, leave
          * them NULL */
@@ -1236,7 +1236,7 @@ static int gcn_sch(MPID_Comm *comm_ptr, MPIR_Context_id_t *ctx0, MPIR_Context_id
     st->ctx1 = ctx1;
     MPIU_Memcpy(st->local_mask, context_mask, MPIR_MAX_CONTEXT_MASK * sizeof(uint32_t));
 
-    mpi_errno = comm_ptr->coll_fns->Iallreduce(MPI_IN_PLACE, st->local_mask, MPIR_MAX_CONTEXT_MASK,
+    mpi_errno = comm_ptr->coll_fns->Iallreduce_sched(MPI_IN_PLACE, st->local_mask, MPIR_MAX_CONTEXT_MASK,
                                                MPI_UINT32_T, MPI_BAND, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
@@ -1342,7 +1342,7 @@ int MPIR_Get_intercomm_contextid_nonblock(MPID_Comm *comm_ptr, MPID_Comm *newcom
         MPID_SCHED_BARRIER(s);
     }
 
-    mpi_errno = lcomm->coll_fns->Ibcast(&newcommp->context_id, 1,
+    mpi_errno = lcomm->coll_fns->Ibcast_sched(&newcommp->context_id, 1,
                                         MPIR_CONTEXT_ID_T_DATATYPE, 0, lcomm, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
diff --git a/src/mpid/pamid/src/comm/mpid_selectcolls.c b/src/mpid/pamid/src/comm/mpid_selectcolls.c
index 2e658e9..b79cc76 100644
--- a/src/mpid/pamid/src/comm/mpid_selectcolls.c
+++ b/src/mpid/pamid/src/comm/mpid_selectcolls.c
@@ -691,23 +691,23 @@ void MPIDI_Comm_coll_query(MPID_Comm *comm)
    comm->coll_fns->Exscan       = MPIDO_Exscan;
 
    /* MPI-3 Support, no optimized collectives hooked in yet */
-   comm->coll_fns->Ibarrier              = MPIR_Ibarrier_intra;
-   comm->coll_fns->Ibcast                = MPIR_Ibcast_intra;
-   comm->coll_fns->Igather               = MPIR_Igather_intra;
-   comm->coll_fns->Igatherv              = MPIR_Igatherv;
-   comm->coll_fns->Iscatter              = MPIR_Iscatter_intra;
-   comm->coll_fns->Iscatterv             = MPIR_Iscatterv;
-   comm->coll_fns->Iallgather            = MPIR_Iallgather_intra;
-   comm->coll_fns->Iallgatherv           = MPIR_Iallgatherv_intra;
-   comm->coll_fns->Ialltoall             = MPIR_Ialltoall_intra;
-   comm->coll_fns->Ialltoallv            = MPIR_Ialltoallv_intra;
-   comm->coll_fns->Ialltoallw            = MPIR_Ialltoallw_intra;
-   comm->coll_fns->Iallreduce            = MPIR_Iallreduce_intra;
-   comm->coll_fns->Ireduce               = MPIR_Ireduce_intra;
-   comm->coll_fns->Ireduce_scatter       = MPIR_Ireduce_scatter_intra;
-   comm->coll_fns->Ireduce_scatter_block = MPIR_Ireduce_scatter_block_intra;
-   comm->coll_fns->Iscan                 = MPIR_Iscan_rec_dbl;
-   comm->coll_fns->Iexscan               = MPIR_Iexscan;
+   comm->coll_fns->Ibarrier_sched              = MPIR_Ibarrier_intra;
+   comm->coll_fns->Ibcast_sched                = MPIR_Ibcast_intra;
+   comm->coll_fns->Igather_sched               = MPIR_Igather_intra;
+   comm->coll_fns->Igatherv_sched              = MPIR_Igatherv;
+   comm->coll_fns->Iscatter_sched              = MPIR_Iscatter_intra;
+   comm->coll_fns->Iscatterv_sched             = MPIR_Iscatterv;
+   comm->coll_fns->Iallgather_sched            = MPIR_Iallgather_intra;
+   comm->coll_fns->Iallgatherv_sched           = MPIR_Iallgatherv_intra;
+   comm->coll_fns->Ialltoall_sched             = MPIR_Ialltoall_intra;
+   comm->coll_fns->Ialltoallv_sched            = MPIR_Ialltoallv_intra;
+   comm->coll_fns->Ialltoallw_sched            = MPIR_Ialltoallw_intra;
+   comm->coll_fns->Iallreduce_sched            = MPIR_Iallreduce_intra;
+   comm->coll_fns->Ireduce_sched               = MPIR_Ireduce_intra;
+   comm->coll_fns->Ireduce_scatter_sched       = MPIR_Ireduce_scatter_intra;
+   comm->coll_fns->Ireduce_scatter_block_sched = MPIR_Ireduce_scatter_block_intra;
+   comm->coll_fns->Iscan_sched                 = MPIR_Iscan_rec_dbl;
+   comm->coll_fns->Iexscan_sched               = MPIR_Iexscan;
    comm->coll_fns->Neighbor_allgather    = MPIR_Neighbor_allgather_default;
    comm->coll_fns->Neighbor_allgatherv   = MPIR_Neighbor_allgatherv_default;
    comm->coll_fns->Neighbor_alltoall     = MPIR_Neighbor_alltoall_default;
@@ -715,23 +715,23 @@ void MPIDI_Comm_coll_query(MPID_Comm *comm)
    comm->coll_fns->Neighbor_alltoallw    = MPIR_Neighbor_alltoallw_default;
 
    /* MPI-3 Support, optimized collectives hooked in */
-   comm->coll_fns->Ibarrier_optimized              = MPIDO_Ibarrier;
-   comm->coll_fns->Ibcast_optimized                = MPIDO_Ibcast;
-   comm->coll_fns->Iallgather_optimized            = MPIDO_Iallgather;
-   comm->coll_fns->Iallgatherv_optimized           = MPIDO_Iallgatherv;
-   comm->coll_fns->Iallreduce_optimized            = MPIDO_Iallreduce;
-   comm->coll_fns->Ialltoall_optimized             = MPIDO_Ialltoall;
-   comm->coll_fns->Ialltoallv_optimized            = MPIDO_Ialltoallv;
-   comm->coll_fns->Ialltoallw_optimized            = MPIDO_Ialltoallw;
-   comm->coll_fns->Iexscan_optimized               = MPIDO_Iexscan;
-   comm->coll_fns->Igather_optimized               = MPIDO_Igather;
-   comm->coll_fns->Igatherv_optimized              = MPIDO_Igatherv;
-   comm->coll_fns->Ireduce_scatter_block_optimized = MPIDO_Ireduce_scatter_block;
-   comm->coll_fns->Ireduce_scatter_optimized       = MPIDO_Ireduce_scatter;
-   comm->coll_fns->Ireduce_optimized               = MPIDO_Ireduce;
-   comm->coll_fns->Iscan_optimized                 = MPIDO_Iscan;
-   comm->coll_fns->Iscatter_optimized              = MPIDO_Iscatter;
-   comm->coll_fns->Iscatterv_optimized             = MPIDO_Iscatterv;
+   comm->coll_fns->Ibarrier_req              = MPIDO_Ibarrier;
+   comm->coll_fns->Ibcast_req                = MPIDO_Ibcast;
+   comm->coll_fns->Iallgather_req            = MPIDO_Iallgather;
+   comm->coll_fns->Iallgatherv_req           = MPIDO_Iallgatherv;
+   comm->coll_fns->Iallreduce_req            = MPIDO_Iallreduce;
+   comm->coll_fns->Ialltoall_req             = MPIDO_Ialltoall;
+   comm->coll_fns->Ialltoallv_req            = MPIDO_Ialltoallv;
+   comm->coll_fns->Ialltoallw_req            = MPIDO_Ialltoallw;
+   comm->coll_fns->Iexscan_req               = MPIDO_Iexscan;
+   comm->coll_fns->Igather_req               = MPIDO_Igather;
+   comm->coll_fns->Igatherv_req              = MPIDO_Igatherv;
+   comm->coll_fns->Ireduce_scatter_block_req = MPIDO_Ireduce_scatter_block;
+   comm->coll_fns->Ireduce_scatter_req       = MPIDO_Ireduce_scatter;
+   comm->coll_fns->Ireduce_req               = MPIDO_Ireduce;
+   comm->coll_fns->Iscan_req                 = MPIDO_Iscan;
+   comm->coll_fns->Iscatter_req              = MPIDO_Iscatter;
+   comm->coll_fns->Iscatterv_req             = MPIDO_Iscatterv;
 
    TRACE_ERR("MPIDI_Comm_coll_query exit\n");
 }

http://git.mpich.org/mpich.git/commitdiff/2eefd3a26e6de5e0b57fa6164ec3f5fdfb56c4fe

commit 2eefd3a26e6de5e0b57fa6164ec3f5fdfb56c4fe
Author: Michael Blocksome <blocksom at us.ibm.com>
Date:   Thu Nov 1 13:58:13 2012 -0500

    Add hooks for optimized MPIX_* non-blocking collectives.
    
    The following collectives are updated:
    
      MPIX_Ibcast
      MPIX_Iallgather
      MPIX_Iallgatherv
      MPIX_Iallreduce
      MPIX_Ialltoall
      MPIX_Ialltoallv
      MPIX_Ialltoallw
      MPIX_Iexscan
      MPIX_Igather
      MPIX_Igatherv
      MPIX_Ireduce_scatter_block
      MPIX_Ireduce_scatter
      MPIX_Ireduce
      MPIX_Iscan
      MPIX_Iscatter
      MPIX_Iscatterv
    
    These collectives are updated similarly to how the optimized MPIX_Ibarrier
    is implemented:
    
    | The previous MPIR_Ibarrier_impl() function forced all ADI implementations
    | to create an MPID_Sched_t opaque object, which was then passed in to the
    | specific ibarrier implementation via a function pointer table.
    |
    | The MPID_Sched_t object represents a completely new state machine that
    | must be advanced whenever mpi progress is made.
    |
    | The required construction of the MPID_Sched_t object and the required
    | advance of the schedule state machine would be extremely detrimental to
    | pamid performance.
    
    (ibm) 47b497e15cd87df666c6031a28f5f8a5ae53cece
    (ibm) aee872eb91f4300066957349e5a3d8103da33f40
    
    Signed-off-by: Charles Archer <archerc at us.ibm.com>

diff --git a/src/include/mpiimpl.h b/src/include/mpiimpl.h
index bd0db7c..5c6f1d8 100644
--- a/src/include/mpiimpl.h
+++ b/src/include/mpiimpl.h
@@ -1976,50 +1976,95 @@ typedef struct MPID_Collops {
 
     /* MPI-3 nonblocking collectives */
     int (*Ibarrier)(MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ibarrier_optimized)(MPID_Comm *comm_ptr, MPID_Request **request);
     int (*Ibcast)(void *buffer, int count, MPI_Datatype datatype, int root,
                   MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ibcast_optimized)(void *buffer, int count, MPI_Datatype datatype, int root,
+                            MPID_Comm *comm_ptr, MPID_Request **request);
     int (*Igather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                    int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
                    MPID_Sched_t s);
+    int (*Igather_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                             int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
+                             MPID_Request **request);
     int (*Igatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                     const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root,
                     MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Igatherv_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                              const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root,
+                              MPID_Comm *comm_ptr, MPID_Request **request);
     int (*Iscatter)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                     int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
                     MPID_Sched_t s);
+    int (*Iscatter_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                              int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr,
+                              MPID_Request **request);
     int (*Iscatterv)(const void *sendbuf, const int *sendcounts, const int *displs,
                      MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
                      int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Iscatterv_optimized)(const void *sendbuf, const int *sendcounts, const int *displs,
+                               MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                               int root, MPID_Comm *comm_ptr, MPID_Request **request);
     int (*Iallgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                       int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
                       MPID_Sched_t s);
+    int (*Iallgather_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                                int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
+                                MPID_Request **request);
     int (*Iallgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                        const int *recvcounts, const int *displs, MPI_Datatype recvtype,
                        MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Iallgatherv_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                                 const int *recvcounts, const int *displs, MPI_Datatype recvtype,
+                                 MPID_Comm *comm_ptr, MPID_Request ** request);
     int (*Ialltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                      int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
                      MPID_Sched_t s);
+    int (*Ialltoall_optimized)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                               int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
+                               MPID_Request **request);
     int (*Ialltoallv)(const void *sendbuf, const int *sendcounts, const int *sdispls,
                       MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
                       const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
                       MPID_Sched_t s);
+    int (*Ialltoallv_optimized)(const void *sendbuf, const int *sendcounts, const int *sdispls,
+                                MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
+                                const int *rdispls, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
+                                MPID_Request **request);
     int (*Ialltoallw)(const void *sendbuf, const int *sendcounts, const int *sdispls,
                       const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
                       const int *rdispls, const MPI_Datatype *recvtypes,
                       MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ialltoallw_optimized)(const void *sendbuf, const int *sendcounts, const int *sdispls,
+                                const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
+                                const int *rdispls, const MPI_Datatype *recvtypes,
+                                MPID_Comm *comm_ptr, MPID_Request **request);
     int (*Ireduce)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                    int root, MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ireduce_optimized)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+                   int root, MPID_Comm *comm_ptr, MPID_Request **request);
     int (*Iallreduce)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
                       MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Iallreduce_optimized)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                                MPI_Op op, MPID_Comm *comm_ptr, MPID_Request **request);
     int (*Ireduce_scatter)(const void *sendbuf, void *recvbuf, const int *recvcounts,
                            MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ireduce_scatter_optimized)(const void *sendbuf, void *recvbuf, const int *recvcounts,
+                                     MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Request **request);
     int (*Ireduce_scatter_block)(const void *sendbuf, void *recvbuf, int recvcount,
                                  MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
                                  MPID_Sched_t s);
+    int (*Ireduce_scatter_block_optimized)(const void *sendbuf, void *recvbuf, int recvcount,
+                                           MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
+                                           MPID_Request **request);
     int (*Iscan)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                  MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Iscan_optimized)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+                           MPID_Comm *comm_ptr, MPID_Request **request);
     int (*Iexscan)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                    MPID_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Iexscan_optimized)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+                             MPID_Comm *comm_ptr, MPID_Request **request);
 
     struct MPID_Collops *prev_coll_fns; /* when overriding this table, set this to point to the old table */
 
diff --git a/src/mpi/coll/iallgather.c b/src/mpi/coll/iallgather.c
index aaa1ab0..095c5c4 100644
--- a/src/mpi/coll/iallgather.c
+++ b/src/mpi/coll/iallgather.c
@@ -588,12 +588,25 @@ fn_fail:
 int MPIR_Iallgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Iallgather_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Iallgather_optimized(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/iallgatherv.c b/src/mpi/coll/iallgatherv.c
index 535b639..37b2841 100644
--- a/src/mpi/coll/iallgatherv.c
+++ b/src/mpi/coll/iallgatherv.c
@@ -690,12 +690,25 @@ int MPIR_Iallgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendt
                           MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Iallgatherv_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Iallgatherv_optimized(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/iallreduce.c b/src/mpi/coll/iallreduce.c
index 590192e..f51bee1 100644
--- a/src/mpi/coll/iallreduce.c
+++ b/src/mpi/coll/iallreduce.c
@@ -646,12 +646,25 @@ fn_fail:
 int MPIR_Iallreduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Iallreduce_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Iallreduce_optimized(sendbuf, recvbuf, count, datatype, op, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ialltoall.c b/src/mpi/coll/ialltoall.c
index 3988581..593ae79 100644
--- a/src/mpi/coll/ialltoall.c
+++ b/src/mpi/coll/ialltoall.c
@@ -518,12 +518,25 @@ fn_fail:
 int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Ialltoall_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Ialltoall_optimized(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ialltoallv.c b/src/mpi/coll/ialltoallv.c
index 72ba2c6..b73e211 100644
--- a/src/mpi/coll/ialltoallv.c
+++ b/src/mpi/coll/ialltoallv.c
@@ -245,12 +245,26 @@ int MPIR_Ialltoallv_impl(const void *sendbuf, const int sendcounts[], const int
                          MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Ialltoallv_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Ialltoallv_optimized(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ialltoallw.c b/src/mpi/coll/ialltoallw.c
index 125412d..9409b44 100644
--- a/src/mpi/coll/ialltoallw.c
+++ b/src/mpi/coll/ialltoallw.c
@@ -251,12 +251,25 @@ int MPIR_Ialltoallw_impl(const void *sendbuf, const int sendcounts[], const int
                          MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Ialltoallw_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Ialltoallw_optimized(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ibarrier.c b/src/mpi/coll/ibarrier.c
index 66fe1fc..3bf12da 100644
--- a/src/mpi/coll/ibarrier.c
+++ b/src/mpi/coll/ibarrier.c
@@ -170,18 +170,30 @@ fn_fail:
 int MPIR_Ibarrier_impl(MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Ibarrier_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Ibarrier_optimized(comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
     MPIU_Assert(comm_ptr->coll_fns->Ibarrier != NULL);
     mpi_errno = comm_ptr->coll_fns->Ibarrier(comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
diff --git a/src/mpi/coll/ibcast.c b/src/mpi/coll/ibcast.c
index 652c096..57ddd5d 100644
--- a/src/mpi/coll/ibcast.c
+++ b/src/mpi/coll/ibcast.c
@@ -856,18 +856,30 @@ fn_fail:
 int MPIR_Ibcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Ibcast_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Ibcast_optimized(buffer, count, datatype, root, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
     MPIU_Assert(comm_ptr->coll_fns->Ibcast != NULL);
     mpi_errno = comm_ptr->coll_fns->Ibcast(buffer, count, datatype, root, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
diff --git a/src/mpi/coll/iexscan.c b/src/mpi/coll/iexscan.c
index cdb8354..c28c2c8 100644
--- a/src/mpi/coll/iexscan.c
+++ b/src/mpi/coll/iexscan.c
@@ -183,12 +183,25 @@ fn_fail:
 int MPIR_Iexscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Iexscan_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Iexscan_optimized(sendbuf, recvbuf, count, datatype, op, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/igather.c b/src/mpi/coll/igather.c
index ee0a06d..3e1b51d 100644
--- a/src/mpi/coll/igather.c
+++ b/src/mpi/coll/igather.c
@@ -510,12 +510,27 @@ fn_fail:
 int MPIR_Igather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Igather_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Igather_optimized(sendbuf, sendcount, sendtype,
+                                                          recvbuf, recvcount, recvtype,
+                                                          root, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/igatherv.c b/src/mpi/coll/igatherv.c
index 2ece10f..388c012 100644
--- a/src/mpi/coll/igatherv.c
+++ b/src/mpi/coll/igatherv.c
@@ -109,12 +109,27 @@ int MPIR_Igatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
                        int root, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Igatherv_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Igatherv_optimized(sendbuf, sendcount, sendtype,
+                                                           recvbuf, recvcounts, displs, recvtype,
+                                                           root, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ired_scat.c b/src/mpi/coll/ired_scat.c
index a4829ee..83a16aa 100644
--- a/src/mpi/coll/ired_scat.c
+++ b/src/mpi/coll/ired_scat.c
@@ -1008,12 +1008,27 @@ int MPIR_Ireduce_scatter_impl(const void *sendbuf, void *recvbuf, const int recv
                               MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Ireduce_scatter_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_optimized(sendbuf, recvbuf, recvcounts,
+                                                                  datatype, op,
+                                                                  comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ired_scat_block.c b/src/mpi/coll/ired_scat_block.c
index 1018417..06d2dbc 100644
--- a/src/mpi/coll/ired_scat_block.c
+++ b/src/mpi/coll/ired_scat_block.c
@@ -910,12 +910,28 @@ fn_fail:
 int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Ireduce_scatter_block_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_block_optimized(sendbuf, recvbuf, recvcount,
+                                                                        datatype, op,
+                                                                        comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/ireduce.c b/src/mpi/coll/ireduce.c
index 50c3f96..cec9b50 100644
--- a/src/mpi/coll/ireduce.c
+++ b/src/mpi/coll/ireduce.c
@@ -761,12 +761,27 @@ fn_fail:
 int MPIR_Ireduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Ireduce_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Ireduce_optimized(sendbuf, recvbuf, count,
+                                                          datatype, op, root,
+                                                          comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
diff --git a/src/mpi/coll/iscan.c b/src/mpi/coll/iscan.c
index 2cf631c..a71f278 100644
--- a/src/mpi/coll/iscan.c
+++ b/src/mpi/coll/iscan.c
@@ -317,18 +317,32 @@ fn_fail:
 int MPIR_Iscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Iscan_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Iscan_optimized(sendbuf, recvbuf, count,
+                                                        datatype, op,
+                                                        comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
     MPIU_Assert(comm_ptr->coll_fns->Iscan != NULL);
     mpi_errno = comm_ptr->coll_fns->Iscan(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
diff --git a/src/mpi/coll/iscatter.c b/src/mpi/coll/iscatter.c
index 75a972b..66a1e98 100644
--- a/src/mpi/coll/iscatter.c
+++ b/src/mpi/coll/iscatter.c
@@ -545,18 +545,32 @@ fn_fail:
 int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Iscatter_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Iscatter_optimized(sendbuf, sendcount, sendtype,
+                                                           recvbuf, recvcount, recvtype,
+                                                           root, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
     MPIU_Assert(comm_ptr->coll_fns->Iscatter != NULL);
     mpi_errno = comm_ptr->coll_fns->Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
diff --git a/src/mpi/coll/iscatterv.c b/src/mpi/coll/iscatterv.c
index 7d82bd4..0b12958 100644
--- a/src/mpi/coll/iscatterv.c
+++ b/src/mpi/coll/iscatterv.c
@@ -116,18 +116,32 @@ int MPIR_Iscatterv_impl(const void *sendbuf, const int sendcounts[], const int d
                         MPI_Datatype recvtype, int root, MPID_Comm *comm_ptr, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
-    int tag = -1;
     MPID_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
+    MPIU_Assert(comm_ptr->coll_fns != NULL);
+    if (comm_ptr->coll_fns->Iscatterv_optimized != NULL) {
+        /* --BEGIN USEREXTENSION-- */
+        mpi_errno = comm_ptr->coll_fns->Iscatterv_optimized(sendbuf, sendcounts, displs, sendtype,
+                                                           recvbuf, recvcount, recvtype,
+                                                           root, comm_ptr, &reqp);
+        if (reqp) {
+            *request = reqp->handle;
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            goto fn_exit;
+        }
+        /* --END USEREXTENSION-- */
+    }
+
+    int tag = -1;
+    MPID_Sched_t s = MPID_SCHED_NULL;
+
     mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     mpi_errno = MPID_Sched_create(&s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
     MPIU_Assert(comm_ptr->coll_fns->Iscatterv != NULL);
     mpi_errno = comm_ptr->coll_fns->Iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
diff --git a/src/mpid/pamid/include/mpidi_prototypes.h b/src/mpid/pamid/include/mpidi_prototypes.h
index 55566f2..ec50f71 100644
--- a/src/mpid/pamid/include/mpidi_prototypes.h
+++ b/src/mpid/pamid/include/mpidi_prototypes.h
@@ -238,20 +238,27 @@ void MPIDI_Coll_register    (void);
 int MPIDO_Bcast(void *buffer, int count, MPI_Datatype dt, int root, MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_Bcast_simple(void *buffer, int count, MPI_Datatype dt, int root, MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_bcast(pami_xfer_t *bcast, void *comm);
+int MPIDO_Ibcast(void *buffer, int count, MPI_Datatype datatype, int root, MPID_Comm *comm_ptr, MPID_Request **request);
 int MPIDO_Barrier(MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_Barrier_simple(MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_barrier(pami_xfer_t *barrier, void *comm);
+int MPIDO_Ibarrier(MPID_Comm *comm_ptr, MPID_Request **request);
 
 int MPIDO_Allreduce(const void *sbuffer, void *rbuffer, int count,
                     MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_Allreduce_simple(const void *sbuffer, void *rbuffer, int count,
                     MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_allreduce(pami_xfer_t *allreduce, void *comm);
+int MPIDO_Iallreduce(const void *sbuffer, void *rbuffer, int count,
+                     MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
+                     MPID_Request ** request);
 int MPIDO_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, 
                  MPI_Op op, int root, MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_Reduce_simple(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, 
                  MPI_Op op, int root, MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_reduce(pami_xfer_t *reduce, void *comm);
+int MPIDO_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                  MPI_Op op, int root, MPID_Comm *comm_ptr, MPID_Request **request);
 int MPIDO_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                     void *recvbuf, int recvcount, MPI_Datatype recvtype,
                     MPID_Comm *comm_ptr, int *mpierrno);
@@ -259,6 +266,9 @@ int MPIDO_Allgather_simple(const void *sendbuf, int sendcount, MPI_Datatype send
                     void *recvbuf, int recvcount, MPI_Datatype recvtype,
                     MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_allgather(pami_xfer_t *allgather, void *comm);
+int MPIDO_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                     int recvcount, MPI_Datatype recvtype, MPID_Comm *comm_ptr,
+                     MPID_Request **request);
 
 int MPIDO_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                      void *recvbuf, const int *recvcounts, const int *displs,
@@ -279,6 +289,9 @@ int MPIDO_Gather_simple(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
                  int root, MPID_Comm * comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_gather(pami_xfer_t *gather, void *comm);
+int MPIDO_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                  int root, MPID_Comm * comm_ptr, MPID_Request **request);
 
 int MPIDO_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                   void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype,
@@ -287,17 +300,24 @@ int MPIDO_Gatherv_simple(const void *sendbuf, int sendcount, MPI_Datatype sendty
                   void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype,
                   int root, MPID_Comm * comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_gatherv(pami_xfer_t *gatherv, void *comm);
+int MPIDO_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                   void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype,
+                   int root, MPID_Comm * comm_ptr, MPID_Request **request);
 
 int MPIDO_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
                MPI_Op op, MPID_Comm * comm_ptr, int *mpierrno);
 int MPIDO_Scan_simple(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
                MPI_Op op, MPID_Comm * comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_scan(pami_xfer_t *scan, void *comm);
+int MPIDO_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+               MPI_Op op, MPID_Comm * comm_ptr, MPID_Request **request);
 
 int MPIDO_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
                MPI_Op op, MPID_Comm * comm_ptr, int *mpierrno);
 int MPIDO_Exscan_simple(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
                MPI_Op op, MPID_Comm * comm_ptr, int *mpierrno);
+int MPIDO_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                  MPI_Op op, MPID_Comm * comm_ptr, MPID_Request **request);
 
 int MPIDO_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                   void *recvbuf, int recvcount, MPI_Datatype recvtype,
@@ -306,6 +326,9 @@ int MPIDO_Scatter_simple(const void *sendbuf, int sendcount, MPI_Datatype sendty
                   void *recvbuf, int recvcount, MPI_Datatype recvtype,
                   int root, MPID_Comm * comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_scatter(pami_xfer_t *scatter, void *comm);
+int MPIDO_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                   void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                   int root, MPID_Comm * comm_ptr, MPID_Request **request);
 
 int MPIDO_Scatterv(const void *sendbuf, const int *sendcounts, const int *displs,
                    MPI_Datatype sendtype,
@@ -316,6 +339,10 @@ int MPIDO_Scatterv_simple(const void *sendbuf, const int *sendcounts, const int
                    void *recvbuf, int recvcount, MPI_Datatype recvtype,
                    int root, MPID_Comm * comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_scatterv(pami_xfer_t *scatterv, void *comm);
+int MPIDO_Iscatterv(const void *sendbuf, const int *sendcounts, const int *displs,
+                    MPI_Datatype sendtype,
+                    void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                    int root, MPID_Comm * comm_ptr, MPID_Request **request);
 
 int MPIDO_Alltoallv(const void *sendbuf, const int *sendcounts, const int *senddispls,
                     MPI_Datatype sendtype,
@@ -328,6 +355,11 @@ int MPIDO_Alltoallv_simple(const void *sendbuf, const int *sendcounts, const int
                     MPI_Datatype recvtype,
                     MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_alltoallv(pami_xfer_t *alltoallv, void *comm);
+int MPIDO_Ialltoallv(const void *sendbuf, const int *sendcounts, const int *senddispls,
+                     MPI_Datatype sendtype,
+                     void *recvbuf, const int *recvcounts, const int *recvdispls,
+                     MPI_Datatype recvtype,
+                     MPID_Comm *comm_ptr, MPID_Request **request);
 
 int MPIDO_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                    void *recvbuf, int recvcount, MPI_Datatype recvtype,
@@ -336,6 +368,22 @@ int MPIDO_Alltoall_simple(const void *sendbuf, int sendcount, MPI_Datatype sendt
                    void *recvbuf, int recvcount, MPI_Datatype recvtype,
                    MPID_Comm *comm_ptr, int *mpierrno);
 int MPIDO_CSWrapper_alltoall(pami_xfer_t *alltoall, void *comm);
+int MPIDO_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                    void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                    MPID_Comm *comm_ptr, MPID_Request **request);
+
+int MPIDO_Ialltoallw(const void *sendbuf, const int *sendcounts, const int *senddispls,
+                     const MPI_Datatype * sendtypes,
+                     void *recvbuf, const int *recvcounts, const int *recvdispls,
+                     const MPI_Datatype * recvtypes,
+                     MPID_Comm *comm_ptr, MPID_Request **request);
+
+int MPIDO_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount,
+                                MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr,
+                                MPID_Request **request);
+
+int MPIDO_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcounts,
+                          MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPID_Request **request);
 
 int MPIDI_Datatype_to_pami(MPI_Datatype        dt,
                            pami_type_t        *pdt,
diff --git a/src/mpid/pamid/src/coll/Makefile.mk b/src/mpid/pamid/src/coll/Makefile.mk
index 59efcbc..a11e6a1 100644
--- a/src/mpid/pamid/src/coll/Makefile.mk
+++ b/src/mpid/pamid/src/coll/Makefile.mk
@@ -33,7 +33,10 @@ include $(top_srcdir)/src/mpid/pamid/src/coll/alltoallv/Makefile.mk
 include $(top_srcdir)/src/mpid/pamid/src/coll/gatherv/Makefile.mk
 include $(top_srcdir)/src/mpid/pamid/src/coll/scan/Makefile.mk
 include $(top_srcdir)/src/mpid/pamid/src/coll/reduce/Makefile.mk
-
+include $(top_srcdir)/src/mpid/pamid/src/coll/alltoallw/Makefile.mk
+include $(top_srcdir)/src/mpid/pamid/src/coll/exscan/Makefile.mk
+include $(top_srcdir)/src/mpid/pamid/src/coll/ired_scat_block/Makefile.mk
+include $(top_srcdir)/src/mpid/pamid/src/coll/ired_scat/Makefile.mk
 
 lib_lib at MPILIBNAME@_la_SOURCES +=               \
     src/mpid/pamid/src/coll/coll_utils.c
diff --git a/src/mpid/pamid/src/coll/allgather/Makefile.mk b/src/mpid/pamid/src/coll/allgather/Makefile.mk
index 5f48e77..dc8864e 100644
--- a/src/mpid/pamid/src/coll/allgather/Makefile.mk
+++ b/src/mpid/pamid/src/coll/allgather/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/allgather/mpido_allgather.c
+    src/mpid/pamid/src/coll/allgather/mpido_allgather.c              \
+    src/mpid/pamid/src/coll/allgather/mpido_iallgather.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/allgather/mpido_iallgather.c b/src/mpid/pamid/src/coll/allgather/mpido_iallgather.c
new file mode 100644
index 0000000..82b7e97
--- /dev/null
+++ b/src/mpid/pamid/src/coll/allgather/mpido_iallgather.c
@@ -0,0 +1,73 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/allgather/mpido_iallgather.c
+ * \brief MPIDO_Iallgather: defer to the MPIR nbc code or run a blocking allgather
+ */
+
+/*#define TRACE_ON */
+#include <mpidimpl.h>
+
+int
+MPIDO_Iallgather(const void *sendbuf,
+                 int sendcount,
+                 MPI_Datatype sendtype,
+                 void *recvbuf,
+                 int recvcount,
+                 MPI_Datatype recvtype,
+                 MPID_Comm * comm_ptr,
+                 MPID_Request ** request)
+{
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Iallgather_impl() function to invoke the mpich nbc implementation
+       * of MPI_Iallgather().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Iallgather() with a blocking MPIR_Allgather().
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH iallgather algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Allgather_impl(sendbuf, sendcount, sendtype,
+                                   recvbuf, recvcount, recvtype,
+                                   comm_ptr, &mpierrno);
+
+      /*
+       * The blocking allgather has completed - hand back an already-completed
+       * MPID_Request so MPIR_Iallgather_impl() does not start another one.
+       * NOTE(review): mpierrno is never read; the request is not NULL-checked.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+   return 0;  /* NOTE(review): unreachable - the bare block above always returns */
+}
diff --git a/src/mpid/pamid/src/coll/allgatherv/Makefile.mk b/src/mpid/pamid/src/coll/allgatherv/Makefile.mk
index fd131ed..f0ba398 100644
--- a/src/mpid/pamid/src/coll/allgatherv/Makefile.mk
+++ b/src/mpid/pamid/src/coll/allgatherv/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/allgatherv/mpido_allgatherv.c
+    src/mpid/pamid/src/coll/allgatherv/mpido_allgatherv.c            \
+    src/mpid/pamid/src/coll/allgatherv/mpido_iallgatherv.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/allgatherv/mpido_iallgatherv.c b/src/mpid/pamid/src/coll/allgatherv/mpido_iallgatherv.c
new file mode 100644
index 0000000..68d47c7
--- /dev/null
+++ b/src/mpid/pamid/src/coll/allgatherv/mpido_iallgatherv.c
@@ -0,0 +1,77 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/allgatherv/mpido_iallgatherv.c
+ * \brief MPIDO_Iallgatherv: defer to the MPIR nbc code or run a blocking allgatherv
+ */
+
+/*#define TRACE_ON */
+#include <mpidimpl.h>
+
+int
+MPIDO_Iallgatherv(const void *sendbuf,
+                  int sendcount,
+                  MPI_Datatype sendtype,
+                  void *recvbuf,
+                  const int *recvcounts,
+                  const int *displs,
+                  MPI_Datatype recvtype,
+                  MPID_Comm * comm_ptr,
+                  MPID_Request ** request)
+{
+   TRACE_ERR("Entering MPIDO_Iallgatherv\n");
+
+
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Iallgatherv_impl() function to invoke the mpich nbc implementation
+       * of MPI_Iallgatherv().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc is not enabled. Fake a non-blocking MPIR_Iallgatherv() with a
+       * blocking MPIR_Allgatherv(). NOTE(review): message below says "iallgather".
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH iallgather algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Allgatherv_impl(sendbuf, sendcount, sendtype,
+                                    recvbuf, recvcounts, displs, recvtype,
+                                    comm_ptr, &mpierrno);
+
+      /*
+       * The blocking allgatherv has completed - hand back an already-completed
+       * MPID_Request so MPIR_Iallgatherv_impl() does not start another one.
+       * NOTE(review): mpierrno is never read; the request is not NULL-checked.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+   return 0;  /* NOTE(review): unreachable - the bare block above always returns */
+}
diff --git a/src/mpid/pamid/src/coll/allreduce/Makefile.mk b/src/mpid/pamid/src/coll/allreduce/Makefile.mk
index 26b2a29..2be9e61 100644
--- a/src/mpid/pamid/src/coll/allreduce/Makefile.mk
+++ b/src/mpid/pamid/src/coll/allreduce/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c
+    src/mpid/pamid/src/coll/allreduce/mpido_allreduce.c              \
+    src/mpid/pamid/src/coll/allreduce/mpido_iallreduce.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/allreduce/mpido_iallreduce.c b/src/mpid/pamid/src/coll/allreduce/mpido_iallreduce.c
new file mode 100644
index 0000000..6446db7
--- /dev/null
+++ b/src/mpid/pamid/src/coll/allreduce/mpido_iallreduce.c
@@ -0,0 +1,73 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/allreduce/mpido_iallreduce.c
+ * \brief MPIDO_Iallreduce: defer to the MPIR nbc code or run a blocking allreduce
+ */
+
+/*#define TRACE_ON*/
+
+#include <mpidimpl.h>
+
+int MPIDO_Iallreduce(const void *sendbuf,
+                    void *recvbuf,
+                    int count,
+                    MPI_Datatype dt,
+                    MPI_Op op,
+                    MPID_Comm *comm_ptr,
+                    MPID_Request **request)
+{
+   TRACE_ERR("Entering mpido_iallreduce\n");
+
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Iallreduce_impl() function to invoke the mpich nbc implementation
+       * of MPI_Iallreduce().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Iallreduce() with a blocking MPIR_Allreduce().
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH iallreduce algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Allreduce_impl(sendbuf, recvbuf, count, dt, op,
+                                   comm_ptr, &mpierrno);
+
+      /*
+       * The blocking allreduce has completed - hand back an already-completed
+       * MPID_Request so MPIR_Iallreduce_impl() does not start another one.
+       * NOTE(review): mpierrno is never read; the request is not NULL-checked.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+   return 0;  /* NOTE(review): unreachable - the bare block above always returns */
+}
diff --git a/src/mpid/pamid/src/coll/alltoall/Makefile.mk b/src/mpid/pamid/src/coll/alltoall/Makefile.mk
index 3176aa7..1f40331 100644
--- a/src/mpid/pamid/src/coll/alltoall/Makefile.mk
+++ b/src/mpid/pamid/src/coll/alltoall/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/alltoall/mpido_alltoall.c
+    src/mpid/pamid/src/coll/alltoall/mpido_alltoall.c                \
+    src/mpid/pamid/src/coll/alltoall/mpido_ialltoall.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/alltoall/mpido_ialltoall.c b/src/mpid/pamid/src/coll/alltoall/mpido_ialltoall.c
new file mode 100644
index 0000000..2d1a2c5
--- /dev/null
+++ b/src/mpid/pamid/src/coll/alltoall/mpido_ialltoall.c
@@ -0,0 +1,76 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/alltoall/mpido_ialltoall.c
+ * \brief MPIDO_Ialltoall: defer to the MPIR nbc code or run a blocking alltoall
+ */
+
+/*#define TRACE_ON*/
+
+#include <mpidimpl.h>
+
+int MPIDO_Ialltoall(const void *sendbuf,
+                    int sendcount,
+                    MPI_Datatype sendtype,
+                    void *recvbuf,
+                    int recvcount,
+                    MPI_Datatype recvtype,
+                    MPID_Comm *comm_ptr,
+                    MPID_Request **request)
+{
+   TRACE_ERR("Entering MPIDO_Ialltoall\n");
+
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Ialltoall_impl() function to invoke the mpich nbc implementation
+       * of MPI_Ialltoall().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Ialltoall() with a blocking MPIR_Alltoall().
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking alltoall algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Alltoall_impl(sendbuf, sendcount, sendtype,
+                                  recvbuf, recvcount, recvtype,
+                                  comm_ptr, &mpierrno);
+
+      /*
+       * The blocking alltoall has completed - hand back an already-completed
+       * MPID_Request so MPIR_Ialltoall_impl() does not start another one.
+       * NOTE(review): mpierrno is never read; the request is not NULL-checked.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+   TRACE_ERR("Leaving MPIDO_Ialltoall\n");
+   return 0;  /* NOTE(review): this and the trace above are unreachable - the block always returns */
+}
diff --git a/src/mpid/pamid/src/coll/alltoallv/Makefile.mk b/src/mpid/pamid/src/coll/alltoallv/Makefile.mk
index 8e3fa82..15ab517 100644
--- a/src/mpid/pamid/src/coll/alltoallv/Makefile.mk
+++ b/src/mpid/pamid/src/coll/alltoallv/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/alltoallv/mpido_alltoallv.c
+    src/mpid/pamid/src/coll/alltoallv/mpido_alltoallv.c              \
+    src/mpid/pamid/src/coll/alltoallv/mpido_ialltoallv.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/alltoallv/mpido_ialltoallv.c b/src/mpid/pamid/src/coll/alltoallv/mpido_ialltoallv.c
new file mode 100644
index 0000000..14ee4d0
--- /dev/null
+++ b/src/mpid/pamid/src/coll/alltoallv/mpido_ialltoallv.c
@@ -0,0 +1,79 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/alltoallv/mpido_ialltoallv.c
+ * \brief MPIDO_Ialltoallv: defer to the MPIR nbc code or run a blocking alltoallv
+ */
+/*#define TRACE_ON*/
+
+#include <mpidimpl.h>
+
+int MPIDO_Ialltoallv(const void *sendbuf,
+                    const int *sendcounts,
+                    const int *senddispls,
+                    MPI_Datatype sendtype,
+                    void *recvbuf,
+                    const int *recvcounts,
+                    const int *recvdispls,
+                    MPI_Datatype recvtype,
+                    MPID_Comm *comm_ptr,
+                    MPID_Request **request)
+{
+   if(comm_ptr->rank == 0)
+      TRACE_ERR("Entering MPIDO_Ialltoallv\n");
+
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Ialltoallv_impl() function to invoke the mpich nbc implementation
+       * of MPI_Ialltoallv().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Ialltoallv() with a blocking MPIR_Alltoallv().
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking alltoallv algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Alltoallv_impl(sendbuf, sendcounts, senddispls, sendtype,
+                                  recvbuf, recvcounts, recvdispls, recvtype,
+                                  comm_ptr, &mpierrno);
+
+      /*
+       * The blocking alltoallv has completed - hand back an already-completed
+       * MPID_Request so MPIR_Ialltoallv_impl() does not start another one.
+       * NOTE(review): mpierrno is never read; the request is not NULL-checked.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+
+   TRACE_ERR("Leaving ialltoallv\n");
+   return 0;  /* NOTE(review): this and the trace above are unreachable - the block always returns */
+}
diff --git a/src/mpid/pamid/src/coll/alltoallw/Makefile.mk b/src/mpid/pamid/src/coll/alltoallw/Makefile.mk
new file mode 100644
index 0000000..33a90cc
--- /dev/null
+++ b/src/mpid/pamid/src/coll/alltoallw/Makefile.mk
@@ -0,0 +1,28 @@
+# begin_generated_IBM_copyright_prolog
+#
+# This is an automatically generated copyright prolog.
+# After initializing,  DO NOT MODIFY OR MOVE
+#  ---------------------------------------------------------------
+# Licensed Materials - Property of IBM
+# Blue Gene/Q 5765-PER 5765-PRP
+#
+# (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved
+# US Government Users Restricted Rights -
+# Use, duplication, or disclosure restricted
+# by GSA ADP Schedule Contract with IBM Corp.
+#
+#  ---------------------------------------------------------------
+#
+# end_generated_IBM_copyright_prolog
+# -*- mode: makefile-gmake; -*-
+
+# note that the includes always happen but the effects of their contents are
+# affected by "if BUILD_PAMID"
+if BUILD_PAMID
+
+
+lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
+    src/mpid/pamid/src/coll/alltoallw/mpido_ialltoallw.c
+
+
+endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/alltoallw/mpido_ialltoallw.c b/src/mpid/pamid/src/coll/alltoallw/mpido_ialltoallw.c
new file mode 100644
index 0000000..0a82baf
--- /dev/null
+++ b/src/mpid/pamid/src/coll/alltoallw/mpido_ialltoallw.c
@@ -0,0 +1,79 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/alltoallw/mpido_ialltoallw.c
+ * \brief MPIDO_Ialltoallw: defer to the MPIR nbc code or run a blocking alltoallw
+ */
+/*#define TRACE_ON*/
+
+#include <mpidimpl.h>
+
+int MPIDO_Ialltoallw(const void *sendbuf,
+                    const int *sendcounts,
+                    const int *senddispls,
+                    const MPI_Datatype *sendtypes,
+                    void *recvbuf,
+                    const int *recvcounts,
+                    const int *recvdispls,
+                    const MPI_Datatype *recvtypes,
+                    MPID_Comm *comm_ptr,
+                    MPID_Request **request)
+{
+   if(comm_ptr->rank == 0)
+      TRACE_ERR("Entering MPIDO_Ialltoallw\n");
+
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Ialltoallw_impl() function to invoke the mpich nbc implementation
+       * of MPI_Ialltoallw().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Ialltoallw() with a blocking MPIR_Alltoallw().
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking alltoallw algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Alltoallw_impl(sendbuf, sendcounts, senddispls, sendtypes,
+                                  recvbuf, recvcounts, recvdispls, recvtypes,
+                                  comm_ptr, &mpierrno);
+
+      /*
+       * The blocking alltoallw has completed - hand back an already-completed
+       * MPID_Request so MPIR_Ialltoallw_impl() does not start another one.
+       * NOTE(review): mpierrno is never read; the request is not NULL-checked.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+
+   TRACE_ERR("Leaving ialltoallw\n");
+   return 0;  /* NOTE(review): this and the trace above are unreachable - the block always returns */
+}
diff --git a/src/mpid/pamid/src/coll/barrier/Makefile.mk b/src/mpid/pamid/src/coll/barrier/Makefile.mk
index 6e38ebd..5f17631 100644
--- a/src/mpid/pamid/src/coll/barrier/Makefile.mk
+++ b/src/mpid/pamid/src/coll/barrier/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib@MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/barrier/mpido_barrier.c
+    src/mpid/pamid/src/coll/barrier/mpido_barrier.c                  \
+    src/mpid/pamid/src/coll/barrier/mpido_ibarrier.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/barrier/mpido_ibarrier.c b/src/mpid/pamid/src/coll/barrier/mpido_ibarrier.c
new file mode 100644
index 0000000..10607c7
--- /dev/null
+++ b/src/mpid/pamid/src/coll/barrier/mpido_ibarrier.c
@@ -0,0 +1,116 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/barrier/mpido_ibarrier.c
+ * \brief Non-blocking barrier entry point (MPIDO_Ibarrier) for the pamid device
+ */
+
+/*#define TRACE_ON*/
+
+#include <mpidimpl.h>
+
+static void cb_ibarrier(void *ctxt, void *clientdata, pami_result_t err)
+{
+   /* clientdata is the MPID_Request to complete; ctxt and err are not used. */
+   MPIDI_Request_complete_norelease_inline((MPID_Request *) clientdata);
+}
+
+/*
+ * Start a non-blocking barrier on comm_ptr; the request comes back via *request.
+ * With MPICH selected and mpir_nbc set, returns 0 without creating a request.
+ */
+int MPIDO_Ibarrier(MPID_Comm *comm_ptr, MPID_Request **request)
+{
+   TRACE_ERR("Entering MPIDO_Ibarrier\n");
+
+   if(unlikely(comm_ptr->mpid.user_selected_type[PAMI_XFER_BARRIER] == MPID_COLL_USE_MPICH))
+   {
+      if (MPIDI_Process.mpir_nbc != 0)
+         return 0;
+
+      /* MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Ibarrier() with a blocking MPIR_Barrier(). */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH barrier\n");
+      TRACE_ERR("Using MPICH Barrier\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Barrier(comm_ptr, &mpierrno);
+
+      /* The barrier has already run: hand back an already-completed request. */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+   MPIDI_Post_coll_t barrier_post;
+   pami_xfer_t barrier;
+   pami_algorithm_t my_barrier;
+   pami_metadata_t *my_barrier_md;
+   int queryreq = 0;
+
+   /* cb_ibarrier() completes this request; it is registered as cb_done below. */
+   MPID_Request * mpid_request = MPID_Request_create_inline();
+   mpid_request->kind = MPID_COLL_REQUEST;
+   *request = mpid_request;
+   barrier.cb_done = cb_ibarrier;
+   barrier.cookie = (void *)mpid_request;
+
+   if(comm_ptr->mpid.user_selected_type[PAMI_XFER_BARRIER] == MPID_COLL_OPTIMIZED)
+   {
+      TRACE_ERR("Optimized barrier (%s) was pre-selected\n", comm_ptr->mpid.opt_protocol_md[PAMI_XFER_BARRIER][0].name);
+      my_barrier = comm_ptr->mpid.opt_protocol[PAMI_XFER_BARRIER][0];
+      my_barrier_md = &comm_ptr->mpid.opt_protocol_md[PAMI_XFER_BARRIER][0];
+      queryreq = comm_ptr->mpid.must_query[PAMI_XFER_BARRIER][0];
+   }
+   else
+   {
+      TRACE_ERR("Barrier (%s) was specified by user\n", comm_ptr->mpid.user_metadata[PAMI_XFER_BARRIER].name);
+      my_barrier = comm_ptr->mpid.user_selected[PAMI_XFER_BARRIER];
+      my_barrier_md = &comm_ptr->mpid.user_metadata[PAMI_XFER_BARRIER];
+      queryreq = comm_ptr->mpid.user_selected_type[PAMI_XFER_BARRIER];
+   }
+
+   barrier.algorithm = my_barrier;
+   /* There is no support for query-required barrier protocols here */
+   MPID_assert_always(queryreq != MPID_COLL_ALWAYS_QUERY);
+   MPID_assert_always(queryreq != MPID_COLL_CHECK_FN_REQUIRED);
+
+   /* TODO Name needs fixed somehow */
+   MPIDI_Update_last_algorithm(comm_ptr, my_barrier_md->name);
+   if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+   {
+      unsigned long long int threadID;
+      MPIU_Thread_id_t tid;
+      MPIU_Thread_self(&tid);
+      threadID = (unsigned long long int)tid;
+      fprintf(stderr,"<%llx> Using protocol %s for barrier on %u\n",
+              threadID, my_barrier_md->name, (unsigned) comm_ptr->context_id);
+   }
+   TRACE_ERR("%s barrier\n", MPIDI_Process.context_post.active>0?"posting":"invoking");
+   /* NOTE(review): barrier and barrier_post are locals of this function; confirm
+    * the posted work cannot still reference them after MPIDO_Ibarrier returns. */
+   MPIDI_Context_post(MPIDI_Context[0], &barrier_post.state, MPIDI_Pami_post_wrapper, (void *)&barrier);
+   TRACE_ERR("barrier %s\n", MPIDI_Process.context_post.active>0?"posted":"invoked");
+   MPID_Progress_wait_inline(1);
+   TRACE_ERR("exiting mpido_ibarrier\n");
+   return 0;
+}
diff --git a/src/mpid/pamid/src/coll/bcast/Makefile.mk b/src/mpid/pamid/src/coll/bcast/Makefile.mk
index 489e356..2b0c9fe 100644
--- a/src/mpid/pamid/src/coll/bcast/Makefile.mk
+++ b/src/mpid/pamid/src/coll/bcast/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib@MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/bcast/mpido_bcast.c
+    src/mpid/pamid/src/coll/bcast/mpido_bcast.c                      \
+    src/mpid/pamid/src/coll/bcast/mpido_ibcast.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/bcast/mpido_ibcast.c b/src/mpid/pamid/src/coll/bcast/mpido_ibcast.c
new file mode 100644
index 0000000..07bd89a
--- /dev/null
+++ b/src/mpid/pamid/src/coll/bcast/mpido_ibcast.c
@@ -0,0 +1,86 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/bcast/mpido_ibcast.c
+ * \brief Non-blocking broadcast entry point (MPIDO_Ibcast) for the pamid device
+ */
+
+/*#define TRACE_ON*/
+
+#include <mpidimpl.h>
+
+int MPIDO_Ibcast(void *buffer,
+                 int count,
+                 MPI_Datatype datatype,
+                 int root,
+                 MPID_Comm *comm_ptr,
+                 MPID_Request **request)
+{
+   TRACE_ERR("in mpido_ibcast\n");
+
+   /* The active code below does not read any of the locals set here. */
+   const unsigned is_root_rank = (root == comm_ptr->rank);
+   const unsigned user_selected_type =
+     comm_ptr->mpid.user_selected_type[PAMI_XFER_BROADCAST];
+   MPID_Datatype *data_ptr;
+   MPI_Aint data_true_lb = 0;
+   int data_contig, data_size;
+
+   MPIDI_Datatype_get_info(count, datatype,
+                           data_contig, data_size, data_ptr, data_true_lb);
+
+   /*
+    * Intended guard (commented out, so the block below always runs): a
+    * 0-length datatype with a non-zero count, or a user-forced mpich bcast
+    * algorithm, takes the mpich bcast path.
+    */
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      MPID_Request *req;
+      int errflag = 0;
+      int status;
+
+      /*
+       * mpir_nbc set: return before any MPID_Request is created.  That
+       * tells MPIR_Ibcast_impl() to run the mpich nbc implementation of
+       * MPI_Ibcast().
+       */
+      if (MPIDI_Process.mpir_nbc)
+         return 0;
+
+      /* mpir_nbc clear: stand in for the non-blocking call with a blocking bcast. */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH bcast algorithm\n");
+
+      status = MPIR_Bcast_intra(buffer, count, datatype, root, comm_ptr, &errflag);
+
+      /*
+       * The bcast is already done; build a request and complete it at once
+       * so that MPIR_Ibcast_impl() does not perform the bcast itself.
+       */
+      req = MPID_Request_create_inline();
+      req->kind = MPID_COLL_REQUEST;
+      *request = req;
+      MPIDI_Request_complete_norelease_inline(req);
+
+      return status;
+   }
+
+   TRACE_ERR("leaving ibcast\n");
+   return 0;
+}
diff --git a/src/mpid/pamid/src/coll/exscan/Makefile.mk b/src/mpid/pamid/src/coll/exscan/Makefile.mk
new file mode 100644
index 0000000..bd79293
--- /dev/null
+++ b/src/mpid/pamid/src/coll/exscan/Makefile.mk
@@ -0,0 +1,28 @@
+# begin_generated_IBM_copyright_prolog
+#
+# This is an automatically generated copyright prolog.
+# After initializing,  DO NOT MODIFY OR MOVE
+#  ---------------------------------------------------------------
+# Licensed Materials - Property of IBM
+# Blue Gene/Q 5765-PER 5765-PRP
+#
+# (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved
+# US Government Users Restricted Rights -
+# Use, duplication, or disclosure restricted
+# by GSA ADP Schedule Contract with IBM Corp.
+#
+#  ---------------------------------------------------------------
+#
+# end_generated_IBM_copyright_prolog
+# -*- mode: makefile-gmake; -*-
+
+# note that the includes always happen but the effects of their contents are
+# affected by "if BUILD_PAMID"
+if BUILD_PAMID
+
+
+lib_lib@MPILIBNAME@_la_SOURCES +=                                    \
+    src/mpid/pamid/src/coll/exscan/mpido_iexscan.c
+
+
+endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/exscan/mpido_iexscan.c b/src/mpid/pamid/src/coll/exscan/mpido_iexscan.c
new file mode 100644
index 0000000..a45dc41
--- /dev/null
+++ b/src/mpid/pamid/src/coll/exscan/mpido_iexscan.c
@@ -0,0 +1,65 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/exscan/mpido_iexscan.c
+ * \brief Non-blocking exclusive scan entry point (MPIDO_Iexscan) for the pamid device
+ */
+
+/*#define TRACE_ON */
+#include <mpidimpl.h>
+
+int MPIDO_Iexscan(const void *sendbuf, void *recvbuf,
+                  int count, MPI_Datatype datatype,
+                  MPI_Op op, MPID_Comm * comm_ptr, MPID_Request **request)
+{
+   /* The guard below is commented out, so the block that follows always
+    * runs; this function contains no other code path. */
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      MPID_Request *req;
+      int errflag = 0;
+      int status;
+
+      /*
+       * mpir_nbc set: return before any MPID_Request is created.  That
+       * tells MPIR_Iexscan_impl() to run the mpich nbc implementation of
+       * MPI_Iexscan().
+       */
+      if (MPIDI_Process.mpir_nbc)
+         return 0;
+
+      /* mpir_nbc clear: stand in for the non-blocking call with a blocking exscan. */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking exscan algorithm\n");
+
+      status = MPIR_Exscan_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, &errflag);
+
+      /*
+       * The exscan is already done; build a request and complete it at once
+       * so that MPIR_Iexscan_impl() does not perform an additional iexscan.
+       */
+      req = MPID_Request_create_inline();
+      req->kind = MPID_COLL_REQUEST;
+      *request = req;
+      MPIDI_Request_complete_norelease_inline(req);
+
+      return status;
+   }
+
+   return 0;
+}
diff --git a/src/mpid/pamid/src/coll/gather/Makefile.mk b/src/mpid/pamid/src/coll/gather/Makefile.mk
index a83086c..3a999c8 100644
--- a/src/mpid/pamid/src/coll/gather/Makefile.mk
+++ b/src/mpid/pamid/src/coll/gather/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib@MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/gather/mpido_gather.c
+    src/mpid/pamid/src/coll/gather/mpido_gather.c                    \
+    src/mpid/pamid/src/coll/gather/mpido_igather.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/gather/mpido_igather.c b/src/mpid/pamid/src/coll/gather/mpido_igather.c
new file mode 100644
index 0000000..dfaa8fa
--- /dev/null
+++ b/src/mpid/pamid/src/coll/gather/mpido_igather.c
@@ -0,0 +1,72 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/gather/mpido_igather.c
+ * \brief Non-blocking gather entry point (MPIDO_Igather) for the pamid device
+ */
+
+#include <mpidimpl.h>
+
+int MPIDO_Igather(const void *sendbuf,
+                  int sendcount,
+                  MPI_Datatype sendtype,
+                  void *recvbuf,
+                  int recvcount,
+                  MPI_Datatype recvtype,
+                  int root,
+                  MPID_Comm *comm_ptr,
+                  MPID_Request **request)
+{
+   /* The guard below is commented out, so the block that follows always
+    * runs; this function contains no other code path. */
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      MPID_Request *req;
+      int errflag = 0;
+      int status;
+
+      /*
+       * mpir_nbc set: return before any MPID_Request is created.  That
+       * tells MPIR_Igather_impl() to run the mpich nbc implementation of
+       * MPI_Igather().
+       */
+      if (MPIDI_Process.mpir_nbc)
+         return 0;
+
+      /* mpir_nbc clear: stand in for the non-blocking call with a blocking gather. */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking gather algorithm\n");
+
+      status = MPIR_Gather_impl(sendbuf, sendcount, sendtype,
+                                recvbuf, recvcount, recvtype,
+                                root, comm_ptr, &errflag);
+
+      /*
+       * The gather is already done; build a request and complete it at once
+       * so that MPIR_Igather_impl() does not perform an additional igather.
+       */
+      req = MPID_Request_create_inline();
+      req->kind = MPID_COLL_REQUEST;
+      *request = req;
+      MPIDI_Request_complete_norelease_inline(req);
+
+      return status;
+   }
+
+   return 0;
+}
diff --git a/src/mpid/pamid/src/coll/gatherv/Makefile.mk b/src/mpid/pamid/src/coll/gatherv/Makefile.mk
index e8ecd48..efb4ac0 100644
--- a/src/mpid/pamid/src/coll/gatherv/Makefile.mk
+++ b/src/mpid/pamid/src/coll/gatherv/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib@MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/gatherv/mpido_gatherv.c
+    src/mpid/pamid/src/coll/gatherv/mpido_gatherv.c                  \
+    src/mpid/pamid/src/coll/gatherv/mpido_igatherv.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/gatherv/mpido_igatherv.c b/src/mpid/pamid/src/coll/gatherv/mpido_igatherv.c
new file mode 100644
index 0000000..6b8bdce
--- /dev/null
+++ b/src/mpid/pamid/src/coll/gatherv/mpido_igatherv.c
@@ -0,0 +1,78 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/gatherv/mpido_igatherv.c
+ * \brief Non-blocking gatherv entry point (MPIDO_Igatherv) for the pamid device
+ */
+
+/*#define TRACE_ON*/
+#include <mpidimpl.h>
+
+int MPIDO_Igatherv(const void *sendbuf,
+                   int sendcount,
+                   MPI_Datatype sendtype,
+                   void *recvbuf,
+                   const int *recvcounts,
+                   const int *displs,
+                   MPI_Datatype recvtype,
+                   int root,
+                   MPID_Comm * comm_ptr,
+                   MPID_Request **request)
+{
+   TRACE_ERR("Entering MPIDO_Igatherv\n");
+
+   /* The guard below is commented out, so the block that follows always
+    * runs; this function contains no other code path. */
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      MPID_Request *req;
+      int errflag = 0;
+      int status;
+
+      /*
+       * mpir_nbc set: return before any MPID_Request is created.  That
+       * tells MPIR_Igatherv_impl() to run the mpich nbc implementation of
+       * MPI_Igatherv().
+       */
+      if (MPIDI_Process.mpir_nbc)
+         return 0;
+
+      /*
+       * mpir_nbc clear: stand in for the non-blocking call with a blocking gatherv.
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking gatherv algorithm\n");
+
+      status = MPIR_Gatherv_impl(sendbuf, sendcount, sendtype,
+                                 recvbuf, recvcounts, displs, recvtype,
+                                 root, comm_ptr, &errflag);
+
+      /*
+       * The gatherv is already done; build a request and complete it at once
+       * so that MPIR_Igatherv_impl() does not perform an additional igatherv.
+       */
+      req = MPID_Request_create_inline();
+      req->kind = MPID_COLL_REQUEST;
+      *request = req;
+      MPIDI_Request_complete_norelease_inline(req);
+
+      return status;
+   }
+
+   return 0;
+}
diff --git a/src/mpid/pamid/src/coll/ired_scat/Makefile.mk b/src/mpid/pamid/src/coll/ired_scat/Makefile.mk
new file mode 100644
index 0000000..da23390
--- /dev/null
+++ b/src/mpid/pamid/src/coll/ired_scat/Makefile.mk
@@ -0,0 +1,28 @@
+# begin_generated_IBM_copyright_prolog
+#
+# This is an automatically generated copyright prolog.
+# After initializing,  DO NOT MODIFY OR MOVE
+#  ---------------------------------------------------------------
+# Licensed Materials - Property of IBM
+# Blue Gene/Q 5765-PER 5765-PRP
+#
+# (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved
+# US Government Users Restricted Rights -
+# Use, duplication, or disclosure restricted
+# by GSA ADP Schedule Contract with IBM Corp.
+#
+#  ---------------------------------------------------------------
+#
+# end_generated_IBM_copyright_prolog
+# -*- mode: makefile-gmake; -*-
+
+# note that the includes always happen but the effects of their contents are
+# affected by "if BUILD_PAMID"
+if BUILD_PAMID
+
+
+lib_lib@MPILIBNAME@_la_SOURCES +=                                    \
+    src/mpid/pamid/src/coll/ired_scat/mpido_ired_scat.c
+
+
+endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/ired_scat/mpido_ired_scat.c b/src/mpid/pamid/src/coll/ired_scat/mpido_ired_scat.c
new file mode 100644
index 0000000..d67f2d4
--- /dev/null
+++ b/src/mpid/pamid/src/coll/ired_scat/mpido_ired_scat.c
@@ -0,0 +1,72 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/ired_scat/mpido_ired_scat.c
+ * \brief Non-blocking reduce_scatter entry point (MPIDO_Ireduce_scatter) for the pamid device
+ */
+
+/*#define TRACE_ON*/
+#include <mpidimpl.h>
+
+int MPIDO_Ireduce_scatter(const void *sendbuf,
+                          void *recvbuf,
+                          const int *recvcounts,
+                          MPI_Datatype datatype,
+                          MPI_Op op,
+                          MPID_Comm *comm_ptr,
+                          MPID_Request **request)
+{
+   TRACE_ERR("Entering MPIDO_Ireduce_scatter\n");
+
+   /* The guard below is commented out, so the block that follows always
+    * runs; this function contains no other code path. */
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      MPID_Request *req;
+      int errflag = 0;
+      int status;
+
+      /*
+       * mpir_nbc set: return before any MPID_Request is created.  That
+       * tells MPIR_Ireduce_scatter_impl() to run the mpich nbc
+       * implementation of MPI_Ireduce_scatter().
+       */
+      if (MPIDI_Process.mpir_nbc)
+         return 0;
+
+      /* mpir_nbc clear: stand in with a blocking MPIR_Reduce_scatter_impl(). */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking reduce_scatter algorithm\n");
+
+      status = MPIR_Reduce_scatter_impl(sendbuf, recvbuf, recvcounts, datatype,
+                                        op, comm_ptr, &errflag);
+
+      /*
+       * The reduce_scatter is already done; build and complete a request so
+       * MPIR_Ireduce_scatter_impl() does not perform another ireduce_scatter.
+       */
+      req = MPID_Request_create_inline();
+      req->kind = MPID_COLL_REQUEST;
+      *request = req;
+      MPIDI_Request_complete_norelease_inline(req);
+
+      return status;
+   }
+
+   return 0;
+}
diff --git a/src/mpid/pamid/src/coll/ired_scat_block/Makefile.mk b/src/mpid/pamid/src/coll/ired_scat_block/Makefile.mk
new file mode 100644
index 0000000..abacf7a
--- /dev/null
+++ b/src/mpid/pamid/src/coll/ired_scat_block/Makefile.mk
@@ -0,0 +1,28 @@
+# begin_generated_IBM_copyright_prolog
+#
+# This is an automatically generated copyright prolog.
+# After initializing,  DO NOT MODIFY OR MOVE
+#  ---------------------------------------------------------------
+# Licensed Materials - Property of IBM
+# Blue Gene/Q 5765-PER 5765-PRP
+#
+# (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved
+# US Government Users Restricted Rights -
+# Use, duplication, or disclosure restricted
+# by GSA ADP Schedule Contract with IBM Corp.
+#
+#  ---------------------------------------------------------------
+#
+# end_generated_IBM_copyright_prolog
+# -*- mode: makefile-gmake; -*-
+
+# note that the includes always happen but the effects of their contents are
+# affected by "if BUILD_PAMID"
+if BUILD_PAMID
+
+
+lib_lib@MPILIBNAME@_la_SOURCES +=                                    \
+    src/mpid/pamid/src/coll/ired_scat_block/mpido_ired_scat_block.c
+
+
+endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/ired_scat_block/mpido_ired_scat_block.c b/src/mpid/pamid/src/coll/ired_scat_block/mpido_ired_scat_block.c
new file mode 100644
index 0000000..a0a4fdc
--- /dev/null
+++ b/src/mpid/pamid/src/coll/ired_scat_block/mpido_ired_scat_block.c
@@ -0,0 +1,72 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/ired_scat_block/mpido_ired_scat_block.c
+ * \brief Non-blocking reduce_scatter_block entry point (MPIDO_Ireduce_scatter_block) for the pamid device
+ */
+
+/*#define TRACE_ON*/
+#include <mpidimpl.h>
+
+int MPIDO_Ireduce_scatter_block(const void *sendbuf,
+                                void *recvbuf,
+                                int recvcount,
+                                MPI_Datatype datatype,
+                                MPI_Op op,
+                                MPID_Comm *comm_ptr,
+                                MPID_Request **request)
+{
+   TRACE_ERR("Entering MPIDO_Ireduce_scatter_block\n");
+
+   /* The guard below is commented out, so the block that follows always
+    * runs; this function contains no other code path. */
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      MPID_Request *req;
+      int errflag = 0;
+      int status;
+
+      /*
+       * mpir_nbc set: return before any MPID_Request is created.  That
+       * tells MPIR_Ireduce_scatter_block_impl() to run the mpich nbc
+       * implementation of MPI_Ireduce_scatter_block().
+       */
+      if (MPIDI_Process.mpir_nbc)
+         return 0;
+
+      /* mpir_nbc clear: stand in with a blocking MPIR_Reduce_scatter_block_impl(). */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking reduce_scatter_block algorithm\n");
+
+      status = MPIR_Reduce_scatter_block_impl(sendbuf, recvbuf, recvcount, datatype,
+                                              op, comm_ptr, &errflag);
+
+      /*
+       * The reduction is already done; build and complete a request so that
+       * MPIR_Ireduce_scatter_block_impl() does not perform it a second time.
+       */
+      req = MPID_Request_create_inline();
+      req->kind = MPID_COLL_REQUEST;
+      *request = req;
+      MPIDI_Request_complete_norelease_inline(req);
+
+      return status;
+   }
+
+   return 0;
+}
diff --git a/src/mpid/pamid/src/coll/reduce/Makefile.mk b/src/mpid/pamid/src/coll/reduce/Makefile.mk
index d6c7a21..b7421dc 100644
--- a/src/mpid/pamid/src/coll/reduce/Makefile.mk
+++ b/src/mpid/pamid/src/coll/reduce/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib@MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/reduce/mpido_reduce.c
+    src/mpid/pamid/src/coll/reduce/mpido_reduce.c                    \
+    src/mpid/pamid/src/coll/reduce/mpido_ireduce.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/reduce/mpido_ireduce.c b/src/mpid/pamid/src/coll/reduce/mpido_ireduce.c
new file mode 100644
index 0000000..63e178d
--- /dev/null
+++ b/src/mpid/pamid/src/coll/reduce/mpido_ireduce.c
@@ -0,0 +1,72 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/reduce/mpido_ireduce.c
+ * \brief MPIDO_Ireduce(): defers to the MPICH nbc code or runs a blocking reduce
+ */
+
+/*#define TRACE_ON*/
+#include <mpidimpl.h>
+
+int MPIDO_Ireduce(const void *sendbuf,
+                  void *recvbuf,
+                  int count,
+                  MPI_Datatype datatype,
+                  MPI_Op op,
+                  int root,
+                  MPID_Comm *comm_ptr,
+                  MPID_Request **request)
+
+{
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Ireduce_impl() function to invoke the mpich nbc
+       * implementation of MPI_Ireduce().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Ireduce() with a blocking MPIR_Reduce().
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking reduce_algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Reduce_impl(sendbuf, recvbuf, count, datatype,
+                                op, root, comm_ptr, &mpierrno);
+
+      /*
+       * The blocking reduce has completed - create and complete a
+       * MPID_Request object so the MPIR_Ireduce_impl() function
+       * does not perform an additional ireduce.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+   return 0;
+}
diff --git a/src/mpid/pamid/src/coll/scan/mpido_scan.c b/src/mpid/pamid/src/coll/scan/mpido_scan.c
index 26d9fc5..6f58dd9 100644
--- a/src/mpid/pamid/src/coll/scan/mpido_scan.c
+++ b/src/mpid/pamid/src/coll/scan/mpido_scan.c
@@ -42,6 +42,47 @@ int MPIDO_Scan(const void *sendbuf, void *recvbuf,
                 op, comm_ptr, mpierrno, 0);
 }
 
+int MPIDO_Iscan(const void *sendbuf, void *recvbuf,
+                int count, MPI_Datatype datatype,
+                MPI_Op op, MPID_Comm * comm_ptr, MPID_Request **request)
+{
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Iscan_impl() function to invoke the mpich nbc
+       * implementation of MPI_Iscan().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Iscan() with a blocking MPIR_Scan().
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking scan_algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Scan_impl(sendbuf, recvbuf, count, datatype,
+                              op, comm_ptr, &mpierrno);
+
+      /*
+       * The blocking scan has completed - create and complete a
+       * MPID_Request object so the MPIR_Iscan_impl() function
+       * does not perform an additional iscan.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+   return 0;
+}
    
 int MPIDO_Exscan(const void *sendbuf, void *recvbuf, 
                int count, MPI_Datatype datatype,
diff --git a/src/mpid/pamid/src/coll/scatter/Makefile.mk b/src/mpid/pamid/src/coll/scatter/Makefile.mk
index 2061411..b64f873 100644
--- a/src/mpid/pamid/src/coll/scatter/Makefile.mk
+++ b/src/mpid/pamid/src/coll/scatter/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/scatter/mpido_scatter.c
+    src/mpid/pamid/src/coll/scatter/mpido_scatter.c                  \
+    src/mpid/pamid/src/coll/scatter/mpido_iscatter.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/scatter/mpido_iscatter.c b/src/mpid/pamid/src/coll/scatter/mpido_iscatter.c
new file mode 100644
index 0000000..f13a192
--- /dev/null
+++ b/src/mpid/pamid/src/coll/scatter/mpido_iscatter.c
@@ -0,0 +1,74 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/scatter/mpido_iscatter.c
+ * \brief MPIDO_Iscatter(): defers to the MPICH nbc code or runs a blocking scatter
+ */
+
+/*#define TRACE_ON */
+
+#include <mpidimpl.h>
+
+int MPIDO_Iscatter(const void *sendbuf,
+                   int sendcount,
+                   MPI_Datatype sendtype,
+                   void *recvbuf,
+                   int recvcount,
+                   MPI_Datatype recvtype,
+                   int root,
+                   MPID_Comm *comm_ptr,
+                   MPID_Request **request)
+{
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Iscatter_impl() function to invoke the mpich nbc
+       * implementation of MPI_Iscatter().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Iscatter() with a blocking MPIR_Scatter().
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking scatter_algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Scatter_impl(sendbuf, sendcount, sendtype,
+                                 recvbuf, recvcount, recvtype,
+                                 root, comm_ptr, &mpierrno);
+
+      /*
+       * The blocking scatter has completed - create and complete a
+       * MPID_Request object so the MPIR_Iscatter_impl() function
+       * does not perform an additional iscatter.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+   return 0;
+}
diff --git a/src/mpid/pamid/src/coll/scatterv/Makefile.mk b/src/mpid/pamid/src/coll/scatterv/Makefile.mk
index fa6aa44..d0f9e51 100644
--- a/src/mpid/pamid/src/coll/scatterv/Makefile.mk
+++ b/src/mpid/pamid/src/coll/scatterv/Makefile.mk
@@ -22,7 +22,8 @@ if BUILD_PAMID
 
 
 lib_lib at MPILIBNAME@_la_SOURCES +=                                    \
-    src/mpid/pamid/src/coll/scatterv/mpido_scatterv.c
+    src/mpid/pamid/src/coll/scatterv/mpido_scatterv.c                \
+    src/mpid/pamid/src/coll/scatterv/mpido_iscatterv.c
 
 
 endif BUILD_PAMID
diff --git a/src/mpid/pamid/src/coll/scatterv/mpido_iscatterv.c b/src/mpid/pamid/src/coll/scatterv/mpido_iscatterv.c
new file mode 100644
index 0000000..847ac7e
--- /dev/null
+++ b/src/mpid/pamid/src/coll/scatterv/mpido_iscatterv.c
@@ -0,0 +1,73 @@
+/* begin_generated_IBM_copyright_prolog                             */
+/*                                                                  */
+/* This is an automatically generated copyright prolog.             */
+/* After initializing,  DO NOT MODIFY OR MOVE                       */
+/*  --------------------------------------------------------------- */
+/* Licensed Materials - Property of IBM                             */
+/* Blue Gene/Q 5765-PER 5765-PRP                                    */
+/*                                                                  */
+/* (C) Copyright IBM Corp. 2011, 2012 All Rights Reserved           */
+/* US Government Users Restricted Rights -                          */
+/* Use, duplication, or disclosure restricted                       */
+/* by GSA ADP Schedule Contract with IBM Corp.                      */
+/*                                                                  */
+/*  --------------------------------------------------------------- */
+/*                                                                  */
+/* end_generated_IBM_copyright_prolog                               */
+/*  (C)Copyright IBM Corp.  2007, 2011  */
+/**
+ * \file src/coll/scatterv/mpido_iscatterv.c
+ * \brief MPIDO_Iscatterv(): defers to the MPICH nbc code or runs a blocking scatterv
+ */
+
+#include <mpidimpl.h>
+
+int MPIDO_Iscatterv(const void *sendbuf,
+                    const int *sendcounts,
+                    const int *displs,
+                    MPI_Datatype sendtype,
+                    void *recvbuf,
+                    int recvcount,
+                    MPI_Datatype recvtype,
+                    int root,
+                    MPID_Comm *comm_ptr,
+                    MPID_Request **request)
+{
+   /*if (unlikely((data_size == 0) || (user_selected_type == MPID_COLL_USE_MPICH)))*/
+   {
+      /*
+       * If the mpich mpir non-blocking collectives are enabled, return without
+       * first constructing the MPID_Request. This signals to the
+       * MPIR_Iscatterv_impl() function to invoke the mpich nbc
+       * implementation of MPI_Iscatterv().
+       */
+      if (MPIDI_Process.mpir_nbc != 0)
+       return 0;
+
+      /*
+       * MPIR_* nbc implementation is not enabled. Fake a non-blocking
+       * MPIR_Iscatterv() with a blocking MPIR_Scatterv().
+       */
+      if(unlikely(MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL && comm_ptr->rank == 0))
+         fprintf(stderr,"Using MPICH blocking scatterv_algorithm\n");
+
+      int mpierrno = 0;
+      int rc = MPIR_Scatterv_impl(sendbuf, sendcounts, displs, sendtype,
+                                 recvbuf, recvcount, recvtype,
+                                 root, comm_ptr, &mpierrno);
+
+      /*
+       * The blocking scatterv has completed - create and complete a
+       * MPID_Request object so the MPIR_Iscatterv_impl() function
+       * does not perform an additional iscatterv.
+       */
+      MPID_Request * mpid_request = MPID_Request_create_inline();
+      mpid_request->kind = MPID_COLL_REQUEST;
+      *request = mpid_request;
+      MPIDI_Request_complete_norelease_inline(mpid_request);
+
+      return rc;
+   }
+
+   return 0;
+}
diff --git a/src/mpid/pamid/src/comm/mpid_selectcolls.c b/src/mpid/pamid/src/comm/mpid_selectcolls.c
index 571eb68..2e658e9 100644
--- a/src/mpid/pamid/src/comm/mpid_selectcolls.c
+++ b/src/mpid/pamid/src/comm/mpid_selectcolls.c
@@ -690,6 +690,49 @@ void MPIDI_Comm_coll_query(MPID_Comm *comm)
    comm->coll_fns->Scan         = MPIDO_Scan;
    comm->coll_fns->Exscan       = MPIDO_Exscan;
 
+   /* MPI-3 Support, default (non-optimized) MPIR implementations */
+   comm->coll_fns->Ibarrier              = MPIR_Ibarrier_intra;
+   comm->coll_fns->Ibcast                = MPIR_Ibcast_intra;
+   comm->coll_fns->Igather               = MPIR_Igather_intra;
+   comm->coll_fns->Igatherv              = MPIR_Igatherv;
+   comm->coll_fns->Iscatter              = MPIR_Iscatter_intra;
+   comm->coll_fns->Iscatterv             = MPIR_Iscatterv;
+   comm->coll_fns->Iallgather            = MPIR_Iallgather_intra;
+   comm->coll_fns->Iallgatherv           = MPIR_Iallgatherv_intra;
+   comm->coll_fns->Ialltoall             = MPIR_Ialltoall_intra;
+   comm->coll_fns->Ialltoallv            = MPIR_Ialltoallv_intra;
+   comm->coll_fns->Ialltoallw            = MPIR_Ialltoallw_intra;
+   comm->coll_fns->Iallreduce            = MPIR_Iallreduce_intra;
+   comm->coll_fns->Ireduce               = MPIR_Ireduce_intra;
+   comm->coll_fns->Ireduce_scatter       = MPIR_Ireduce_scatter_intra;
+   comm->coll_fns->Ireduce_scatter_block = MPIR_Ireduce_scatter_block_intra;
+   comm->coll_fns->Iscan                 = MPIR_Iscan_rec_dbl;
+   comm->coll_fns->Iexscan               = MPIR_Iexscan;
+   comm->coll_fns->Neighbor_allgather    = MPIR_Neighbor_allgather_default;
+   comm->coll_fns->Neighbor_allgatherv   = MPIR_Neighbor_allgatherv_default;
+   comm->coll_fns->Neighbor_alltoall     = MPIR_Neighbor_alltoall_default;
+   comm->coll_fns->Neighbor_alltoallv    = MPIR_Neighbor_alltoallv_default;
+   comm->coll_fns->Neighbor_alltoallw    = MPIR_Neighbor_alltoallw_default;
+
+   /* MPI-3 Support, optimized collectives hooked in */
+   comm->coll_fns->Ibarrier_optimized              = MPIDO_Ibarrier;
+   comm->coll_fns->Ibcast_optimized                = MPIDO_Ibcast;
+   comm->coll_fns->Iallgather_optimized            = MPIDO_Iallgather;
+   comm->coll_fns->Iallgatherv_optimized           = MPIDO_Iallgatherv;
+   comm->coll_fns->Iallreduce_optimized            = MPIDO_Iallreduce;
+   comm->coll_fns->Ialltoall_optimized             = MPIDO_Ialltoall;
+   comm->coll_fns->Ialltoallv_optimized            = MPIDO_Ialltoallv;
+   comm->coll_fns->Ialltoallw_optimized            = MPIDO_Ialltoallw;
+   comm->coll_fns->Iexscan_optimized               = MPIDO_Iexscan;
+   comm->coll_fns->Igather_optimized               = MPIDO_Igather;
+   comm->coll_fns->Igatherv_optimized              = MPIDO_Igatherv;
+   comm->coll_fns->Ireduce_scatter_block_optimized = MPIDO_Ireduce_scatter_block;
+   comm->coll_fns->Ireduce_scatter_optimized       = MPIDO_Ireduce_scatter;
+   comm->coll_fns->Ireduce_optimized               = MPIDO_Ireduce;
+   comm->coll_fns->Iscan_optimized                 = MPIDO_Iscan;
+   comm->coll_fns->Iscatter_optimized              = MPIDO_Iscatter;
+   comm->coll_fns->Iscatterv_optimized             = MPIDO_Iscatterv;
+
    TRACE_ERR("MPIDI_Comm_coll_query exit\n");
 }
 

-----------------------------------------------------------------------

Summary of changes:
 src/include/mpiimpl.h                              |   79 +++++++++++---
 src/mpi/coll/iallgather.c                          |   32 ++++--
 src/mpi/coll/iallgatherv.c                         |   32 ++++--
 src/mpi/coll/iallreduce.c                          |   36 ++++--
 src/mpi/coll/ialltoall.c                           |   18 +++-
 src/mpi/coll/ialltoallv.c                          |   18 +++-
 src/mpi/coll/ialltoallw.c                          |   18 +++-
 src/mpi/coll/ibarrier.c                            |   33 ++++--
 src/mpi/coll/ibcast.c                              |   35 ++++--
 src/mpi/coll/iexscan.c                             |   18 +++-
 src/mpi/coll/igather.c                             |   24 +++-
 src/mpi/coll/igatherv.c                            |   20 +++-
 src/mpi/coll/ired_scat.c                           |   34 ++++--
 src/mpi/coll/ired_scat_block.c                     |   34 ++++--
 src/mpi/coll/ireduce.c                             |   34 ++++--
 src/mpi/coll/iscan.c                               |   31 ++++--
 src/mpi/coll/iscatter.c                            |   25 +++-
 src/mpi/coll/iscatterv.c                           |   21 +++-
 src/mpi/comm/commutil.c                            |   76 +++++++-------
 src/mpid/pamid/include/mpidi_prototypes.h          |   48 ++++++++
 src/mpid/pamid/src/coll/Makefile.mk                |    5 +-
 src/mpid/pamid/src/coll/allgather/Makefile.mk      |    3 +-
 .../pamid/src/coll/allgather/mpido_iallgather.c    |   73 ++++++++++++
 src/mpid/pamid/src/coll/allgatherv/Makefile.mk     |    3 +-
 .../pamid/src/coll/allgatherv/mpido_iallgatherv.c  |   77 +++++++++++++
 src/mpid/pamid/src/coll/allreduce/Makefile.mk      |    3 +-
 .../pamid/src/coll/allreduce/mpido_iallreduce.c    |   73 ++++++++++++
 src/mpid/pamid/src/coll/alltoall/Makefile.mk       |    3 +-
 src/mpid/pamid/src/coll/alltoall/mpido_ialltoall.c |   76 +++++++++++++
 src/mpid/pamid/src/coll/alltoallv/Makefile.mk      |    3 +-
 .../pamid/src/coll/alltoallv/mpido_ialltoallv.c    |   79 +++++++++++++
 src/mpid/pamid/src/coll/alltoallw/Makefile.mk      |   28 +++++
 .../pamid/src/coll/alltoallw/mpido_ialltoallw.c    |   79 +++++++++++++
 src/mpid/pamid/src/coll/barrier/Makefile.mk        |    3 +-
 src/mpid/pamid/src/coll/barrier/mpido_ibarrier.c   |  116 ++++++++++++++++++++
 src/mpid/pamid/src/coll/bcast/Makefile.mk          |    3 +-
 src/mpid/pamid/src/coll/bcast/mpido_ibcast.c       |   86 +++++++++++++++
 src/mpid/pamid/src/coll/exscan/Makefile.mk         |   28 +++++
 src/mpid/pamid/src/coll/exscan/mpido_iexscan.c     |   65 +++++++++++
 src/mpid/pamid/src/coll/gather/Makefile.mk         |    3 +-
 src/mpid/pamid/src/coll/gather/mpido_igather.c     |   72 ++++++++++++
 src/mpid/pamid/src/coll/gatherv/Makefile.mk        |    3 +-
 src/mpid/pamid/src/coll/gatherv/mpido_igatherv.c   |   78 +++++++++++++
 src/mpid/pamid/src/coll/ired_scat/Makefile.mk      |   28 +++++
 .../pamid/src/coll/ired_scat/mpido_ired_scat.c     |   72 ++++++++++++
 .../pamid/src/coll/ired_scat_block/Makefile.mk     |   28 +++++
 .../coll/ired_scat_block/mpido_ired_scat_block.c   |   72 ++++++++++++
 src/mpid/pamid/src/coll/reduce/Makefile.mk         |    3 +-
 src/mpid/pamid/src/coll/reduce/mpido_ireduce.c     |   72 ++++++++++++
 src/mpid/pamid/src/coll/scan/mpido_scan.c          |   41 +++++++
 src/mpid/pamid/src/coll/scatter/Makefile.mk        |    3 +-
 src/mpid/pamid/src/coll/scatter/mpido_iscatter.c   |   74 +++++++++++++
 src/mpid/pamid/src/coll/scatterv/Makefile.mk       |    3 +-
 src/mpid/pamid/src/coll/scatterv/mpido_iscatterv.c |   73 ++++++++++++
 src/mpid/pamid/src/comm/mpid_selectcolls.c         |   43 +++++++
 55 files changed, 1948 insertions(+), 192 deletions(-)
 create mode 100644 src/mpid/pamid/src/coll/allgather/mpido_iallgather.c
 create mode 100644 src/mpid/pamid/src/coll/allgatherv/mpido_iallgatherv.c
 create mode 100644 src/mpid/pamid/src/coll/allreduce/mpido_iallreduce.c
 create mode 100644 src/mpid/pamid/src/coll/alltoall/mpido_ialltoall.c
 create mode 100644 src/mpid/pamid/src/coll/alltoallv/mpido_ialltoallv.c
 create mode 100644 src/mpid/pamid/src/coll/alltoallw/Makefile.mk
 create mode 100644 src/mpid/pamid/src/coll/alltoallw/mpido_ialltoallw.c
 create mode 100644 src/mpid/pamid/src/coll/barrier/mpido_ibarrier.c
 create mode 100644 src/mpid/pamid/src/coll/bcast/mpido_ibcast.c
 create mode 100644 src/mpid/pamid/src/coll/exscan/Makefile.mk
 create mode 100644 src/mpid/pamid/src/coll/exscan/mpido_iexscan.c
 create mode 100644 src/mpid/pamid/src/coll/gather/mpido_igather.c
 create mode 100644 src/mpid/pamid/src/coll/gatherv/mpido_igatherv.c
 create mode 100644 src/mpid/pamid/src/coll/ired_scat/Makefile.mk
 create mode 100644 src/mpid/pamid/src/coll/ired_scat/mpido_ired_scat.c
 create mode 100644 src/mpid/pamid/src/coll/ired_scat_block/Makefile.mk
 create mode 100644 src/mpid/pamid/src/coll/ired_scat_block/mpido_ired_scat_block.c
 create mode 100644 src/mpid/pamid/src/coll/reduce/mpido_ireduce.c
 create mode 100644 src/mpid/pamid/src/coll/scatter/mpido_iscatter.c
 create mode 100644 src/mpid/pamid/src/coll/scatterv/mpido_iscatterv.c


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list