[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2-301-g0ad10e8

Service Account noreply at mpich.org
Fri May 6 11:12:45 CDT 2016


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  0ad10e8000a033d1836ceb493be234773db143f4 (commit)
       via  69567c614fd1285257c62d91a06128819cf98e3e (commit)
       via  bcf8e55a21eaa265a1aae00467dd30185043229d (commit)
       via  16faa92ccb6cb3b0dec392247beea733093c527a (commit)
       via  9fcf5f3e32b80e1419a71736203f4964875709e5 (commit)
       via  5ba5ff318b8a4668d535dea08e910288288184eb (commit)
       via  c4473173e550cc572d908ac1f7dd3c9e459c5405 (commit)
       via  50b21ecddabd41bdfcded62276b7ce9da699a7e5 (commit)
      from  7d511439295df1c3ef918022a4954930288ccf6b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/0ad10e8000a033d1836ceb493be234773db143f4

commit 0ad10e8000a033d1836ceb493be234773db143f4
Author: Pavan Balaji <balaji at anl.gov>
Date:   Sun Apr 24 12:58:51 2016 -0500

    Make MPIR, MPIU, MPID, MPII, MPICH namespace consistent.
    
    MPIR should only be used for functionality that is exposed by the
    MPI-layer downward to the device.  Other functionality owned by the
    MPI layer that is used internally within that layer should be called
    MPII.  MPID functionality is device-specific functionality that is
    exposed to the MPI layer.  The MPIU namespace is being removed for now.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/autogen.sh b/autogen.sh
index 408084e..c5dd677 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -787,7 +787,7 @@ static const int generic_msgs_len = 0;
 static msgpair generic_err_msgs[] = { {0xacebad03, 0, "no error catalog", 0xcb0bfa11}, };
 static const int specific_msgs_len = 0;
 static msgpair specific_err_msgs[] = {  {0xacebad03,0,0,0xcb0bfa11}, };
-#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_NONE
+#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__NONE
 #define MPIR_MAX_ERROR_CLASS_INDEX 54
 static int class_to_index[] = {
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
diff --git a/confdb/aclocal_cc.m4 b/confdb/aclocal_cc.m4
index 92e7ee1..fd7152b 100644
--- a/confdb/aclocal_cc.m4
+++ b/confdb/aclocal_cc.m4
@@ -521,7 +521,7 @@ if test "$enable_strict_done" != "yes" ; then
     # compiler.
     #   -Wno-type-limits -- There are places where we compare an unsigned to 
     #	    a constant that happens to be zero e.g., if x is unsigned and 
-    #	    MIN_VAL is zero, we'd like to do "MPIU_Assert(x >= MIN_VAL);".
+    #	    MIN_VAL is zero, we'd like to do "MPIR_Assert(x >= MIN_VAL);".
     #       Note this option is not supported by gcc 4.2.  This needs to be added 
     #	    after most other warning flags, so that we catch a gcc bug on 32-bit 
     #	    that doesn't give a warning that this is unsupported, unless another
diff --git a/configure.ac b/configure.ac
index de7728b..637b0cc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -938,16 +938,16 @@ AC_SUBST([HAVE_ERROR_CHECKING])
 # error-messages
 case "$enable_error_messages" in 
     no|none)
-        error_message_kind="MPICH_ERROR_MSG_NONE"
+        error_message_kind="MPICH_ERROR_MSG__NONE"
     ;;
     all|yes)
-	error_message_kind="MPICH_ERROR_MSG_ALL"
+	error_message_kind="MPICH_ERROR_MSG__ALL"
     ;;
     generic)
-	error_message_kind="MPICH_ERROR_MSG_GENERIC"
+	error_message_kind="MPICH_ERROR_MSG__GENERIC"
     ;;
     class)
-	error_message_kind="MPICH_ERROR_MSG_CLASS"
+	error_message_kind="MPICH_ERROR_MSG__CLASS"
     ;;
     *)
     AC_MSG_WARN([Unknown value $enable_error_messages for enable-error-messages])
@@ -1114,14 +1114,14 @@ fi
 if test "$timing_name" != "none" ; then
     timing_kind=`echo $timing_name | \
        tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'`
-    timing_kind=MPID_TIMING_KIND_$timing_kind
+    timing_kind=MPICH_TIMING_KIND__$timing_kind
     AC_DEFINE_UNQUOTED(HAVE_TIMING,$timing_kind,[define to enable timing collection])
     if test "$collect_stats" = "true" ; then
         AC_DEFINE(COLLECT_STATS,1,[define to enable collection of statistics])
     fi
 fi
 
-use_logging_variable="MPID_LOGGING_`echo $logging_name | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'`"
+use_logging_variable="MPICH_LOGGING__`echo $logging_name | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'`"
 AC_DEFINE_UNQUOTED(USE_LOGGING,$use_logging_variable,[define to choose logging library])
 # ----------------------------------------------------------------------------
 # End of logging tests
@@ -1350,20 +1350,20 @@ if test "$MPICH_THREAD_LEVEL" != "MPI_THREAD_SINGLE"; then
 fi
 
 # Check for value thread_cs choice; set the refcount default if necessary
-thread_granularity=MPICH_THREAD_GRANULARITY_SINGLE
-thread_refcount=MPIU_REFCOUNT_NONE
+thread_granularity=MPICH_THREAD_GRANULARITY__SINGLE
+thread_refcount=MPICH_REFCOUNT__NONE
 if test "$enable_threads" = "multiple" ; then
     case $enable_thread_cs in 
     global)
-    thread_granularity=MPICH_THREAD_GRANULARITY_GLOBAL
+    thread_granularity=MPICH_THREAD_GRANULARITY__GLOBAL
     if test "$enable_refcount" = "default" ; then enable_refcount=none ; fi
     ;;
     per-object|per_object)
-    thread_granularity=MPICH_THREAD_GRANULARITY_PER_OBJECT
+    thread_granularity=MPICH_THREAD_GRANULARITY__POBJ
     if test "$enable_refcount" = "default" ; then enable_refcount=lock-free ; fi
     ;;
     lock-free|lock_free|lockfree)
-    thread_granularity=MPICH_THREAD_GRANULARITY_LOCK_FREE
+    thread_granularity=MPICH_THREAD_GRANULARITY__LOCKFREE
     if test "$enable_refcount" = "default" ; then enable_refcount=lock-free ; fi
     if test "$enable_predefined_refcount" = "default" ; then enable_predefined_refcount=no ; fi
     AC_MSG_ERROR([--enable-thread-cs=lock-free is not supported yet, please select a different granularity])
@@ -1375,10 +1375,10 @@ if test "$enable_threads" = "multiple" ; then
 
     case $enable_refcount in
     lock-free|lock_free|lockfree)
-    thread_refcount=MPIU_REFCOUNT_LOCKFREE
+    thread_refcount=MPICH_REFCOUNT__LOCKFREE
     ;;
     none)
-    thread_refcount=MPIU_REFCOUNT_NONE
+    thread_refcount=MPICH_REFCOUNT__NONE
     ;;
     *)
     AC_MSG_ERROR([Unrecognized value $enable_refcount for --enable-refcount])
@@ -1388,10 +1388,10 @@ fi
 AC_DEFINE_UNQUOTED([MPICH_THREAD_GRANULARITY],$thread_granularity,[Method used to implement atomic updates and access])
 
 if test "$enable_predefined_refcount" = "no" ; then
-    AC_DEFINE([MPIU_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS],[1],[define to disable reference counting predefined objects like MPI_COMM_WORLD])
+    AC_DEFINE([MPICH_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS],[1],[define to disable reference counting predefined objects like MPI_COMM_WORLD])
 fi
 
-AC_DEFINE_UNQUOTED([MPIU_THREAD_REFCOUNT],$thread_refcount,[Method used to implement refcount updates])
+AC_DEFINE_UNQUOTED([MPICH_THREAD_REFCOUNT],$thread_refcount,[Method used to implement refcount updates])
 
 # enable-g
 # strip off multiple options, separated by commas
@@ -4541,7 +4541,7 @@ if test "$ac_cv_func_vsnprintf" = "yes" ; then
 #include <stdarg.h>],vsnprintf)
 fi
 # We would like to use strerror in the file namepublisher; it is also used
-# in MPIU_Strerror (whose implementation is broken if strerror is not found)
+# in MPIR_Strerror (whose implementation is broken if strerror is not found)
 AC_CHECK_FUNCS(strerror strncasecmp)
 AC_FUNC_STRERROR_R
 if test "$ac_cv_func_strerror_r" = "yes" ; then
@@ -4731,7 +4731,7 @@ fi
 
 # with MPI_AINT defined, now we can
 # Get the size for the bsendoverhead
-AC_CHECK_SIZEOF(MPIR_Bsend_data_t,0,[
+AC_CHECK_SIZEOF(MPII_Bsend_data_t,0,[
 #define MPI_Datatype int
 typedef $MPI_AINT MPI_Aint;
 #ifdef HAVE_STDLIB_H
@@ -4740,16 +4740,16 @@ typedef $MPI_AINT MPI_Aint;
 #ifdef HAVE_STDINT_H
 #include <stdint.h>
 #endif
-#include "${master_top_srcdir}/src/include/mpir_bsend.h"]
+#include "${master_top_srcdir}/src/include/mpii_bsend.h"]
 )
-if test "$ac_cv_sizeof_MPIR_Bsend_data_t" = "0" ; then
+if test "$ac_cv_sizeof_MPII_Bsend_data_t" = "0" ; then
     AC_MSG_ERROR([Unable to determine the size of MPI_BSEND_OVERHEAD])
     # In the past, a default of 128 was used (still likely good enough),
     # but the autoconf SIZEOF macro has been changed to ignore the second
     # argument, so code that depended on the prior defined behavior now
     # silently breaks.
 fi
-BSEND_OVERHEAD=$ac_cv_sizeof_MPIR_Bsend_data_t
+BSEND_OVERHEAD=$ac_cv_sizeof_MPII_Bsend_data_t
 export BSEND_OVERHEAD
 AC_SUBST(BSEND_OVERHEAD)
 
diff --git a/maint/decode_handle b/maint/decode_handle
index 9b740fe..8895f03 100755
--- a/maint/decode_handle
+++ b/maint/decode_handle
@@ -60,7 +60,7 @@ while (scalar @ARGV) {
 ## code from src/include/mpir_objects.h from which this script was derived
 ## ----8<----
 ##
-## typedef enum MPIR_Object_kind {
+## typedef enum MPII_Object_kind {
 ##   MPIR_COMM       = 0x1,
 ##   MPIR_GROUP      = 0x2,
 ##   MPIR_DATATYPE   = 0x3,
@@ -75,14 +75,14 @@ while (scalar @ARGV) {
 ##   MPIR_PROCGROUP  = 0xc,               /* These are internal device objects */
 ##   MPIR_VCONN      = 0xd,
 ##   MPIR_GREQ_CLASS = 0xf
-##   } MPIR_Object_kind;
+##   } MPII_Object_kind;
 ##
 ## #define HANDLE_MPI_KIND_SHIFT 26
 ## #define HANDLE_GET_MPI_KIND(a) ( ((a)&0x3c000000) >> HANDLE_MPI_KIND_SHIFT )
 ## #define HANDLE_SET_MPI_KIND(a,kind) ((a) | ((kind) << HANDLE_MPI_KIND_SHIFT))
 ## 
 ## /* returns the name of the handle kind for debugging/logging purposes */
-## const char *MPIU_Handle_get_kind_str(int kind);
+## const char *MPIR_Handle_get_kind_str(int kind);
 ## 
 ## /* Handle types.  These are really 2 bits */
 ## #define HANDLE_KIND_INVALID  0x0
diff --git a/maint/extractcvars.in b/maint/extractcvars.in
index c352201..6e67733 100755
--- a/maint/extractcvars.in
+++ b/maint/extractcvars.in
@@ -170,7 +170,7 @@ foreach my $p (@cvars) {
 print OUTPUT_H <<EOT;
 
 /* TODO: this should be defined elsewhere */
-#define ${ns}_assert MPIU_Assert
+#define ${ns}_assert MPIR_Assert
 
 /* Arbitrary, simplifies interaction with external interfaces like MPI_T_ */
 #define ${uc_ns}_MAX_STRLEN (384)
@@ -350,7 +350,7 @@ print OUTPUT_C <<EOT;
         ${var_name} = MPL_strdup(tmp_str);
         ${ns}_assert(${var_name});
         if (${var_name} == NULL) {
-            MPIU_CHKMEM_SETERR(mpi_errno, strlen(tmp_str), "dup of string for ${var_name}");
+            MPIR_CHKMEM_SETERR(mpi_errno, strlen(tmp_str), "dup of string for ${var_name}");
             goto fn_fail;
         }
     }
diff --git a/maint/extracterrmsgs b/maint/extracterrmsgs
index 16f8c22..14f0880 100755
--- a/maint/extracterrmsgs
+++ b/maint/extracterrmsgs
@@ -159,7 +159,7 @@ sub CreateErrmsgsHeader {
  * This file automatically created by extracterrmsgs\
  * DO NOT EDIT\
  */\n";
-    print $FD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_CLASS
+    print $FD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__CLASS
 typedef struct msgpair {
         const unsigned int sentinal1;
         const char *short_name, *long_name; 
@@ -214,7 +214,7 @@ sub CreateErrMsgMapping {
 
     # For the case of classes only, output the strings for the class 
     # messages
-    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG_CLASS\n";
+    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__CLASS\n";
     print $OUTFD "#define MPIR_MAX_ERROR_CLASS_INDEX $#class_msgs+1\n";
     print $OUTFD "static const char *classToMsg[] = {\n";
     for (my $i=0; $i<=$#class_msgs; $i++) {
@@ -230,7 +230,7 @@ sub CreateErrMsgMapping {
     # The long messages must be available for the generic message output.
     # An alternative is to separate the short from the long messages;
     # the long messages are needed for > MSG_NONE, the short for > MSG_CLASS.
-    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_CLASS\n";
+    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__CLASS\n";
     print $OUTFD "/* The names are in sorted order, allowing the use of a simple\
   linear search or bisection algorithm to find the message corresponding to\
   a particular message */\n";
@@ -284,7 +284,7 @@ sub CreateErrMsgMapping {
 
     $num = 0;
     # Now output the instance specific messages
-    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_GENERIC\n";
+    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__GENERIC\n";
     foreach $key (sort keys %specific_msgs)
     {
 	$longvalue = "\"\0\"";
@@ -320,7 +320,7 @@ sub CreateErrMsgMapping {
     print $OUTFD "};\n";
     print $OUTFD "#endif\n\n";
 
-    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_CLASS\n";
+    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__CLASS\n";
     $maxval = $#class_msgs + 1;
     print $OUTFD "#define MPIR_MAX_ERROR_CLASS_INDEX $maxval\n";
     print $OUTFD "static int class_to_index[] = {\n";
diff --git a/maint/genstates.in b/maint/genstates.in
index 6f71ba0..e96cbdc 100644
--- a/maint/genstates.in
+++ b/maint/genstates.in
@@ -252,9 +252,9 @@ print F "#ifndef MPICH_MPI_FROM_PMPI\n";
 print F "\n";
 print F "#ifdef HAVE_TIMING\n";
 print F "\n";
-print F "#if (USE_LOGGING == MPID_LOGGING_RLOG)\n";
+print F "#if (USE_LOGGING == MPICH_LOGGING__RLOG)\n";
 print F "\n";
-print F "int MPIR_Describe_timer_states()\n";
+print F "int MPII_Describe_timer_states()\n";
 print F "{\n";
 print F "\n";
 foreach (@states)
@@ -267,6 +267,6 @@ foreach (@states)
 print F "    return 0;\n";
 print F "}\n";
 print F "\n";
-print F "#endif /* USE_LOGGING == MPID_LOGGING_RLOG */\n";
+print F "#endif /* USE_LOGGING == MPICH_LOGGING__RLOG */\n";
 print F "#endif /* HAVE_TIMING */\n";
 print F "#endif /* MPICH_MPI_FROM_PMPI */\n";
diff --git a/mpi.def b/mpi.def
index c7e3bf9..861bfd5 100644
--- a/mpi.def
+++ b/mpi.def
@@ -717,20 +717,20 @@ EXPORTS
 ;
 ; mpich extra and renamed symbols
 ;
-    MPIR_CommGetAttr
-    MPIR_CommGetAttr_fort
-    MPIR_CommSetAttr
-    MPIR_TypeGetAttr
-    MPIR_TypeSetAttr
-    MPIR_WinSetAttr
-    MPIR_WinGetAttr
-    MPIR_Keyval_set_proxy
-	MPIR_Keyval_set_fortran
-	MPIR_Keyval_set_fortran90
-	MPIR_Grequest_set_lang_f77
-	MPIR_Keyval_set_cxx
-	MPIR_Errhandler_set_cxx
-	MPIR_Op_set_cxx
+    MPII_Comm_get_attr
+    MPII_Comm_get_attr_fort
+    MPII_Comm_set_attr
+    MPII_Type_get_attr
+    MPII_Type_set_attr
+    MPII_Win_set_attr
+    MPII_Win_get_attr
+    MPII_Keyval_set_proxy
+	MPII_Keyval_set_fortran
+	MPII_Keyval_set_fortran90
+	MPII_Grequest_set_lang_f77
+	MPII_Keyval_set_cxx
+	MPII_Errhandler_set_cxx
+	MPII_Op_set_cxx
 	MPID_Wtick
 	MPID_Wtime_todouble
 	MPIR_Dup_fn
diff --git a/mpich.def b/mpich.def
index 0ea5f15..8e51fa0 100644
--- a/mpich.def
+++ b/mpich.def
@@ -2,24 +2,24 @@
 
 EXPORTS
 ; Fortran values
-;	MPIR_Keyval_set_fortran
-;	MPIR_Keyval_set_fortran90
-	MPIR_Grequest_set_lang_f77
+;	MPII_Keyval_set_fortran
+;	MPII_Keyval_set_fortran90
+	MPII_Grequest_set_lang_f77
 	MPI_F_STATUS_IGNORE
 	MPI_F_STATUSES_IGNORE
 ; C++ values
-;	MPIR_Keyval_set_cxx
-	MPIR_Errhandler_set_cxx
-	MPIR_Op_set_cxx
+;	MPII_Keyval_set_cxx
+	MPII_Errhandler_set_cxx
+	MPII_Op_set_cxx
 ; misc
-    MPIR_CommGetAttr
-    MPIR_CommGetAttr_fort
-    MPIR_CommSetAttr
-    MPIR_TypeGetAttr
-    MPIR_TypeSetAttr
-    MPIR_WinGetAttr
-    MPIR_WinSetAttr
-    MPIR_Keyval_set_proxy
+    MPII_Comm_get_attr
+    MPII_Comm_get_attr_fort
+    MPII_Comm_set_attr
+    MPII_Type_get_attr
+    MPII_Type_set_attr
+    MPII_Win_get_attr
+    MPII_Win_set_attr
+    MPII_Keyval_set_proxy
 	MPIR_Err_create_code
 	MPIR_Err_return_comm
 	MPID_Wtick
diff --git a/src/binding/cxx/buildiface b/src/binding/cxx/buildiface
index 607cea3..ab0b1a4 100755
--- a/src/binding/cxx/buildiface
+++ b/src/binding/cxx/buildiface
@@ -2328,7 +2328,7 @@ void operator delete[](void *p) {
     extern \"C\" {
 typedef void (*mpircallback)(void);
 }
-extern \"C\" void MPIR_Op_set_cxx( MPI_Op, void (*)(void) );
+extern \"C\" void MPII_Op_set_cxx( MPI_Op, void (*)(void) );
 extern \"C\" 
 void MPIR_Call_op_fn( void *invec, void *outvec, int len, MPI_Datatype dtype,
 		     User_function *uop )
@@ -2342,7 +2342,7 @@ void Op::Init( User_function *f, bool commute )
      print $OUTFD "\
 	MPIX_CALLWORLD( MPI_Op_create( (MPI_User_function *)f, 
 			(int) commute, &the_real_op ) ); 
-	MPIR_Op_set_cxx( the_real_op, (mpircallback) MPIR_Call_op_fn );\n";
+	MPII_Op_set_cxx( the_real_op, (mpircallback) MPIR_Call_op_fn );\n";
 &printCoverageEnd( $OUTFD, "Op_create", 2 );
 print $OUTFD "\
     }\n";
@@ -2356,14 +2356,14 @@ MPIR_Comm_delete_attr_cxx_proxy(
     MPI_Comm_delete_attr_function* user_function,
     MPI_Comm comm,
     int keyval,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void* extra_state
     )
 {
     void *value = 0;
     /* Make sure that the attribute value is delivered as a pointer */
-    if(MPIR_ATTR_KIND(attrib_type) == MPIR_ATTR_KIND(MPIR_ATTR_INT)){
+    if(MPII_ATTR_KIND(attrib_type) == MPII_ATTR_KIND(MPIR_ATTR_INT)){
         value = &attrib;
     }
     else{
@@ -2417,12 +2417,12 @@ EOT
 }
 static
 int
-MPIR_Comm_copy_attr_cxx_proxy(
+MPII_Comm_copy_attr_cxx_proxy(
     MPI_Comm_copy_attr_function* user_function,
     MPI_Comm comm,
     int keyval,
     void* extra_state,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void** new_value,
     int* flag
@@ -2430,7 +2430,7 @@ MPIR_Comm_copy_attr_cxx_proxy(
 {
     void *value = 0;
     /* Make sure that the attribute value is delivered as a pointer */
-    if(MPIR_ATTR_KIND(attrib_type) == MPIR_ATTR_KIND(MPIR_ATTR_INT)){
+    if(MPII_ATTR_KIND(attrib_type) == MPII_ATTR_KIND(MPIR_ATTR_INT)){
         value = &attrib;
     }
     else{
@@ -2497,7 +2497,7 @@ EOT
     MPIX_CALLWORLD( MPI_Comm_create_keyval( (MPI_Comm_copy_attr_function *)cf, 
 				       (MPI_Comm_delete_attr_function *)df,
 				      &keyval, extra_state ) );
-    MPIR_Keyval_set_proxy( keyval, MPIR_Comm_copy_attr_cxx_proxy, MPIR_Comm_delete_attr_cxx_proxy );
+    MPII_Keyval_set_proxy( keyval, MPII_Comm_copy_attr_cxx_proxy, MPIR_Comm_delete_attr_cxx_proxy );
 EOT
     &printCoverageEnd( $OUTFD, "Comm_create_keyval", 3 );
     print $OUTFD <<EOT;
@@ -2510,7 +2510,7 @@ MPIR_Type_delete_attr_cxx_proxy(
     MPI_Type_delete_attr_function* user_function,
     MPI_Datatype datatype,
     int keyval,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void* extra_state
     )
@@ -2519,7 +2519,7 @@ MPIR_Type_delete_attr_cxx_proxy(
     MPI::Datatype::Delete_attr_function* f = (MPI::Datatype::Delete_attr_function*)user_function;
     void *value = 0;
     /* Make sure that the attribute value is delivered as a pointer */
-    if(MPIR_ATTR_KIND(attrib_type) == MPIR_ATTR_KIND(MPIR_ATTR_INT)){
+    if(MPII_ATTR_KIND(attrib_type) == MPII_ATTR_KIND(MPIR_ATTR_INT)){
         value = &attrib;
     }
     else{
@@ -2535,7 +2535,7 @@ MPIR_Type_copy_attr_cxx_proxy(
     MPI_Datatype datatype,
     int keyval,
     void* extra_state,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void** new_value,
     int* flag
@@ -2546,7 +2546,7 @@ MPIR_Type_copy_attr_cxx_proxy(
     MPI::Datatype::Copy_attr_function* f = (MPI::Datatype::Copy_attr_function*)user_function;
     void *value = 0;
     /* Make sure that the attribute value is delivered as a pointer */
-    if(MPIR_ATTR_KIND(attrib_type) == MPIR_ATTR_KIND(MPIR_ATTR_INT)){
+    if(MPII_ATTR_KIND(attrib_type) == MPII_ATTR_KIND(MPIR_ATTR_INT)){
         value = &attrib;
     }
     else{
@@ -2567,7 +2567,7 @@ EOT
     MPIX_CALLWORLD( MPI_Type_create_keyval( (MPI_Type_copy_attr_function *)cf, 
 			       (MPI_Type_delete_attr_function *)df,
 		 	       &keyval, extra_state ) );
-    MPIR_Keyval_set_proxy( keyval, MPIR_Type_copy_attr_cxx_proxy, MPIR_Type_delete_attr_cxx_proxy );
+    MPII_Keyval_set_proxy( keyval, MPIR_Type_copy_attr_cxx_proxy, MPIR_Type_delete_attr_cxx_proxy );
 EOT
     &printCoverageEnd( $OUTFD, "Type_create_keyval", 3 );
     print $OUTFD <<EOT;
@@ -2580,7 +2580,7 @@ MPIR_Win_delete_attr_cxx_proxy(
     MPI_Win_delete_attr_function* user_function,
     MPI_Win win,
     int keyval,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void* extra_state
     )
@@ -2589,7 +2589,7 @@ MPIR_Win_delete_attr_cxx_proxy(
     MPI::Win::Delete_attr_function* f = (MPI::Win::Delete_attr_function*)user_function;
     void *value = 0;
     /* Make sure that the attribute value is delivered as a pointer */
-    if(MPIR_ATTR_KIND(attrib_type) == MPIR_ATTR_KIND(MPIR_ATTR_INT)){
+    if(MPII_ATTR_KIND(attrib_type) == MPII_ATTR_KIND(MPIR_ATTR_INT)){
         value = &attrib;
     }
     else{
@@ -2605,7 +2605,7 @@ MPIR_Win_copy_attr_cxx_proxy(
     MPI_Win win,
     int keyval,
     void* extra_state,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void** new_value,
     int* flag
@@ -2616,7 +2616,7 @@ MPIR_Win_copy_attr_cxx_proxy(
     MPI::Win::Copy_attr_function* f = (MPI::Win::Copy_attr_function*)user_function;
     void *value = 0;
     /* Make sure that the attribute value is delivered as a pointer */
-    if(MPIR_ATTR_KIND(attrib_type) == MPIR_ATTR_KIND(MPIR_ATTR_INT)){
+    if(MPII_ATTR_KIND(attrib_type) == MPII_ATTR_KIND(MPIR_ATTR_INT)){
         value = &attrib;
     }
     else{
@@ -2637,7 +2637,7 @@ EOT
     MPIX_CALLWORLD( MPI_Win_create_keyval( (MPI_Win_copy_attr_function *)cf, 
 				       (MPI_Win_delete_attr_function *)df,
 				      &keyval, extra_state ) );
-    MPIR_Keyval_set_proxy( keyval, MPIR_Win_copy_attr_cxx_proxy, MPIR_Win_delete_attr_cxx_proxy );
+    MPII_Keyval_set_proxy( keyval, MPIR_Win_copy_attr_cxx_proxy, MPIR_Win_delete_attr_cxx_proxy );
 EOT
     &printCoverageEnd( $OUTFD, "Win_create_keyval", 3 );
     print $OUTFD <<EOT;
@@ -2649,7 +2649,7 @@ EOT
 print $OUTFD <<EOT;
 // Provide a C routine that can call the C++ error handler, handling
 // any calling-sequence change.  
-extern \"C\" void MPIR_Errhandler_set_cxx( MPI_Errhandler, void (*)(void) );
+extern \"C\" void MPII_Errhandler_set_cxx( MPI_Errhandler, void (*)(void) );
 extern \"C\" 
 void MPIR_Call_errhandler_function( int kind, int *handle, int *errcode, 
 			      void (*cxxfn)(void) )
@@ -2697,7 +2697,7 @@ Errhandler File::Create_errhandler( Errhandler_function *f )
     MPI_Errhandler eh;
     MPI::Errhandler e1;
     MPI_File_create_errhandler( (MPI_File_errhandler_function *)f, &eh );
-    MPIR_Errhandler_set_cxx( eh, 
+    MPII_Errhandler_set_cxx( eh,
 			     (mpircallback)MPIR_Call_errhandler_function );
     e1.the_real_errhandler = eh;
     return e1;
@@ -2708,7 +2708,7 @@ Errhandler Comm::Create_errhandler( Errhandler_function *f )
     MPI_Errhandler eh;
     MPI::Errhandler e1;
     MPI_Comm_create_errhandler( (MPI_Comm_errhandler_function *)f, &eh );
-    MPIR_Errhandler_set_cxx( eh, 
+    MPII_Errhandler_set_cxx( eh,
 			     (mpircallback)MPIR_Call_errhandler_function );
     e1.the_real_errhandler = eh;
     return e1;
@@ -2718,7 +2718,7 @@ Errhandler Win::Create_errhandler( Errhandler_function *f )
     MPI_Errhandler eh;
     MPI::Errhandler e1;
     MPI_Win_create_errhandler( (MPI_Win_errhandler_function *)f, &eh );
-    MPIR_Errhandler_set_cxx( eh, 
+    MPII_Errhandler_set_cxx( eh,
 			     (mpircallback)MPIR_Call_errhandler_function );
     e1.the_real_errhandler = eh;
     return e1;
diff --git a/src/binding/fortran/mpif_h/buildiface b/src/binding/fortran/mpif_h/buildiface
index 5c30884..6f4f80c 100755
--- a/src/binding/fortran/mpif_h/buildiface
+++ b/src/binding/fortran/mpif_h/buildiface
@@ -382,14 +382,14 @@ foreach $_ (@ARGV) {
 # at the end.  This is used with the attribute routines which must 
 # pass an additional argument to a special attribute routine that handles
 # the differences between C and Fortran attributes.
-%ChangeCall = ( 'Comm_get_attr' => 'MPIR_CommGetAttr_fort:!MPIR_ATTR_AINT' ,
-		'Type_get_attr' => 'MPIR_TypeGetAttr:!MPIR_ATTR_AINT',
-		'Win_get_attr'  => 'MPIR_WinGetAttr:!MPIR_ATTR_AINT',
-		'Attr_get'      => 'MPIR_CommGetAttr_fort:!MPIR_ATTR_INT',
-		'Comm_set_attr' => 'MPIR_CommSetAttr:!MPIR_ATTR_AINT',
-		'Type_set_attr' => 'MPIR_TypeSetAttr:!MPIR_ATTR_AINT',
-		'Win_set_attr'  => 'MPIR_WinSetAttr:!MPIR_ATTR_AINT',
-		'Attr_put'      => 'MPIR_CommSetAttr:!MPIR_ATTR_INT',
+%ChangeCall = ( 'Comm_get_attr' => 'MPII_Comm_get_attr_fort:!MPIR_ATTR_AINT' ,
+		'Type_get_attr' => 'MPII_Type_get_attr:!MPIR_ATTR_AINT',
+		'Win_get_attr'  => 'MPII_Win_get_attr:!MPIR_ATTR_AINT',
+		'Attr_get'      => 'MPII_Comm_get_attr_fort:!MPIR_ATTR_INT',
+		'Comm_set_attr' => 'MPII_Comm_set_attr:!MPIR_ATTR_AINT',
+		'Type_set_attr' => 'MPII_Type_set_attr:!MPIR_ATTR_AINT',
+		'Win_set_attr'  => 'MPII_Win_set_attr:!MPIR_ATTR_AINT',
+		'Attr_put'      => 'MPII_Comm_set_attr:!MPIR_ATTR_INT',
     );
 # 
 # Note that wtime and wtick aren't found because they don't match the 
@@ -1906,7 +1906,7 @@ sub unweighted_out_ctof {
 # Logical variables
 sub logical_ftoc {
     my $count = $_[0];
-    print $OUTFD "    l$count = MPIR_FROM_FLOG(*v$count);\n";
+    print $OUTFD "    l$count = MPII_FROM_FLOG(*v$count);\n";
 }
 sub logical_in_decl {
     my $count = $_[0];
@@ -1928,7 +1928,7 @@ sub logical_out_ctof {
     my $coutvar = $_[0];
     my $outvar  = $_[1];
     if ($do_logical) {
-	print $OUTFD "    if ($errparmlval == MPI_SUCCESS) *$outvar = MPIR_TO_FLOG($coutvar);\n";
+	print $OUTFD "    if ($errparmlval == MPI_SUCCESS) *$outvar = MPII_TO_FLOG($coutvar);\n";
     }
 }
 sub logical_out_decl {
@@ -1969,7 +1969,7 @@ sub logical_array_in_ftoc {
     if ($ActSize) {int li;
      l$count = (int *)$malloc($ActSize * sizeof(int));
      for (li=0; li<$ActSize; li++) {
-        l$count\[li\] = MPIR_FROM_FLOG(v$count\[li\]);
+        l$count\[li\] = MPII_FROM_FLOG(v$count\[li\]);
      }
     }
 ";
@@ -2002,7 +2002,7 @@ sub logical_array_out_ctof {
     print $OUTFD "\
     if ($errparmlval == MPI_SUCCESS) {int li;
      for (li=0; li<$ActSize; li++) {
-        $outvar\[li\] = MPIR_TO_FLOG($coutvar\[li\]);
+        $outvar\[li\] = MPII_TO_FLOG($coutvar\[li\]);
      }
     }
 ";
@@ -3611,7 +3611,7 @@ sub setF90Type_keyval {
     if ($within_fint) { $argname = "l$argnum"; }
     print $FD "\
     if (*ierr == MPI_SUCCESS) {
-         MPIR_Keyval_set_proxy( (int)$argname, MPIR_Type_copy_attr_f90_proxy, MPIR_Type_delete_attr_f90_proxy );
+         MPII_Keyval_set_proxy( (int)$argname, MPIR_Type_copy_attr_f90_proxy, MPIR_Type_delete_attr_f90_proxy );
     }\n";
 }
 sub setF90Comm_keyval {
@@ -3622,7 +3622,7 @@ sub setF90Comm_keyval {
     if ($within_fint) { $argname = "l$argnum"; }
     print $FD "\
     if (*ierr == MPI_SUCCESS) {
-         MPIR_Keyval_set_proxy( (int)$argname, MPIR_Comm_copy_attr_f90_proxy, MPIR_Comm_delete_attr_f90_proxy );
+         MPII_Keyval_set_proxy( (int)$argname, MPII_Comm_copy_attr_f90_proxy, MPIR_Comm_delete_attr_f90_proxy );
     }\n";
 }
 sub setF90Win_keyval {
@@ -3633,7 +3633,7 @@ sub setF90Win_keyval {
     if ($within_fint) { $argname = "l$argnum"; }
     print $FD "\
     if (*ierr == MPI_SUCCESS) {
-         MPIR_Keyval_set_proxy( (int)$argname, MPIR_Win_copy_attr_f90_proxy, MPIR_Win_delete_attr_f90_proxy );
+         MPII_Keyval_set_proxy( (int)$argname, MPIR_Win_copy_attr_f90_proxy, MPIR_Win_delete_attr_f90_proxy );
     }\n";
 }
 sub setF77greq {
@@ -3644,7 +3644,7 @@ sub setF77greq {
 
     print $FD "\
     if (*ierr == MPI_SUCCESS) {
-         MPIR_Grequest_set_lang_f77( $argname );
+         MPII_Grequest_set_lang_f77( $argname );
     }\n";
 }
 
@@ -4394,12 +4394,12 @@ typedef void (FORT_CALL F77_CopyFunction) (MPI_Fint *, MPI_Fint *, MPI_Aint *, M
 /* Helper proxy function to thunk the attr copy function call into F77 calling convention */
 static
 int
-MPIR_Comm_copy_attr_f77_proxy(
+MPII_Comm_copy_attr_f77_proxy(
     MPI_Comm_copy_attr_function* user_function,
     MPI_Comm comm,
     int keyval,
     void* extra_state,
-    MPIR_AttrType value_type,
+    MPIR_Attr_type value_type,
     void* value,
     void** new_value,
     int* flag
@@ -4408,15 +4408,15 @@ MPIR_Comm_copy_attr_f77_proxy(
     MPI_Fint ierr = 0;
     MPI_Fint fhandle = (MPI_Fint)comm;
     MPI_Fint fkeyval = (MPI_Fint)keyval;
-    MPI_Aint fvalue = MPIU_VOID_PTR_CAST_TO_MPI_AINT (value);
+    MPI_Aint fvalue = MPIR_VOID_PTR_CAST_TO_MPI_AINT (value);
     MPI_Aint *fextra = (MPI_Aint *)extra_state;
     MPI_Aint fnew = 0;
     MPI_Fint fflag = 0;
 
     ((F77_CopyFunction*)user_function)( &fhandle, &fkeyval, fextra, &fvalue, &fnew, &fflag, &ierr );
 
-    *flag = MPIR_FROM_FLOG(fflag);
-    *new_value = MPIU_AINT_CAST_TO_VOID_PTR ((MPI_Aint) fnew);
+    *flag = MPII_FROM_FLOG(fflag);
+    *new_value = MPIR_AINT_CAST_TO_VOID_PTR ((MPI_Aint) fnew);
     return (int)ierr;
 }
 
@@ -4431,7 +4431,7 @@ MPIR_Comm_delete_attr_f77_proxy(
     MPI_Comm_delete_attr_function* user_function,
     MPI_Comm comm,
     int keyval,
-    MPIR_AttrType value_type,
+    MPIR_Attr_type value_type,
     void* value,
     void* extra_state
     )
@@ -4439,7 +4439,7 @@ MPIR_Comm_delete_attr_f77_proxy(
     MPI_Fint ierr = 0;
     MPI_Fint fhandle = (MPI_Fint)comm;
     MPI_Fint fkeyval = (MPI_Fint)keyval;
-    MPI_Aint fvalue = MPIU_VOID_PTR_CAST_TO_MPI_AINT (value);
+    MPI_Aint fvalue = MPIR_VOID_PTR_CAST_TO_MPI_AINT (value);
     MPI_Aint *fextra = (MPI_Aint *)extra_state;
 
     ((F77_DeleteFunction*)user_function)( &fhandle, &fkeyval, &fvalue, fextra, &ierr );
@@ -4456,7 +4456,7 @@ MPIR_Comm_delete_attr_f77_proxy(
         *ierr = MPI_Comm_create_keyval( v1, v2, &l3, v4 );
         if (!*ierr) {
 	    *v3 = l3;
-            MPIR_Keyval_set_proxy((int)*v3, MPIR_Comm_copy_attr_f77_proxy, MPIR_Comm_delete_attr_f77_proxy);
+            MPII_Keyval_set_proxy((int)*v3, MPII_Comm_copy_attr_f77_proxy, MPIR_Comm_delete_attr_f77_proxy);
         }
 }\n";
     close ($OUTFD);
@@ -4479,7 +4479,7 @@ MPIR_Comm_delete_attr_f77_proxy(
     #&print_attr;
     print $OUTFD "{
         *v5 = *v4;
-        *v6 = MPIR_TO_FLOG(1);
+        *v6 = MPII_TO_FLOG(1);
         *ierr = MPI_SUCCESS;
 }\n";
     close ($OUTFD);
@@ -4512,7 +4512,7 @@ MPIR_Comm_delete_attr_f77_proxy(
     &print_args( $OUTFD, $args, 0, "null_copy_fn" );
     print $OUTFD "{
         *ierr = MPI_SUCCESS;
-        *v6 = MPIR_TO_FLOG(0);
+        *v6 = MPII_TO_FLOG(0);
 }\n";
     close ($OUTFD);
     &ReplaceIfDifferent( $filename, $filename . ".new" );
@@ -5212,7 +5212,7 @@ sub WriteAttrDefaults {
     #&print_attr;
     print $OUTFD "{
         *v5 = *v4;
-        *v6 = MPIR_TO_FLOG(1);
+        *v6 = MPII_TO_FLOG(1);
         *ierr = MPI_SUCCESS;
 }\n";
     close ($OUTFD);
@@ -5247,7 +5247,7 @@ sub WriteAttrDefaults {
     &print_args( $OUTFD, $args, 0, "${prefix}null_copy_fn" );
     print $OUTFD "{
         *ierr = MPI_SUCCESS;
-        *v6 = MPIR_TO_FLOG(0);
+        *v6 = MPII_TO_FLOG(0);
 }\n";
     close ($OUTFD);
     &ReplaceIfDifferent( $filename, $filename . ".new" );
@@ -5410,7 +5410,7 @@ MPIR_Type_copy_attr_f90_proxy(
     MPI_Datatype datatype,
     int keyval,
     void* extra_state,
-    MPIR_AttrType value_type,
+    MPIR_Attr_type value_type,
     void* value,
     void** new_value,
     int* flag
@@ -5419,15 +5419,15 @@ MPIR_Type_copy_attr_f90_proxy(
     MPI_Fint ierr = 0;
     MPI_Fint fhandle = (MPI_Fint)datatype;
     MPI_Fint fkeyval = (MPI_Fint)keyval;
-    MPI_Aint fvalue = MPIU_VOID_PTR_CAST_TO_MPI_AINT (value);
+    MPI_Aint fvalue = MPIR_VOID_PTR_CAST_TO_MPI_AINT (value);
     MPI_Aint* fextra  = (MPI_Aint*)extra_state;
     MPI_Aint fnew = 0;
     MPI_Fint fflag = 0;
 
     ((F90_CopyFunction*)user_function)( &fhandle, &fkeyval, fextra, &fvalue, &fnew, &fflag, &ierr );
 
-    *flag = MPIR_FROM_FLOG(fflag);
-    *new_value = MPIU_AINT_CAST_TO_VOID_PTR (fnew);
+    *flag = MPII_FROM_FLOG(fflag);
+    *new_value = MPIR_AINT_CAST_TO_VOID_PTR (fnew);
     return (int)ierr;
 }
 
@@ -5442,7 +5442,7 @@ MPIR_Type_delete_attr_f90_proxy(
     MPI_Type_delete_attr_function* user_function,
     MPI_Datatype datatype,
     int keyval,
-    MPIR_AttrType value_type,
+    MPIR_Attr_type value_type,
     void* value,
     void* extra_state
     )
@@ -5450,7 +5450,7 @@ MPIR_Type_delete_attr_f90_proxy(
     MPI_Fint ierr = 0;
     MPI_Fint fhandle = (MPI_Fint)datatype;
     MPI_Fint fkeyval = (MPI_Fint)keyval;
-    MPI_Aint fvalue = MPIU_VOID_PTR_CAST_TO_MPI_AINT (value);
+    MPI_Aint fvalue = MPIR_VOID_PTR_CAST_TO_MPI_AINT (value);
     MPI_Aint* fextra  = (MPI_Aint*)extra_state;
 
     ((F90_DeleteFunction*)user_function)( &fhandle, &fkeyval, &fvalue, fextra, &ierr );
@@ -5469,12 +5469,12 @@ typedef void (FORT_CALL F90_CopyFunction) (MPI_Fint *, MPI_Fint *, MPI_Aint *, M
 /* Helper proxy function to thunk the attr copy function call into F90 calling convention */
 static
 int
-MPIR_Comm_copy_attr_f90_proxy(
+MPII_Comm_copy_attr_f90_proxy(
     MPI_Comm_copy_attr_function* user_function,
     MPI_Comm comm,
     int keyval,
     void* extra_state,
-    MPIR_AttrType value_type,
+    MPIR_Attr_type value_type,
     void* value,
     void** new_value,
     int* flag
@@ -5483,15 +5483,15 @@ MPIR_Comm_copy_attr_f90_proxy(
     MPI_Fint ierr = 0;
     MPI_Fint fhandle = (MPI_Fint)comm;
     MPI_Fint fkeyval = (MPI_Fint)keyval;
-    MPI_Aint fvalue = MPIU_VOID_PTR_CAST_TO_MPI_AINT (value);
+    MPI_Aint fvalue = MPIR_VOID_PTR_CAST_TO_MPI_AINT (value);
     MPI_Aint* fextra  = (MPI_Aint*)extra_state;
     MPI_Aint fnew = 0;
     MPI_Fint fflag = 0;
 
     ((F90_CopyFunction*)user_function)( &fhandle, &fkeyval, fextra, &fvalue, &fnew, &fflag, &ierr );
 
-    *flag = MPIR_FROM_FLOG(fflag);
-    *new_value = MPIU_AINT_CAST_TO_VOID_PTR (fnew);
+    *flag = MPII_FROM_FLOG(fflag);
+    *new_value = MPIR_AINT_CAST_TO_VOID_PTR (fnew);
     return (int)ierr;
 }
 
@@ -5506,7 +5506,7 @@ MPIR_Comm_delete_attr_f90_proxy(
     MPI_Comm_delete_attr_function* user_function,
     MPI_Comm comm,
     int keyval,
-    MPIR_AttrType value_type,
+    MPIR_Attr_type value_type,
     void* value,
     void* extra_state
     )
@@ -5514,7 +5514,7 @@ MPIR_Comm_delete_attr_f90_proxy(
     MPI_Fint ierr = 0;
     MPI_Fint fhandle = (MPI_Fint)comm;
     MPI_Fint fkeyval = (MPI_Fint)keyval;
-    MPI_Aint fvalue = MPIU_VOID_PTR_CAST_TO_MPI_AINT (value);
+    MPI_Aint fvalue = MPIR_VOID_PTR_CAST_TO_MPI_AINT (value);
     MPI_Aint* fextra  = (MPI_Aint*)extra_state;
 
     ((F90_DeleteFunction*)user_function)( &fhandle, &fkeyval, &fvalue, fextra, &ierr );
@@ -5538,7 +5538,7 @@ MPIR_Win_copy_attr_f90_proxy(
     MPI_Win win,
     int keyval,
     void* extra_state,
-    MPIR_AttrType value_type,
+    MPIR_Attr_type value_type,
     void* value,
     void** new_value,
     int* flag
@@ -5547,15 +5547,15 @@ MPIR_Win_copy_attr_f90_proxy(
     MPI_Fint ierr = 0;
     MPI_Fint fhandle = (MPI_Fint)win;
     MPI_Fint fkeyval = (MPI_Fint)keyval;
-    MPI_Aint fvalue = MPIU_VOID_PTR_CAST_TO_MPI_AINT (value);
+    MPI_Aint fvalue = MPIR_VOID_PTR_CAST_TO_MPI_AINT (value);
     MPI_Aint* fextra  = (MPI_Aint*)extra_state;
     MPI_Aint fnew = 0;
     MPI_Fint fflag = 0;
 
     ((F90_CopyFunction*)user_function)( &fhandle, &fkeyval, fextra, &fvalue, &fnew, &fflag, &ierr );
 
-    *flag = MPIR_FROM_FLOG(fflag);
-    *new_value = MPIU_AINT_CAST_TO_VOID_PTR (fnew);
+    *flag = MPII_FROM_FLOG(fflag);
+    *new_value = MPIR_AINT_CAST_TO_VOID_PTR (fnew);
     return (int)ierr;
 }
 
@@ -5570,7 +5570,7 @@ MPIR_Win_delete_attr_f90_proxy(
     MPI_Win_delete_attr_function* user_function,
     MPI_Win win,
     int keyval,
-    MPIR_AttrType value_type,
+    MPIR_Attr_type value_type,
     void* value,
     void* extra_state
     )
@@ -5578,7 +5578,7 @@ MPIR_Win_delete_attr_f90_proxy(
     MPI_Fint ierr = 0;
     MPI_Fint fhandle = (MPI_Fint)win;
     MPI_Fint fkeyval = (MPI_Fint)keyval;
-    MPI_Aint fvalue = MPIU_VOID_PTR_CAST_TO_MPI_AINT (value);
+    MPI_Aint fvalue = MPIR_VOID_PTR_CAST_TO_MPI_AINT (value);
     MPI_Aint* fextra  = (MPI_Aint*)extra_state;
 
     ((F90_DeleteFunction*)user_function)( &fhandle, &fkeyval, &fvalue, fextra, &ierr );
@@ -5782,13 +5782,13 @@ sub printCallForFint {
     &print_int_to_fint( $routine_name, $args );
     # Hack
     if ($routine_name eq "Op_create") {
-	print $OUTFD "     MPIR_Op_set_fc( l3 );\n";
+	print $OUTFD "     MPII_Op_set_fc( l3 );\n";
     }
     elsif ($routine_name eq "Comm_create_errhandler" ||
 	   $routine_name eq "Win_create_errhandler" ||
 	   $routine_name eq "File_create_errhandler" ||
 	   $routine_name eq "Errhandler_create") {
-	print $OUTFD "     MPIR_Errhandler_set_fc( l2 );\n";
+	print $OUTFD "     MPII_Errhandler_set_fc( l2 );\n";
     }
 
     $within_fint = 0;
diff --git a/src/binding/fortran/mpif_h/mpi_fortimpl.h b/src/binding/fortran/mpif_h/mpi_fortimpl.h
index 2d6c765..e8dcb8e 100644
--- a/src/binding/fortran/mpif_h/mpi_fortimpl.h
+++ b/src/binding/fortran/mpif_h/mpi_fortimpl.h
@@ -143,8 +143,8 @@ typedef MPI_Aint MPI_FAint;
 /* Fortran logicals */
 /* The definitions for the Fortran logical values are also needed 
    by the reduction operations in mpi/coll/opland, oplor, and oplxor, 
-   so they are defined in src/include/mpir_fortlogical.h */
-#include "mpir_fortlogical.h"
+   so they are defined in src/include/mpii_fortlogical.h */
+#include "mpii_fortlogical.h"
 
 
 /* MPIR_F_MPI_BOTTOM is the address of the Fortran MPI_BOTTOM value */
diff --git a/src/binding/fortran/mpif_h/mpichf.def b/src/binding/fortran/mpif_h/mpichf.def
index 58075ee..33cc256 100644
--- a/src/binding/fortran/mpif_h/mpichf.def
+++ b/src/binding/fortran/mpif_h/mpichf.def
@@ -751,8 +751,8 @@ EXPORTS
 	MPIFCMB6
 	MPIFCMB7
 	MPIFCMB8
-;	MPIR_F_TRUE
-;	MPIR_F_FALSE
+;	MPII_F_TRUE
+;	MPII_F_FALSE
 	MPIR_F_NeedInit
 	MPIR_F_MPI_BOTTOM
 	MPIR_F_MPI_IN_PLACE
diff --git a/src/binding/fortran/mpif_h/mpichfg.def b/src/binding/fortran/mpif_h/mpichfg.def
index 61a2026..d420e51 100644
--- a/src/binding/fortran/mpif_h/mpichfg.def
+++ b/src/binding/fortran/mpif_h/mpichfg.def
@@ -1943,8 +1943,8 @@ EXPORTS
 	mpifcmb6_
 	mpifcmb7_
 	mpifcmb8_
-;	MPIR_F_TRUE
-;	MPIR_F_FALSE
+;	MPII_F_TRUE
+;	MPII_F_FALSE
 	MPIR_F_NeedInit
 	MPIR_F_MPI_BOTTOM
 	MPIR_F_MPI_IN_PLACE
diff --git a/src/binding/fortran/mpif_h/mpichfs.def b/src/binding/fortran/mpif_h/mpichfs.def
index 5ce549c..720eda4 100644
--- a/src/binding/fortran/mpif_h/mpichfs.def
+++ b/src/binding/fortran/mpif_h/mpichfs.def
@@ -723,8 +723,8 @@ EXPORTS
 	MPIFCMB6
 	MPIFCMB7
 	MPIFCMB8
-;	MPIR_F_TRUE
-;	MPIR_F_FALSE
+;	MPII_F_TRUE
+;	MPII_F_FALSE
 	MPIR_F_NeedInit
 	MPIR_F_MPI_BOTTOM
 	MPIR_F_MPI_IN_PLACE
diff --git a/src/binding/fortran/mpif_h/setbot.c.in b/src/binding/fortran/mpif_h/setbot.c.in
index faf193c..2451f06 100644
--- a/src/binding/fortran/mpif_h/setbot.c.in
+++ b/src/binding/fortran/mpif_h/setbot.c.in
@@ -40,10 +40,10 @@ FORT_DLL_SPEC void FORT_CALL mpirinitc2_( char * FORT_MIXED_LEN_DECL
 */
 #ifndef F77_USE_BOOLEAN_LITERALS
 #if defined(F77_RUNTIME_VALUES) || !defined(F77_TRUE_VALUE_SET)
-MPI_Fint MPIR_F_TRUE = 1, MPIR_F_FALSE = 0;
+MPI_Fint MPII_F_TRUE = 1, MPII_F_FALSE = 0;
 #else
-const MPI_Fint MPIR_F_TRUE=F77_TRUE_VALUE;
-const MPI_Fint MPIR_F_FALSE=F77_FALSE_VALUE;
+const MPI_Fint MPII_F_TRUE=F77_TRUE_VALUE;
+const MPI_Fint MPII_F_FALSE=F77_FALSE_VALUE;
 #endif
 #endif
 
diff --git a/src/binding/fortran/use_mpi/create_f90_complex.c b/src/binding/fortran/use_mpi/create_f90_complex.c
index cb485b7..a92e595 100644
--- a/src/binding/fortran/use_mpi/create_f90_complex.c
+++ b/src/binding/fortran/use_mpi/create_f90_complex.c
@@ -75,12 +75,12 @@ int MPI_Type_create_f90_complex( int precision, int range, MPI_Datatype *newtype
     static realModel f90_real_model[2] = { 
 	{ MPIR_F90_REAL_MODEL, MPI_COMPLEX},
 	{ MPIR_F90_DOUBLE_MODEL, MPI_DOUBLE_COMPLEX } };
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_F90_COMPLEX);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_F90_COMPLEX);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_F90_COMPLEX);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_F90_COMPLEX);
 
     /* ... body of routine ...  */
     /* MPI 2.1, Section 16.2, page 473 lines 12-27 make it clear that
@@ -124,7 +124,7 @@ int MPI_Type_create_f90_complex( int precision, int range, MPI_Datatype *newtype
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_F90_COMPLEX);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_F90_COMPLEX);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 fn_fail:
diff --git a/src/binding/fortran/use_mpi/create_f90_int.c b/src/binding/fortran/use_mpi/create_f90_int.c
index 24484df..2f7507b 100644
--- a/src/binding/fortran/use_mpi/create_f90_int.c
+++ b/src/binding/fortran/use_mpi/create_f90_int.c
@@ -69,12 +69,12 @@ int MPI_Type_create_f90_integer( int range, MPI_Datatype *newtype )
     MPI_Datatype basetype = MPI_DATATYPE_NULL;
     static intModel f90_integer_map[] = { MPIR_F90_INTEGER_MODEL_MAP
 					  {0, 0, 0 } };
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_F90_INTEGER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_F90_INTEGER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_F90_INTEGER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_F90_INTEGER);
 
     /* ... body of routine ...  */
     for (i=0; f90_integer_map[i].range > 0; i++) {
@@ -107,7 +107,7 @@ int MPI_Type_create_f90_integer( int range, MPI_Datatype *newtype )
 
     /* ... end of body of routine ... */
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_F90_INTEGER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_F90_INTEGER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
  fn_fail:
diff --git a/src/binding/fortran/use_mpi/create_f90_real.c b/src/binding/fortran/use_mpi/create_f90_real.c
index 1b1282a..04ff55a 100644
--- a/src/binding/fortran/use_mpi/create_f90_real.c
+++ b/src/binding/fortran/use_mpi/create_f90_real.c
@@ -74,12 +74,12 @@ int MPI_Type_create_f90_real( int precision, int range, MPI_Datatype *newtype )
     static realModel f90_real_model[2] = { 
 	{ MPIR_F90_REAL_MODEL, MPI_REAL},
 	{ MPIR_F90_DOUBLE_MODEL, MPI_DOUBLE_PRECISION } };
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_F90_REAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_F90_REAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_F90_REAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_F90_REAL);
 
     /* ... body of routine ...  */
     /* MPI 2.1, Section 16.2, page 473 lines 12-27 make it clear that
@@ -123,7 +123,7 @@ int MPI_Type_create_f90_real( int precision, int range, MPI_Datatype *newtype )
 
  fn_exit:
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_F90_REAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_F90_REAL);
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/binding/fortran/use_mpi/create_f90_util.c b/src/binding/fortran/use_mpi/create_f90_util.c
index 13fdd4e..1736661 100644
--- a/src/binding/fortran/use_mpi/create_f90_util.c
+++ b/src/binding/fortran/use_mpi/create_f90_util.c
@@ -116,7 +116,7 @@ int MPIR_Create_unnamed_predefined( MPI_Datatype old, int combiner,
              * MPID_Type_commit */
             MPID_Datatype_get_basic_type(old, old_basic);
             MPID_Datatype_get_basic_type(new_dtp->handle, new_basic);
-            MPIU_Assert(new_basic == old_basic);
+            MPIR_Assert(new_basic == old_basic);
         }
 #endif
 
diff --git a/src/binding/fortran/use_mpi_f08/mpi_c_interface_glue.f90 b/src/binding/fortran/use_mpi_f08/mpi_c_interface_glue.f90
index 208f1f2..bad02fa 100644
--- a/src/binding/fortran/use_mpi_f08/mpi_c_interface_glue.f90
+++ b/src/binding/fortran/use_mpi_f08/mpi_c_interface_glue.f90
@@ -13,16 +13,16 @@ implicit none
 public :: MPIR_Fortran_string_f2c
 public :: MPIR_Fortran_string_c2f
 
-public :: MPIR_Comm_copy_attr_f08_proxy
+public :: MPII_Comm_copy_attr_f08_proxy
 public :: MPIR_Comm_delete_attr_f08_proxy
 public :: MPIR_Type_copy_attr_f08_proxy
 public :: MPIR_Type_delete_attr_f08_proxy
 public :: MPIR_Win_copy_attr_f08_proxy
 public :: MPIR_Win_delete_attr_f08_proxy
-public :: MPIR_Keyval_set_proxy
+public :: MPII_Keyval_set_proxy
 public :: MPIR_Grequest_set_lang_fortran
 
-! Bind to C's enum MPIR_AttrType in mpir_attr_generic.h
+! Bind to C's enum MPIR_Attr_type in mpir_attr_generic.h
 enum, bind(C)
     enumerator :: MPIR_ATTR_PTR  = 0
     enumerator :: MPIR_ATTR_AINT = 1
@@ -31,15 +31,15 @@ end enum
 
 interface
 
-subroutine MPIR_Keyval_set_proxy(keyval, attr_copy_proxy, attr_delete_proxy) bind(C, name="MPIR_Keyval_set_proxy")
+subroutine MPII_Keyval_set_proxy(keyval, attr_copy_proxy, attr_delete_proxy) bind(C, name="MPII_Keyval_set_proxy")
     use :: iso_c_binding, only : c_int, c_funptr
     integer(c_int), value, intent(in) :: keyval
     type(c_funptr), value, intent(in) :: attr_copy_proxy, attr_delete_proxy
     ! The subroutine is implemented in attrutil.c on the C side
-end subroutine MPIR_Keyval_set_proxy
+end subroutine MPII_Keyval_set_proxy
 
 ! Just need to tag the lang is Fortran, so it is fine to bind to *_lang_f77
-subroutine MPIR_Grequest_set_lang_fortran(request) bind(C, name="MPIR_Grequest_set_lang_f77")
+subroutine MPIR_Grequest_set_lang_fortran(request) bind(C, name="MPII_Grequest_set_lang_f77")
     use :: mpi_c_interface_types, only : c_Request
     integer(c_Request), value, intent(in) :: request
     ! The subroutine is implemented in mpir_request.c on the C side
@@ -97,7 +97,7 @@ subroutine MPIR_Fortran_string_c2f(cstring, fstring)
     end do
 end subroutine MPIR_Fortran_string_c2f
 
-function MPIR_Comm_copy_attr_f08_proxy (user_function, oldcomm, comm_keyval, extra_state, &
+function MPII_Comm_copy_attr_f08_proxy (user_function, oldcomm, comm_keyval, extra_state, &
         attr_type, attribute_val_in, attribute_val_out, flag) result(ierror)
 
     use :: iso_c_binding, only : c_int, c_intptr_t
@@ -135,7 +135,7 @@ function MPIR_Comm_copy_attr_f08_proxy (user_function, oldcomm, comm_keyval, ext
     flag = merge(1, 0, flag_f)
     ierror = ierror_f
 
-end function MPIR_Comm_copy_attr_f08_proxy
+end function MPII_Comm_copy_attr_f08_proxy
 
 function MPIR_Comm_delete_attr_f08_proxy (user_function, comm, comm_keyval, attr_type, &
         attribute_val, extra_state) result(ierror)
diff --git a/src/binding/fortran/use_mpi_f08/mpi_c_interface_nobuf.f90 b/src/binding/fortran/use_mpi_f08/mpi_c_interface_nobuf.f90
index 41ec748..a50befe 100644
--- a/src/binding/fortran/use_mpi_f08/mpi_c_interface_nobuf.f90
+++ b/src/binding/fortran/use_mpi_f08/mpi_c_interface_nobuf.f90
@@ -629,7 +629,7 @@ end function MPIR_Comm_free_keyval_c
 
 ! Not bind to MPI_Comm_get_attr directly due to the attr_type tag
 function MPIR_Comm_get_attr_c(comm, comm_keyval, attribute_val, flag, attr_type) &
-    bind(C, name="MPIR_CommGetAttr") result(ierror)
+    bind(C, name="MPII_Comm_get_attr") result(ierror)
     use, intrinsic :: iso_c_binding, only : c_int
     use :: mpi_f08_compile_constants, only : MPI_ADDRESS_KIND
     use :: mpi_c_interface_types, only : c_Comm
@@ -696,7 +696,7 @@ function MPIR_Comm_remote_size_c(comm, size) &
 end function MPIR_Comm_remote_size_c
 
 function MPIR_Comm_set_attr_c(comm, comm_keyval, attribute_val, attr_type) &
-    bind(C, name="MPIR_CommSetAttr") result(ierror)
+    bind(C, name="MPII_Comm_set_attr") result(ierror)
     use, intrinsic :: iso_c_binding, only : c_int
     use :: mpi_f08_compile_constants, only : MPI_ADDRESS_KIND
     use :: mpi_c_interface_types, only : c_Comm
@@ -952,7 +952,7 @@ function MPIR_Type_free_keyval_c(type_keyval) &
 end function MPIR_Type_free_keyval_c
 
 function MPIR_Type_get_attr_c(datatype, type_keyval, attribute_val, flag, attr_type) &
-    bind(C, name="MPIR_TypeGetAttr") result(ierror)
+    bind(C, name="MPII_Type_get_attr") result(ierror)
     use, intrinsic :: iso_c_binding, only : c_int
     use :: mpi_f08_compile_constants, only : MPI_ADDRESS_KIND
     use :: mpi_c_interface_types, only : c_Datatype
@@ -979,7 +979,7 @@ function MPIR_Type_get_name_c(datatype, type_name, resultlen) &
 end function MPIR_Type_get_name_c
 
 function MPIR_Type_set_attr_c(datatype, type_keyval, attribute_val, attr_type) &
-    bind(C, name="MPIR_TypeSetAttr") result(ierror)
+    bind(C, name="MPII_Type_set_attr") result(ierror)
     use, intrinsic :: iso_c_binding, only : c_int
     use :: mpi_f08_compile_constants, only : MPI_ADDRESS_KIND
     use :: mpi_c_interface_types, only : c_Datatype
@@ -1033,7 +1033,7 @@ function MPIR_Win_free_keyval_c(win_keyval) &
 end function MPIR_Win_free_keyval_c
 
 function MPIR_Win_get_attr_c(win, win_keyval, attribute_val, flag, attr_type) &
-    bind(C, name="MPIR_WinGetAttr") result(ierror)
+    bind(C, name="MPII_Win_get_attr") result(ierror)
     use, intrinsic :: iso_c_binding, only : c_int
     use :: mpi_f08_compile_constants, only : MPI_ADDRESS_KIND
     use :: mpi_c_interface_types, only : c_Win
@@ -1060,7 +1060,7 @@ function MPIR_Win_get_name_c(win, win_name, resultlen) &
 end function MPIR_Win_get_name_c
 
 function MPIR_Win_set_attr_c(win, win_keyval, attribute_val, attr_type) &
-    bind(C, name="MPIR_WinSetAttr") result(ierror)
+    bind(C, name="MPII_Win_set_attr") result(ierror)
     use, intrinsic :: iso_c_binding, only : c_int
     use :: mpi_f08_compile_constants, only : MPI_ADDRESS_KIND
     use :: mpi_c_interface_types, only : c_Win
diff --git a/src/binding/fortran/use_mpi_f08/wrappers_c/buildiface b/src/binding/fortran/use_mpi_f08/wrappers_c/buildiface
index 64425d6..e7b950f 100755
--- a/src/binding/fortran/use_mpi_f08/wrappers_c/buildiface
+++ b/src/binding/fortran/use_mpi_f08/wrappers_c/buildiface
@@ -313,12 +313,12 @@ int cdesc_create_datatype(CFI_cdesc_t *cdesc, int oldcount, MPI_Datatype oldtype
 #ifdef HAVE_ERROR_CHECKING
     {
         int size;
-        MPIU_Assert(cdesc->rank <= MAX_RANK);
+        MPIR_Assert(cdesc->rank <= MAX_RANK);
         MPI_Type_size(oldtype, &size);
         /* When cdesc->elem_len != size, things suddenly become complicated. Generally, it is hard to create
          * a composite datatype based on two datatypes. Currently we don't support it and doubt it is usefull.
          */
-        MPIU_Assert(cdesc->elem_len == size);
+        MPIR_Assert(cdesc->elem_len == size);
     }
 #endif
 
diff --git a/src/binding/fortran/use_mpi_f08/wrappers_f/comm_create_keyval_f08ts.f90 b/src/binding/fortran/use_mpi_f08/wrappers_f/comm_create_keyval_f08ts.f90
index e0303c0..edd973c 100644
--- a/src/binding/fortran/use_mpi_f08/wrappers_f/comm_create_keyval_f08ts.f90
+++ b/src/binding/fortran/use_mpi_f08/wrappers_f/comm_create_keyval_f08ts.f90
@@ -11,7 +11,7 @@ subroutine MPI_Comm_create_keyval_f08(comm_copy_attr_fn, comm_delete_attr_fn, co
     use :: mpi_f08, only : MPI_Comm_delete_attr_function
     use :: mpi_f08, only : MPI_ADDRESS_KIND
     use :: mpi_c_interface, only : MPIR_Comm_create_keyval_c
-    use :: mpi_c_interface, only : MPIR_Keyval_set_proxy, MPIR_Comm_copy_attr_f08_proxy, MPIR_Comm_delete_attr_f08_proxy
+    use :: mpi_c_interface, only : MPII_Keyval_set_proxy, MPII_Comm_copy_attr_f08_proxy, MPIR_Comm_delete_attr_f08_proxy
 
     implicit none
 
@@ -31,7 +31,7 @@ subroutine MPI_Comm_create_keyval_f08(comm_copy_attr_fn, comm_delete_attr_fn, co
 
     ierror_c = MPIR_Comm_create_keyval_c(comm_copy_attr_fn_c, comm_delete_attr_fn_c, comm_keyval_c, extra_state)
 
-    call MPIR_Keyval_set_proxy(comm_keyval_c, c_funloc(MPIR_Comm_copy_attr_f08_proxy), c_funloc(MPIR_Comm_delete_attr_f08_proxy))
+    call MPII_Keyval_set_proxy(comm_keyval_c, c_funloc(MPII_Comm_copy_attr_f08_proxy), c_funloc(MPIR_Comm_delete_attr_f08_proxy))
     comm_keyval = comm_keyval_c
     if (present(ierror)) ierror = ierror_c
 
diff --git a/src/binding/fortran/use_mpi_f08/wrappers_f/type_create_keyval_f08ts.f90 b/src/binding/fortran/use_mpi_f08/wrappers_f/type_create_keyval_f08ts.f90
index 760a8af..2087f50 100644
--- a/src/binding/fortran/use_mpi_f08/wrappers_f/type_create_keyval_f08ts.f90
+++ b/src/binding/fortran/use_mpi_f08/wrappers_f/type_create_keyval_f08ts.f90
@@ -11,7 +11,7 @@ subroutine MPI_Type_create_keyval_f08(type_copy_attr_fn, type_delete_attr_fn, ty
     use :: mpi_f08, only : MPI_Type_copy_attr_function
     use :: mpi_f08, only : MPI_Type_delete_attr_function
     use :: mpi_c_interface, only : MPIR_Type_create_keyval_c
-    use :: mpi_c_interface, only : MPIR_Keyval_set_proxy, MPIR_Type_copy_attr_f08_proxy, MPIR_Type_delete_attr_f08_proxy
+    use :: mpi_c_interface, only : MPII_Keyval_set_proxy, MPIR_Type_copy_attr_f08_proxy, MPIR_Type_delete_attr_f08_proxy
 
     implicit none
 
@@ -31,7 +31,7 @@ subroutine MPI_Type_create_keyval_f08(type_copy_attr_fn, type_delete_attr_fn, ty
 
     ierror_c = MPIR_Type_create_keyval_c(type_copy_attr_fn_c, type_delete_attr_fn_c, type_keyval_c, extra_state)
 
-    call MPIR_Keyval_set_proxy(type_keyval_c, c_funloc(MPIR_Type_copy_attr_f08_proxy), c_funloc(MPIR_Type_delete_attr_f08_proxy))
+    call MPII_Keyval_set_proxy(type_keyval_c, c_funloc(MPIR_Type_copy_attr_f08_proxy), c_funloc(MPIR_Type_delete_attr_f08_proxy))
     type_keyval = type_keyval_c
     if (present(ierror)) ierror = ierror_c
 
diff --git a/src/binding/fortran/use_mpi_f08/wrappers_f/win_create_keyval_f08ts.f90 b/src/binding/fortran/use_mpi_f08/wrappers_f/win_create_keyval_f08ts.f90
index 78f5bbc..b433089 100644
--- a/src/binding/fortran/use_mpi_f08/wrappers_f/win_create_keyval_f08ts.f90
+++ b/src/binding/fortran/use_mpi_f08/wrappers_f/win_create_keyval_f08ts.f90
@@ -11,7 +11,7 @@ subroutine MPI_Win_create_keyval_f08(win_copy_attr_fn, win_delete_attr_fn, win_k
     use :: mpi_f08, only : MPI_Win_copy_attr_function
     use :: mpi_f08, only : MPI_Win_delete_attr_function
     use :: mpi_c_interface, only : MPIR_Win_create_keyval_c
-    use :: mpi_c_interface, only : MPIR_Keyval_set_proxy, MPIR_Win_copy_attr_f08_proxy, MPIR_Win_delete_attr_f08_proxy
+    use :: mpi_c_interface, only : MPII_Keyval_set_proxy, MPIR_Win_copy_attr_f08_proxy, MPIR_Win_delete_attr_f08_proxy
 
     implicit none
 
@@ -31,7 +31,7 @@ subroutine MPI_Win_create_keyval_f08(win_copy_attr_fn, win_delete_attr_fn, win_k
 
     ierror_c = MPIR_Win_create_keyval_c(win_copy_attr_fn_c, win_delete_attr_fn_c, win_keyval_c, extra_state)
 
-    call MPIR_Keyval_set_proxy(win_keyval_c, c_funloc(MPIR_Win_copy_attr_f08_proxy), c_funloc(MPIR_Win_delete_attr_f08_proxy))
+    call MPII_Keyval_set_proxy(win_keyval_c, c_funloc(MPIR_Win_copy_attr_f08_proxy), c_funloc(MPIR_Win_delete_attr_f08_proxy))
     win_keyval = win_keyval_c
     if (present(ierror)) ierror = ierror_c
 
diff --git a/src/env/mpichversion.c b/src/env/mpichversion.c
index 38f3c6b..3b9640e 100644
--- a/src/env/mpichversion.c
+++ b/src/env/mpichversion.c
@@ -90,22 +90,22 @@ int main( int argc, char *argv[] )
 
     /* Print out the information, one item per line */
     if (flags[Version_number]) {
-	printf( "MPICH Version:    \t%s\n", MPIR_Version_string );
+	printf( "MPICH Version:    \t%s\n", MPII_Version_string );
     }
     if (flags[Date]) {
-	printf( "MPICH Release date:\t%s\n", MPIR_Version_date );
+	printf( "MPICH Release date:\t%s\n", MPII_Version_date );
     }
     if (flags[Device]) {
-	printf( "MPICH Device:    \t%s\n", MPIR_Version_device );
+	printf( "MPICH Device:    \t%s\n", MPII_Version_device );
     }
     if (flags[Configure_args]) {
-	printf( "MPICH configure: \t%s\n", MPIR_Version_configure );
+	printf( "MPICH configure: \t%s\n", MPII_Version_configure );
     }
     if (flags[Compilers]) {
-	printf( "MPICH CC: \t%s\n", MPIR_Version_CC );
-	printf( "MPICH CXX: \t%s\n", MPIR_Version_CXX );
-	printf( "MPICH F77: \t%s\n", MPIR_Version_F77 );
-	printf( "MPICH FC: \t%s\n", MPIR_Version_FC );
+	printf( "MPICH CC: \t%s\n", MPII_Version_CC );
+	printf( "MPICH CXX: \t%s\n", MPII_Version_CXX );
+	printf( "MPICH F77: \t%s\n", MPII_Version_F77 );
+	printf( "MPICH FC: \t%s\n", MPII_Version_FC );
     }
 
     return 0;
diff --git a/src/include/Makefile.mk b/src/include/Makefile.mk
index b9a28ea..7da2717 100644
--- a/src/include/Makefile.mk
+++ b/src/include/Makefile.mk
@@ -34,11 +34,11 @@ noinst_HEADERS +=                   \
     src/include/mpir_dbg.h          \
     src/include/mpir_attr_generic.h \
     src/include/mpir_attr.h         \
-    src/include/mpir_f77interface.h \
-    src/include/mpir_cxxinterface.h \
-    src/include/mpir_fortlogical.h   \
+    src/include/mpii_f77interface.h \
+    src/include/mpii_cxxinterface.h \
+    src/include/mpii_fortlogical.h   \
     src/include/mpiallstates.h      \
-    src/include/mpir_bsend.h          \
+    src/include/mpii_bsend.h          \
     src/include/mpir_cvars.h        \
     src/include/mpichconfconst.h    \
     src/include/mpir_err.h          \
diff --git a/src/include/mpichconfconst.h b/src/include/mpichconfconst.h
index 9b0f09d..b4c208d 100644
--- a/src/include/mpichconfconst.h
+++ b/src/include/mpichconfconst.h
@@ -13,10 +13,10 @@
 #ifndef MPICHCONFCONST_H_INCLUDED
 #define MPICHCONFCONST_H_INCLUDED
 
-#define MPICH_ERROR_MSG_NONE 0
-#define MPICH_ERROR_MSG_CLASS 1
-#define MPICH_ERROR_MSG_GENERIC 2
-#define MPICH_ERROR_MSG_ALL 8
+#define MPICH_ERROR_MSG__NONE 0
+#define MPICH_ERROR_MSG__CLASS 1
+#define MPICH_ERROR_MSG__GENERIC 2
+#define MPICH_ERROR_MSG__ALL 8
 
 /* -------------------------------------------------------------------- */
 /* thread-related constants */
@@ -28,15 +28,34 @@
  * A configure choice will set MPICH_THREAD_GRANULARITY to one of these values */
 
 /* _INVALID exists to avoid accidental macro evaluations to 0 */
-#define MPICH_THREAD_GRANULARITY_INVALID 0
-#define MPICH_THREAD_GRANULARITY_GLOBAL 1
-#define MPICH_THREAD_GRANULARITY_PER_OBJECT 2
-#define MPICH_THREAD_GRANULARITY_LOCK_FREE 3
+#define MPICH_THREAD_GRANULARITY__INVALID 0
+#define MPICH_THREAD_GRANULARITY__GLOBAL 1
+#define MPICH_THREAD_GRANULARITY__POBJ 2
+#define MPICH_THREAD_GRANULARITY__LOCKFREE 3
 /* _SINGLE is the "null" granularity, where all processes are single-threaded */
-#define MPICH_THREAD_GRANULARITY_SINGLE 4
+#define MPICH_THREAD_GRANULARITY__SINGLE 4
 
 /* _NONE means no concurrency control, such as when using MPI_THREAD_SINGLE */
-#define MPIU_REFCOUNT_NONE 1
-#define MPIU_REFCOUNT_LOCKFREE 2
+#define MPICH_REFCOUNT__NONE 1
+#define MPICH_REFCOUNT__LOCKFREE 2
+
+/* Possible values for timing */
+#define MPICH_TIMING_KIND__NONE 0
+#define MPICH_TIMING_KIND__TIME 1
+#define MPICH_TIMING_KIND__LOG 2
+#define MPICH_TIMING_KIND__LOG_DETAILED 3
+#define MPICH_TIMING_KIND__ALL 4
+#define MPICH_TIMING_KIND__RUNTIME 5
+
+/* Possible values for USE_LOGGING */
+#define MPICH_LOGGING__NONE 0
+#define MPICH_LOGGING__RLOG 1
+#define MPICH_LOGGING__EXTERNAL 4
+
+/* Possible values for process state */
+#define MPICH_MPI_STATE__PRE_INIT 0
+#define MPICH_MPI_STATE__IN_INIT 1
+#define MPICH_MPI_STATE__POST_INIT 2
+#define MPICH_MPI_STATE__POST_FINALIZED 3
 
 #endif /* MPICHCONFCONST_H_INCLUDED */
diff --git a/src/include/mpir_bsend.h b/src/include/mpii_bsend.h
similarity index 92%
rename from src/include/mpir_bsend.h
rename to src/include/mpii_bsend.h
index 227174d..83263da 100644
--- a/src/include/mpir_bsend.h
+++ b/src/include/mpii_bsend.h
@@ -44,33 +44,33 @@ typedef enum {
     BSEND = 0,
     IBSEND = 1,
     BSEND_INIT = 2
-} MPIR_Bsend_kind_t;
+} MPII_Bsend_kind_t;
 
 struct MPIR_Request;
 struct MPIR_Comm;
 
 /* BsendMsg is used to hold all of the message particulars in case a
    request is not currently available */
-typedef struct MPIR_Bsend_msg {
+typedef struct MPII_Bsend_msg {
     void         *msgbuf;
     MPI_Aint     count;
     MPI_Datatype dtype;
     int          tag;
     struct MPIR_Comm    *comm_ptr;
     int          dest;
-} MPIR_Bsend_msg_t;
+} MPII_Bsend_msg_t;
 
 /* BsendData describes a bsend request */
-typedef struct MPIR_Bsend_data {
+typedef struct MPII_Bsend_data {
     size_t            size;            /* size that is available for data */
     size_t            total_size;      /* total size of this segment,
                                           including all headers */
-    struct MPIR_Bsend_data *next, *prev;
-    MPIR_Bsend_kind_t kind;
+    struct MPII_Bsend_data *next, *prev;
+    MPII_Bsend_kind_t kind;
     struct MPIR_Request  *request;
-    MPIR_Bsend_msg_t  msg;
+    MPII_Bsend_msg_t  msg;
     double            alignpad;        /* make sure that the struct
                                           shares double alignment */
-} MPIR_Bsend_data_t;
+} MPII_Bsend_data_t;
 
 #endif /* MPIR_BSEND_H_INCLUDED */
diff --git a/src/include/mpii_cxxinterface.h b/src/include/mpii_cxxinterface.h
new file mode 100644
index 0000000..23a13a3
--- /dev/null
+++ b/src/include/mpii_cxxinterface.h
@@ -0,0 +1,15 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPII_CXX_INTERFACE_H_INCLUDED
+#define MPII_CXX_INTERFACE_H_INCLUDED
+
+extern void MPII_Keyval_set_cxx( int, void (*)(void), void (*)(void) );
+extern void MPII_Op_set_cxx( MPI_Op, void (*)(void) );
+extern void MPII_Errhandler_set_cxx( MPI_Errhandler, void (*)(void) );
+
+#endif /* MPII_CXX_INTERFACE_H_INCLUDED */
diff --git a/src/include/mpir_f77interface.h b/src/include/mpii_f77interface.h
similarity index 52%
rename from src/include/mpir_f77interface.h
rename to src/include/mpii_f77interface.h
index 775f98e..37ebf4b 100644
--- a/src/include/mpir_f77interface.h
+++ b/src/include/mpii_f77interface.h
@@ -4,21 +4,21 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-#ifndef MPIR_F77INTERFACE_H_INCLUDED
-#define MPIR_F77INTERFACE_H_INCLUDED
+#ifndef MPII_F77INTERFACE_H_INCLUDED
+#define MPII_F77INTERFACE_H_INCLUDED
 
 /* These functions are provided by the MPICH code for the Fortran interface,
    and provide the interfaces needed to keep track of which MPI internal
    objects need to have Fortran or Fortran 90 characteristics */
-void MPIR_Keyval_set_fortran( int );
-void MPIR_Keyval_set_fortran90( int );
-void MPIR_Grequest_set_lang_f77( MPI_Request greq );
+void MPII_Keyval_set_fortran( int );
+void MPII_Keyval_set_fortran90( int );
+void MPII_Grequest_set_lang_f77( MPI_Request greq );
 #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT)
-void MPIR_Op_set_fc( MPI_Op );
-typedef void (MPIR_F77_User_function) ( void *, void *, MPI_Fint *, MPI_Fint * );
-void MPIR_Errhandler_set_fc( MPI_Errhandler );
+void MPII_Op_set_fc( MPI_Op );
+typedef void (MPII_F77_User_function) ( void *, void *, MPI_Fint *, MPI_Fint * );
+void MPII_Errhandler_set_fc( MPI_Errhandler );
 #endif
 
-#define MPIR_ATTR_C_TO_FORTRAN(ATTR) ((ATTR)+1)
+#define MPII_ATTR_C_TO_FORTRAN(ATTR) ((ATTR)+1)
 
-#endif /* MPIR_F77INTERFACE_H_INCLUDED */
+#endif /* MPII_F77INTERFACE_H_INCLUDED */
diff --git a/src/include/mpir_fortlogical.h b/src/include/mpii_fortlogical.h
similarity index 68%
rename from src/include/mpir_fortlogical.h
rename to src/include/mpii_fortlogical.h
index 004a7c2..5528aee 100644
--- a/src/include/mpir_fortlogical.h
+++ b/src/include/mpii_fortlogical.h
@@ -4,22 +4,22 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-#ifndef MPI_FORTLOGICAL_H_INCLUDED
-#define MPI_FORTLOGICAL_H_INCLUDED
+#ifndef MPII_FORTLOGICAL_H_INCLUDED
+#define MPII_FORTLOGICAL_H_INCLUDED
 
 /* Fortran logical values */
 #ifndef _CRAY
 #ifdef F77_USE_BOOLEAN_LITERALS
-#define MPIR_F_TRUE  F77_TRUE_VALUE
-#define MPIR_F_FALSE F77_FALSE_VALUE
+#define MPII_F_TRUE  F77_TRUE_VALUE
+#define MPII_F_FALSE F77_FALSE_VALUE
 #else
 #if !defined(F77_RUNTIME_VALUES) && defined(F77_TRUE_VALUE_SET)
-extern const MPI_Fint MPIR_F_TRUE, MPIR_F_FALSE;
+extern const MPI_Fint MPII_F_TRUE, MPII_F_FALSE;
 #else
-extern MPI_Fint MPIR_F_TRUE, MPIR_F_FALSE;
+extern MPI_Fint MPII_F_TRUE, MPII_F_FALSE;
 #endif
 #endif
-#define MPIR_TO_FLOG(a) ((a) ? MPIR_F_TRUE : MPIR_F_FALSE)
+#define MPII_TO_FLOG(a) ((a) ? MPII_F_TRUE : MPII_F_FALSE)
 /* 
    Note on true and false.  This code is only an approximation.
    Some systems define either true or false, and allow some or ALL other
@@ -33,13 +33,13 @@ extern MPI_Fint MPIR_F_TRUE, MPIR_F_FALSE;
    interface library for multiple compilers that differ only in the 
    value used for Fortran .TRUE. .
  */
-#define MPIR_FROM_FLOG(a) ( (a) == MPIR_F_FALSE ? 0 : 1 )
+#define MPII_FROM_FLOG(a) ( (a) == MPII_F_FALSE ? 0 : 1 )
 
 #else
 /* CRAY Vector processors only; these are defined in /usr/include/fortran.h 
    Thanks to lmc at cray.com */
-#define MPIR_TO_FLOG(a) (_btol(a))
-#define MPIR_FROM_FLOG(a) ( _ltob(&(a)) )    /* (a) must be a pointer */
+#define MPII_TO_FLOG(a) (_btol(a))
+#define MPII_FROM_FLOG(a) ( _ltob(&(a)) )    /* (a) must be a pointer */
 #endif
 
-#endif /* MPI_FORTLOGICAL_H_INCLUDED */
+#endif /* MPII_FORTLOGICAL_H_INCLUDED */
diff --git a/src/include/mpiimpl.h b/src/include/mpiimpl.h
index e43b486..100f357 100644
--- a/src/include/mpiimpl.h
+++ b/src/include/mpiimpl.h
@@ -191,11 +191,11 @@ typedef struct MPIR_Topology MPIR_Topology;
 #include "mpir_ext.h"
 
 #ifdef HAVE_CXX_BINDING
-#include "mpir_cxxinterface.h"
+#include "mpii_cxxinterface.h"
 #endif
 
 #ifdef HAVE_FORTRAN_BINDING
-#include "mpir_f77interface.h"
+#include "mpii_f77interface.h"
 #endif
 
 
diff --git a/src/include/mpir_assert.h b/src/include/mpir_assert.h
index 11887ce..c4c698c 100644
--- a/src/include/mpir_assert.h
+++ b/src/include/mpir_assert.h
@@ -29,43 +29,43 @@ int MPIR_Assert_fail(const char *cond, const char *file_name, int line_num);
 int MPIR_Assert_fail_fmt(const char *cond, const char *file_name, int line_num, const char *fmt, ...);
 
 /*
- * MPIU_Assert()
+ * MPIR_Assert()
  *
  * Similar to assert() except that it performs an MPID_Abort() when the 
  * assertion fails.  Also, for Windows, it doesn't popup a
  * mesage box on a remote machine.
  */
 #if (!defined(NDEBUG) && defined(HAVE_ERROR_CHECKING))
-#   define MPIU_AssertDeclValue(_a,_b) _a = _b
-#   define MPIU_Assert(a_)                             \
+#   define MPIR_AssertDeclValue(_a,_b) _a = _b
+#   define MPIR_Assert(a_)                             \
     do {                                               \
         if (unlikely(!(a_))) {                         \
             MPIR_Assert_fail(#a_, __FILE__, __LINE__); \
         }                                              \
     } while (0)
 #else
-#   define MPIU_Assert(a_)
+#   define MPIR_Assert(a_)
 /* Empty decls not allowed in C */
-#   define MPIU_AssertDeclValue(_a,_b) _a ATTRIBUTE((unused)) = _b
+#   define MPIR_AssertDeclValue(_a,_b) _a ATTRIBUTE((unused)) = _b
 #endif
 
 /*
- * MPIU_Assertp()
+ * MPIR_Assertp()
  *
- * Similar to MPIU_Assert() except that these assertions persist regardless of 
- * NDEBUG or HAVE_ERROR_CHECKING.  MPIU_Assertp() may
+ * Similar to MPIR_Assert() except that these assertions persist regardless of
+ * NDEBUG or HAVE_ERROR_CHECKING.  MPIR_Assertp() may
  * be used for error checking in prototype code, although it should be 
  * converted real error checking and reporting once the
  * prototype becomes part of the official and supported code base.
  */
-#define MPIU_Assertp(a_)                                             \
+#define MPIR_Assertp(a_)                                             \
     do {                                                             \
         if (unlikely(!(a_))) {                                       \
             MPIR_Assert_fail(#a_, __FILE__, __LINE__);               \
         }                                                            \
     } while (0)
 
-/* Define the MPIU_Assert_fmt_msg macro.  This macro takes two arguments.  The
+/* Define the MPIR_Assert_fmt_msg macro.  This macro takes two arguments.  The
  * first is the condition to assert.  The second is a parenthesized list of
  * arguments suitable for passing directly to printf that will yield a relevant
  * error message.  The macro will first evaluate the condition.  If it evaluates
@@ -79,34 +79,34 @@ int MPIR_Assert_fail_fmt(const char *cond, const char *file_name, int line_num,
  *    The supplied error message will also be evaluated and printed.
  * 3) It will similarly emit the assertion failure and caller supplied messages
  *    to the debug log, if enabled, via MPL_DBG_MSG_FMT.
- * 4) It will invoke MPID_Abort, just like the other MPIU_Assert* macros.
+ * 4) It will invoke MPID_Abort, just like the other MPIR_Assert* macros.
  *
  * If the compiler doesn't support (...)/__VA_ARGS__ in macros then the user
  * message will not be evaluated or printed.  If NDEBUG is defined or
  * HAVE_ERROR_CHECKING is undefined, this macro will expand to nothing, just
- * like MPIU_Assert.
+ * like MPIR_Assert.
  *
  * Example usage:
  *
- * MPIU_Assert_fmg_msg(foo > bar,("foo is larger than bar: foo=%d bar=%d",foo,bar));
+ * MPIR_Assert_fmt_msg(foo > bar,("foo is larger than bar: foo=%d bar=%d",foo,bar));
  */
 #if (!defined(NDEBUG) && defined(HAVE_ERROR_CHECKING))
 #  if defined(HAVE_MACRO_VA_ARGS)
 
 /* newlines are added internally by the impl function, callers do not need to include them */
-#    define MPIU_Assert_fmt_msg(cond_,fmt_arg_parens_)                         \
+#    define MPIR_Assert_fmt_msg(cond_,fmt_arg_parens_)                         \
     do {                                                                       \
         if (unlikely(!(cond_))) {                                              \
             MPIR_Assert_fail_fmt(#cond_, __FILE__, __LINE__,                   \
-                                 MPIU_Assert_fmt_msg_expand_ fmt_arg_parens_); \
+                                 fmt_msg_expand_ fmt_arg_parens_); \
         }                                                                      \
     } while (0)
 /* helper to just expand the parens arg inline */
-#    define MPIU_Assert_fmt_msg_expand_(...) __VA_ARGS__
+#    define fmt_msg_expand_(...) __VA_ARGS__
 
 #  else /* defined(HAVE_MACRO_VA_ARGS) */
 
-#    define MPIU_Assert_fmt_msg(cond_,fmt_arg_parens_)                                                   \
+#    define MPIR_Assert_fmt_msg(cond_,fmt_arg_parens_)                                                   \
     do {                                                                                                 \
         if (unlikely(!(cond_))) {                                                                        \
             MPIR_Assert_fail_fmt(#cond_, __FILE__, __LINE__,                                             \
@@ -116,15 +116,15 @@ int MPIR_Assert_fail_fmt(const char *cond, const char *file_name, int line_num,
 
 #  endif
 #else /* !defined(NDEBUG) && defined(HAVE_ERROR_CHECKING) */
-#    define MPIU_Assert_fmt_msg(cond_,fmt_arg_parens_)
+#    define MPIR_Assert_fmt_msg(cond_,fmt_arg_parens_)
 #endif
 
 #ifdef HAVE_C11__STATIC_ASSERT
-#  define MPIU_Static_assert(cond_,msg_) _Static_assert(cond_,msg_)
+#  define MPIR_Static_assert(cond_,msg_) _Static_assert(cond_,msg_)
 #endif
 /* fallthrough to a run-time assertion */
-#ifndef MPIU_Static_assert
-#  define MPIU_Static_assert(cond_,msg_) MPIU_Assert_fmt_msg((cond_), ("%s", (msg_)))
+#ifndef MPIR_Static_assert
+#  define MPIR_Static_assert(cond_,msg_) MPIR_Assert_fmt_msg((cond_), ("%s", (msg_)))
 #endif
 
 #endif /* !defined(MPIR_ASSERT_H_INCLUDED) */
diff --git a/src/include/mpir_attr.h b/src/include/mpir_attr.h
index 12f295c..ca6e7ba 100644
--- a/src/include/mpir_attr.h
+++ b/src/include/mpir_attr.h
@@ -13,7 +13,7 @@
    only store generic copy and delete functions.  This allows us to use
    common code for the attribute set, delete, and dup functions */
 /*E
-  MPIR_Copy_function - MPID Structure to hold an attribute copy function
+  copy_function - MPID Structure to hold an attribute copy function
 
   Notes:
   The appropriate element of this union is selected by using the language
@@ -32,18 +32,18 @@
 
   E*/
 int
-MPIR_Attr_copy_c_proxy(
+MPII_Attr_copy_c_proxy(
     MPI_Comm_copy_attr_function* user_function,
     int handle,
     int keyval,
     void* extra_state,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void** attrib_copy,
     int* flag
     );
 
-typedef struct MPIR_Copy_function {
+typedef struct copy_function {
   int  (*C_CopyFunction)( int, int, void *, void *, void *, int * );
   void (*F77_CopyFunction)  ( MPI_Fint *, MPI_Fint *, MPI_Fint *, MPI_Fint *,
                               MPI_Fint *, MPI_Fint *, MPI_Fint * );
@@ -54,12 +54,12 @@ typedef struct MPIR_Copy_function {
    * Currently the lang-indpendent funcs are used only for keyvals
    */
   MPI_Comm_copy_attr_function *user_function;
-  MPIR_Attr_copy_proxy *proxy;
+  MPII_Attr_copy_proxy *proxy;
   /* The C++ function is the same as the C function */
-} MPIR_Copy_function;
+} copy_function;
 
 /*E
-  MPIR_Delete_function - MPID Structure to hold an attribute delete function
+  delete_function - MPID Structure to hold an attribute delete function
 
   Notes:
   The appropriate element of this union is selected by using the language
@@ -78,16 +78,16 @@ typedef struct MPIR_Copy_function {
 
   E*/
 int
-MPIR_Attr_delete_c_proxy(
+MPII_Attr_delete_c_proxy(
     MPI_Comm_delete_attr_function* user_function,
     int handle,
     int keyval,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void* extra_state
     );
 
-typedef struct MPIR_Delete_function {
+typedef struct delete_function {
   int  (*C_DeleteFunction)  ( int, int, void *, void * );
   void (*F77_DeleteFunction)( MPI_Fint *, MPI_Fint *, MPI_Fint *, MPI_Fint *,
                               MPI_Fint * );
@@ -98,37 +98,37 @@ typedef struct MPIR_Delete_function {
    * Currently the lang-indpendent funcs are used only for keyvals
    */
   MPI_Comm_delete_attr_function *user_function;
-  MPIR_Attr_delete_proxy *proxy;
-} MPIR_Delete_function;
+  MPII_Attr_delete_proxy *proxy;
+} delete_function;
 
 /*S
-  MPIR_Keyval - Structure of an MPID keyval
+  MPII_Keyval - Structure of an MPID keyval
 
   Module:
   Attribute-DS
 
   S*/
-typedef struct MPIR_Keyval {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-    MPIR_Object_kind     kind;
+typedef struct MPII_Keyval {
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPII_Object_kind     kind;
     int                  was_freed;
     void                 *extra_state;
-    MPIR_Copy_function   copyfn;
-    MPIR_Delete_function delfn;
+    copy_function   copyfn;
+    delete_function delfn;
   /* other, device-specific information */
 #ifdef MPID_DEV_KEYVAL_DECL
     MPID_DEV_KEYVAL_DECL
 #endif
-} MPIR_Keyval;
+} MPII_Keyval;
 
-#define MPIR_Keyval_add_ref( _keyval )                                  \
+#define MPII_Keyval_add_ref( _keyval )                                  \
     do {                                                                \
-        MPIU_Object_add_ref( _keyval );                                 \
+        MPIR_Object_add_ref( _keyval );                                 \
     } while(0)
 
-#define MPIR_Keyval_release_ref( _keyval, _inuse )                      \
+#define MPII_Keyval_release_ref( _keyval, _inuse )                      \
     do {                                                                \
-        MPIU_Object_release_ref( _keyval, _inuse );                     \
+        MPIR_Object_release_ref( _keyval, _inuse );                     \
     } while(0)
 
 
@@ -138,9 +138,9 @@ typedef struct MPIR_Keyval {
    to work with the datatype code used in the I/O library.  While this
    is really a limitation in the current Datatype implementation. */
 #ifdef USE_AINT_FOR_ATTRVAL
-typedef MPI_Aint MPIR_AttrVal_t;
+typedef MPI_Aint MPII_Attr_val_t;
 #else
-typedef void * MPIR_AttrVal_t;
+typedef void * MPII_Attr_val_t;
 #endif
 
 /* Attributes need no ref count or handle, but since we want to use the
@@ -185,14 +185,14 @@ typedef void * MPIR_AttrVal_t;
 
  S*/
 typedef struct MPIR_Attribute {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-    MPIR_Keyval  *keyval;           /* Keyval structure for this attribute */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPII_Keyval  *keyval;           /* Keyval structure for this attribute */
 
     struct MPIR_Attribute *next;    /* Pointer to next in the list */
-    MPIR_AttrType attrType;         /* Type of the attribute */
+    MPIR_Attr_type attrType;         /* Type of the attribute */
     long        pre_sentinal;       /* Used to detect user errors in accessing
 				       the value */
-    MPIR_AttrVal_t value;           /* Stored value. An Aint must be at least
+    MPII_Attr_val_t value;           /* Stored value. An Aint must be at least
 				       as large as an address - some builds
 				       may make an Aint larger than a void * */
     long        post_sentinal;      /* Like pre_sentinal */
diff --git a/src/include/mpir_attr_generic.h b/src/include/mpir_attr_generic.h
index 7bf6b48..72d7aeb 100644
--- a/src/include/mpir_attr_generic.h
+++ b/src/include/mpir_attr_generic.h
@@ -23,7 +23,7 @@
   Unfortunately, these have a slightly different calling sequence for
   each language, particularly when the size of a pointer is
   different from the size of a Fortran integer.  The unions
-  'MPIR_Copy_function' and 'MPIR_Delete_function' capture the differences
+  'copy_function' and 'delete_function' capture the differences
   in a single union type.
 
   The above comment is out of date but has never been updated as it should
@@ -115,18 +115,18 @@ typedef enum {
     MPIR_ATTR_PTR=0,
     MPIR_ATTR_AINT=1,
     MPIR_ATTR_INT=3
-} MPIR_AttrType;
+} MPIR_Attr_type;
 
-#define MPIR_ATTR_KIND(_a) (_a & 0x1)
+#define MPII_ATTR_KIND(_a) (_a & 0x1)
 
-int MPIR_CommSetAttr( MPI_Comm, int, void *, MPIR_AttrType );
-int MPIR_TypeSetAttr( MPI_Datatype, int, void *, MPIR_AttrType );
-int MPIR_WinSetAttr( MPI_Win, int, void *, MPIR_AttrType );
-int MPIR_CommGetAttr( MPI_Comm, int, void *, int *, MPIR_AttrType );
-int MPIR_TypeGetAttr( MPI_Datatype, int, void *, int *, MPIR_AttrType );
-int MPIR_WinGetAttr( MPI_Win, int, void *, int *, MPIR_AttrType );
+int MPII_Comm_set_attr( MPI_Comm, int, void *, MPIR_Attr_type );
+int MPII_Type_set_attr( MPI_Datatype, int, void *, MPIR_Attr_type );
+int MPII_Win_set_attr( MPI_Win, int, void *, MPIR_Attr_type );
+int MPII_Comm_get_attr( MPI_Comm, int, void *, int *, MPIR_Attr_type );
+int MPII_Type_get_attr( MPI_Datatype, int, void *, int *, MPIR_Attr_type );
+int MPII_Win_get_attr( MPI_Win, int, void *, int *, MPIR_Attr_type );
 
-int MPIR_CommGetAttr_fort( MPI_Comm, int, void *, int *, MPIR_AttrType );
+int MPII_Comm_get_attr_fort( MPI_Comm, int, void *, int *, MPIR_Attr_type );
 
 
 #if defined(__cplusplus)
@@ -152,12 +152,12 @@ extern "C" {
  */
 typedef
 int
-(MPIR_Attr_copy_proxy)(
+(MPII_Attr_copy_proxy)(
     MPI_Comm_copy_attr_function* user_function,
     int handle,
     int keyval,
     void* extra_state,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void** attrib_copy,
     int* flag
@@ -165,20 +165,20 @@ int
 
 typedef
 int
-(MPIR_Attr_delete_proxy)(
+(MPII_Attr_delete_proxy)(
     MPI_Comm_delete_attr_function* user_function,
     int handle,
     int keyval,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void* extra_state
     );
 
 void
-MPIR_Keyval_set_proxy(
+MPII_Keyval_set_proxy(
     int keyval,
-    MPIR_Attr_copy_proxy copy_proxy,
-    MPIR_Attr_delete_proxy delete_proxy
+    MPII_Attr_copy_proxy copy_proxy,
+    MPII_Attr_delete_proxy delete_proxy
     );
 
 #if defined(__cplusplus)
diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h
index ff6c1e1..3b3a0c4 100644
--- a/src/include/mpir_coll.h
+++ b/src/include/mpir_coll.h
@@ -45,51 +45,51 @@ typedef struct MPIR_Collops {
                            MPIR_Comm *, MPIR_Errflag_t *);
 
     /* MPI-3 nonblocking collectives */
-    int (*Ibarrier_sched)(MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ibarrier_sched)(MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Ibcast_sched)(void *buffer, int count, MPI_Datatype datatype, int root,
-                  MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                  MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Igather_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                    int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr,
-                   MPID_Sched_t s);
+                   MPIR_Sched_t s);
     int (*Igatherv_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                     const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root,
-                    MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                    MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Iscatter_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                     int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr,
-                    MPID_Sched_t s);
+                    MPIR_Sched_t s);
     int (*Iscatterv_sched)(const void *sendbuf, const int *sendcounts, const int *displs,
                      MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                     int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                     int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Iallgather_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                       int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                      MPID_Sched_t s);
+                      MPIR_Sched_t s);
     int (*Iallgatherv_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                        const int *recvcounts, const int *displs, MPI_Datatype recvtype,
-                       MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                       MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Ialltoall_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                      int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                     MPID_Sched_t s);
+                     MPIR_Sched_t s);
     int (*Ialltoallv_sched)(const void *sendbuf, const int *sendcounts, const int *sdispls,
                       MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
                       const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                      MPID_Sched_t s);
+                      MPIR_Sched_t s);
     int (*Ialltoallw_sched)(const void *sendbuf, const int *sendcounts, const int *sdispls,
                       const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
                       const int *rdispls, const MPI_Datatype *recvtypes,
-                      MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                      MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Ireduce_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
-                   int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                   int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Iallreduce_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                      MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                      MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Ireduce_scatter_sched)(const void *sendbuf, void *recvbuf, const int *recvcounts,
-                           MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                           MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Ireduce_scatter_block_sched)(const void *sendbuf, void *recvbuf, int recvcount,
                                  MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
-                                 MPID_Sched_t s);
+                                 MPIR_Sched_t s);
     int (*Iscan_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
-                 MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                 MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Iexscan_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
-                   MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                   MPIR_Comm *comm_ptr, MPIR_Sched_t s);
 
     struct MPIR_Collops *prev_coll_fns; /* when overriding this table, set this to point to the old table */
 
@@ -112,21 +112,21 @@ typedef struct MPIR_Collops {
                               MPIR_Comm *comm_ptr);
     int (*Ineighbor_allgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                               MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                               MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Ineighbor_allgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                 void *recvbuf, const int recvcounts[], const int displs[],
-                                MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                                MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Ineighbor_alltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                               void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                              MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                              MPIR_Comm *comm_ptr, MPIR_Sched_t s);
     int (*Ineighbor_alltoallv)(const void *sendbuf, const int sendcounts[], const int sdispls[],
                                MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
                                const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                               MPID_Sched_t s);
+                               MPIR_Sched_t s);
     int (*Ineighbor_alltoallw)(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[],
                                const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
                                const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
-                               MPIR_Comm *comm_ptr, MPID_Sched_t s);
+                               MPIR_Comm *comm_ptr, MPIR_Sched_t s);
 } MPIR_Collops;
 
 
@@ -378,74 +378,74 @@ int MPIR_Neighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Dat
 int MPIR_Neighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
 int MPIR_Neighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
 int MPIR_Neighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr);
-int MPIR_Ineighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ineighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ineighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ineighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ineighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ineighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ineighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ineighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr, MPIR_Sched_t s);
 
 
 /* nonblocking collective default algorithms */
-int MPIR_Ibcast_intra(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscatter_for_bcast(void *tmp_buf, int root, MPIR_Comm *comm_ptr, int nbytes, MPID_Sched_t s);
-int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibarrier_intra(MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibarrier_inter(MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoallv_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoallv_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_naive(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Igather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscatterv(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_intra(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_inplace(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoallw_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoallw_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ibcast_intra(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iscatter_for_bcast(void *tmp_buf, int root, MPIR_Comm *comm_ptr, int nbytes, MPIR_Sched_t s);
+int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ibarrier_intra(MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ibarrier_inter(MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoallv_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoallv_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallreduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallreduce_naive(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Igather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iscatterv(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_intra(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoall_inplace(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgather_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgatherv_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgatherv_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoallw_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
+int MPIR_Ialltoallw_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPIR_Sched_t s);
 
 #endif /* MPIR_COLL_H_INCLUDED */
diff --git a/src/include/mpir_comm.h b/src/include/mpir_comm.h
index 57bd1ed..babfa5d 100644
--- a/src/include/mpir_comm.h
+++ b/src/include/mpir_comm.h
@@ -131,10 +131,10 @@ int MPIR_Comm_map_free(struct MPIR_Comm *comm);
   communicator have acked.
   S*/
 struct MPIR_Comm {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
     MPID_Thread_mutex_t mutex;
-    MPIU_Context_id_t context_id; /* Send context id.  See notes */
-    MPIU_Context_id_t recvcontext_id; /* Send context id.  See notes */
+    MPIR_Context_id_t context_id; /* Send context id.  See notes */
+    MPIR_Context_id_t recvcontext_id; /* Receive context id.  See notes */
     int           remote_size;   /* Value of MPI_Comm_(remote)_size */
     int           rank;          /* Value of MPI_Comm_rank */
     MPIR_Attribute *attributes;  /* List of attributes */
@@ -172,7 +172,7 @@ struct MPIR_Comm {
     struct MPIR_Collops  *coll_fns; /* Pointer to a table of functions
                                               implementing the collective
                                               routines */
-    struct MPIR_TopoOps  *topo_fns; /* Pointer to a table of functions
+    struct MPII_Topo_ops  *topo_fns; /* Pointer to a table of functions
 				       implementting the topology routines */
     int next_sched_tag;             /* used by the NBC schedule code to allocate tags */
 
@@ -199,7 +199,7 @@ struct MPIR_Comm {
     MPID_DEV_COMM_DECL
 #endif
 };
-extern MPIU_Object_alloc_t MPIR_Comm_mem;
+extern MPIR_Object_alloc_t MPIR_Comm_mem;
 
 typedef struct MPIR_Gpid {
 #ifdef MPID_DEV_GPID_DECL
@@ -213,9 +213,9 @@ typedef struct MPIR_Gpid {
 int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr);
 
 #define MPIR_Comm_add_ref(_comm) \
-    do { MPIU_Object_add_ref((_comm)); } while (0)
+    do { MPIR_Object_add_ref((_comm)); } while (0)
 #define MPIR_Comm_release_ref( _comm, _inuse ) \
-    do { MPIU_Object_release_ref( _comm, _inuse ); } while (0)
+    do { MPIR_Object_release_ref( _comm, _inuse ); } while (0)
 
 
 /* Release a reference to a communicator.  If there are no pending
@@ -253,31 +253,10 @@ static inline int MPIR_Comm_release(MPIR_Comm * comm_ptr)
 */
 int MPIR_Comm_release_always(MPIR_Comm *comm_ptr);
 
-/* applies the specified info chain to the specified communicator */
-int MPIR_Comm_apply_hints(MPIR_Comm *comm_ptr, MPIR_Info *info_ptr);
-
-int MPIR_Comm_copy( MPIR_Comm *, int, MPIR_Comm ** );
-int MPIR_Comm_copy_data(MPIR_Comm *comm_ptr, MPIR_Comm **outcomm_ptr);
-
-int MPIR_Setup_intercomm_localcomm( MPIR_Comm * );
-
 int MPIR_Comm_create( MPIR_Comm ** );
 int MPIR_Comm_create_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag,
                            MPIR_Comm ** newcomm);
 
-/* comm_create helper functions, used by both comm_create and comm_create_group */
-int MPIR_Comm_create_calculate_mapping(MPIR_Group  *group_ptr,
-                                       MPIR_Comm   *comm_ptr,
-                                       int        **mapping_out,
-                                       MPIR_Comm **mapping_comm);
-
-int MPIR_Comm_create_map(int local_n,
-                         int remote_n,
-                         int *local_mapping,
-                         int *remote_mapping,
-                         MPIR_Comm *mapping_comm,
-                         MPIR_Comm *newcomm);
-
 /* implements the logic for MPI_Comm_create for intracommunicators only */
 int MPIR_Comm_create_intra(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr,
                            MPIR_Comm **newcomm_ptr);
@@ -287,8 +266,6 @@ int MPIR_Comm_commit( MPIR_Comm * );
 
 int MPIR_Comm_is_node_aware( MPIR_Comm * );
 
-int MPIR_Comm_is_node_consecutive( MPIR_Comm *);
-
 int MPIR_Comm_idup_impl(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm, MPIR_Request **reqp);
 
 int MPIR_Comm_shrink(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr);
@@ -298,8 +275,6 @@ int MPIR_Comm_agree(MPIR_Comm *comm_ptr, int *flag);
 int MPIR_Comm_split_filesystem(MPI_Comm comm, int key, const char *dirname, MPI_Comm *newcomm);
 #endif
 
-int MPIR_Comm_init(MPIR_Comm *);
-
 #define MPIR_Comm_rank(comm_ptr) ((comm_ptr)->rank)
 #define MPIR_Comm_size(comm_ptr) ((comm_ptr)->local_size)
 
@@ -307,7 +282,7 @@ int MPIR_Comm_init(MPIR_Comm *);
 typedef int (*MPIR_Comm_hint_fn_t)(MPIR_Comm *, MPIR_Info *, void *);
 int MPIR_Comm_register_hint(const char *hint_key, MPIR_Comm_hint_fn_t fn, void *state);
 
-int MPIR_Comm_delete_attr_impl(MPIR_Comm *comm_ptr, MPIR_Keyval *keyval_ptr);
+int MPIR_Comm_delete_attr_impl(MPIR_Comm *comm_ptr, MPII_Keyval *keyval_ptr);
 int MPIR_Comm_create_keyval_impl(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
                                  MPI_Comm_delete_attr_function *comm_delete_attr_fn,
                                  int *comm_keyval, void *extra_state);
@@ -338,7 +313,7 @@ int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **ne
 int MPIR_Comm_split_type_impl(MPIR_Comm *comm_ptr, int split_type, int key, MPIR_Info *info_ptr,
                               MPIR_Comm **newcomm_ptr);
 int MPIR_Comm_set_attr_impl(MPIR_Comm *comm_ptr, int comm_keyval, void *attribute_val,
-                            MPIR_AttrType attrType);
+                            MPIR_Attr_type attrType);
 
 
 /* Preallocated comm objects.  There are 3: comm_world, comm_self, and
@@ -352,14 +327,37 @@ extern MPIR_Comm MPIR_Comm_direct[];
    of the handle is 3-1 (e.g., the index in the builtin array) */
 #define MPIR_ICOMM_WORLD  ((MPI_Comm)0x44000002)
 
-#ifndef HAVE_DEV_COMM_HOOK
-#define MPID_Dev_comm_create_hook( a ) MPI_SUCCESS
-#define MPID_Dev_comm_destroy_hook( a ) MPI_SUCCESS
-#endif
-
 typedef struct MPIR_Commops {
     int (*split_type)(MPIR_Comm *, int, int, MPIR_Info *, MPIR_Comm **);
 } MPIR_Commops;
 extern struct MPIR_Commops  *MPIR_Comm_fns; /* Communicator creation functions */
 
+
+/* internal functions */
+
+int MPII_Comm_init(MPIR_Comm *);
+
+int MPII_Comm_is_node_consecutive( MPIR_Comm *);
+
+/* applies the specified info chain to the specified communicator */
+int MPII_Comm_apply_hints(MPIR_Comm *comm_ptr, MPIR_Info *info_ptr);
+
+int MPII_Comm_copy( MPIR_Comm *, int, MPIR_Comm ** );
+int MPII_Comm_copy_data(MPIR_Comm *comm_ptr, MPIR_Comm **outcomm_ptr);
+
+int MPII_Setup_intercomm_localcomm( MPIR_Comm * );
+
+/* comm_create helper functions, used by both comm_create and comm_create_group */
+int MPII_Comm_create_calculate_mapping(MPIR_Group  *group_ptr,
+                                       MPIR_Comm   *comm_ptr,
+                                       int        **mapping_out,
+                                       MPIR_Comm **mapping_comm);
+
+int MPII_Comm_create_map(int local_n,
+                         int remote_n,
+                         int *local_mapping,
+                         int *remote_mapping,
+                         MPIR_Comm *mapping_comm,
+                         MPIR_Comm *newcomm);
+
 #endif /* MPIR_COMM_H_INCLUDED */
diff --git a/src/include/mpir_contextid.h b/src/include/mpir_contextid.h
index 571cd88..339e6e8 100644
--- a/src/include/mpir_contextid.h
+++ b/src/include/mpir_contextid.h
@@ -8,9 +8,9 @@
 #ifndef MPIR_CONTEXTID_H_INCLUDED
 #define MPIR_CONTEXTID_H_INCLUDED
 
-#define MPIU_CONTEXT_ID_T_DATATYPE MPI_UINT16_T
-typedef uint16_t MPIU_Context_id_t;
-#define MPIU_INVALID_CONTEXT_ID ((MPIU_Context_id_t)0xffff)
+#define MPIR_CONTEXT_ID_T_DATATYPE MPI_UINT16_T
+typedef uint16_t MPIR_Context_id_t;
+#define MPIR_INVALID_CONTEXT_ID ((MPIR_Context_id_t)0xffff)
 
 /* The following preprocessor macros provide bitfield access information for
  * context ID values.  They follow a uniform naming pattern:
@@ -86,7 +86,7 @@ typedef uint16_t MPIU_Context_id_t;
 
 /* should probably be (sizeof(int)*CHAR_BITS) once we make the code CHAR_BITS-clean */
 #define MPIR_CONTEXT_INT_BITS (32)
-#define MPIR_CONTEXT_ID_BITS (sizeof(MPIU_Context_id_t)*8) /* 8 --> CHAR_BITS eventually */
+#define MPIR_CONTEXT_ID_BITS (sizeof(MPIR_Context_id_t)*8) /* 8 --> CHAR_BITS eventually */
 #define MPIR_MAX_CONTEXT_MASK \
     ((1 << (MPIR_CONTEXT_ID_BITS - (MPIR_CONTEXT_PREFIX_SHIFT + MPIR_CONTEXT_DYNAMIC_PROC_WIDTH))) / MPIR_CONTEXT_INT_BITS)
 
@@ -94,12 +94,12 @@ typedef uint16_t MPIU_Context_id_t;
    with the other comm routines (src/mpi/comm, in mpicomm.h).  However,
    to create a new communicator after a spawn or connect-accept operation,
    the device may need to create a new contextid */
-int MPIR_Get_contextid_sparse(MPIR_Comm *comm_ptr, MPIU_Context_id_t *context_id, int ignore_id);
-int MPIR_Get_contextid_sparse_group(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr, int tag, MPIU_Context_id_t *context_id, int ignore_id);
+int MPIR_Get_contextid_sparse(MPIR_Comm *comm_ptr, MPIR_Context_id_t *context_id, int ignore_id);
+int MPIR_Get_contextid_sparse_group(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr, int tag, MPIR_Context_id_t *context_id, int ignore_id);
 
 int MPIR_Get_contextid_nonblock(MPIR_Comm *comm_ptr, MPIR_Comm *newcommp, MPIR_Request **req);
 int MPIR_Get_intercomm_contextid_nonblock(MPIR_Comm *comm_ptr, MPIR_Comm *newcommp, MPIR_Request **req);
 
-void MPIR_Free_contextid( MPIU_Context_id_t );
+void MPIR_Free_contextid( MPIR_Context_id_t );
 
 #endif /* MPIR_CONTEXTID_H_INCLUDED */
diff --git a/src/include/mpir_cxxinterface.h b/src/include/mpir_cxxinterface.h
deleted file mode 100644
index 4bb3fdf..0000000
--- a/src/include/mpir_cxxinterface.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- *
- */
-
-#ifndef MPIR_CXX_INTERFACE_H_INCLUDED
-#define MPIR_CXX_INTERFACE_H_INCLUDED
-
-extern void MPIR_Keyval_set_cxx( int, void (*)(void), void (*)(void) );
-extern void MPIR_Op_set_cxx( MPI_Op, void (*)(void) );
-extern void MPIR_Errhandler_set_cxx( MPI_Errhandler, void (*)(void) );
-
-#endif /* MPIR_CXX_INTERFACE_H_INCLUDED */
diff --git a/src/include/mpir_datatype.h b/src/include/mpir_datatype.h
index 6910c7c..f94abb5 100644
--- a/src/include/mpir_datatype.h
+++ b/src/include/mpir_datatype.h
@@ -10,7 +10,7 @@
 
 /* This routine is used to install an attribute free routine for datatypes
    at finalize-time */
-void MPIR_DatatypeAttrFinalize( void );
+void MPII_Datatype_attr_finalize( void );
 
 #define MPIR_DATATYPE_IS_PREDEFINED(type) \
     ((HANDLE_GET_KIND(type) == HANDLE_KIND_BUILTIN) || \
@@ -24,10 +24,6 @@ void MPIR_Type_get_extent_x_impl(MPI_Datatype datatype, MPI_Count *lb, MPI_Count
 void MPIR_Type_get_true_extent_x_impl(MPI_Datatype datatype, MPI_Count *true_lb, MPI_Count *true_extent);
 int MPIR_Type_size_x_impl(MPI_Datatype datatype, MPI_Count *size);
 
-#define MPIR_Type_extent_impl(datatype, extent_ptr) MPID_Datatype_get_extent_macro(datatype, *(extent_ptr))
-#define MPIR_Type_size_impl(datatype, size) MPID_Datatype_get_size_macro(datatype, *(size))
-#define MPIR_Test_cancelled_impl(status, flag) *(flag) = MPIR_STATUS_GET_CANCEL_BIT(*(status))
-
 void MPIR_Get_count_impl(const MPI_Status *status, MPI_Datatype datatype, int *count);
 int MPIR_Type_commit_impl(MPI_Datatype *datatype);
 int MPIR_Type_create_struct_impl(int count,
diff --git a/src/include/mpir_debugger.h b/src/include/mpir_debugger.h
index 708791b..67dc5f8 100644
--- a/src/include/mpir_debugger.h
+++ b/src/include/mpir_debugger.h
@@ -13,22 +13,24 @@
    when specifically requested
 */
 #ifdef HAVE_DEBUGGER_SUPPORT
-void MPIR_WaitForDebugger( void );
-void MPIR_DebuggerSetAborting( const char * );
-void MPIR_Sendq_remember(MPIR_Request *, int, int, int );
-void MPIR_Sendq_forget(MPIR_Request *);
-void MPIR_CommL_remember( MPIR_Comm * );
-void MPIR_CommL_forget( MPIR_Comm * );
+void MPIR_Debugger_set_aborting( const char * );
 
-#define MPIR_SENDQ_REMEMBER(_a,_b,_c,_d) MPIR_Sendq_remember(_a,_b,_c,_d)
-#define MPIR_SENDQ_FORGET(_a) MPIR_Sendq_forget(_a)
-#define MPIR_COMML_REMEMBER(_a) MPIR_CommL_remember( _a )
-#define MPIR_COMML_FORGET(_a) MPIR_CommL_forget( _a )
+/* internal functions */
+void MPII_Wait_for_debugger( void );
+void MPII_Sendq_remember(MPIR_Request *, int, int, int );
+void MPII_Sendq_forget(MPIR_Request *);
+void MPII_CommL_remember( MPIR_Comm * );
+void MPII_CommL_forget( MPIR_Comm * );
+
+#define MPII_SENDQ_REMEMBER(_a,_b,_c,_d) MPII_Sendq_remember(_a,_b,_c,_d)
+#define MPII_SENDQ_FORGET(_a) MPII_Sendq_forget(_a)
+#define MPII_COMML_REMEMBER(_a) MPII_CommL_remember( _a )
+#define MPII_COMML_FORGET(_a) MPII_CommL_forget( _a )
 #else
-#define MPIR_SENDQ_REMEMBER(a,b,c,d)
-#define MPIR_SENDQ_FORGET(a)
-#define MPIR_COMML_REMEMBER(_a)
-#define MPIR_COMML_FORGET(_a)
+#define MPII_SENDQ_REMEMBER(a,b,c,d)
+#define MPII_SENDQ_FORGET(a)
+#define MPII_COMML_REMEMBER(_a)
+#define MPII_COMML_FORGET(_a)
 #endif
 
 #endif /* MPIR_DEBUGGER_H_INCLUDED */
diff --git a/src/include/mpir_err.h b/src/include/mpir_err.h
index 3ba9808..f4399fd 100644
--- a/src/include/mpir_err.h
+++ b/src/include/mpir_err.h
@@ -754,7 +754,7 @@ cvars:
 
 /* some simple memcpy aliasing checks */
 #define MPIR_ERR_CHKMEMCPYANDSTMT(err_,stmt_,src_,dst_,len_) \
-    MPIR_ERR_CHKANDSTMT3(MPIU_MEM_RANGES_OVERLAP((dst_),(len_),(src_),(len_)),err_,MPI_ERR_INTERN,stmt_,"**memcpyalias","**memcpyalias %p %p %L",(src_),(dst_),(long long)(len_))
+    MPIR_ERR_CHKANDSTMT3(MPIR_MEM_RANGES_OVERLAP((dst_),(len_),(src_),(len_)),err_,MPI_ERR_INTERN,stmt_,"**memcpyalias","**memcpyalias %p %p %L",(src_),(dst_),(long long)(len_))
 #define MPIR_ERR_CHKMEMCPYANDJUMP(err_,src_,dst_,len_) \
     MPIR_ERR_CHKMEMCPYANDSTMT((err_),goto fn_fail,(src_),(dst_),(len_))
 
diff --git a/src/include/mpir_errhandler.h b/src/include/mpir_errhandler.h
index 53a06db..3fb9d88 100644
--- a/src/include/mpir_errhandler.h
+++ b/src/include/mpir_errhandler.h
@@ -9,7 +9,7 @@
 #define MPIR_ERRHANDLER_H_INCLUDED
 
 /*E
-  MPIR_Errhandler_fn - MPID Structure to hold an error handler function
+  errhandler_fn - MPIR Structure to hold an error handler function
 
   Notes:
   The MPI-1 Standard declared only the C version of this, implicitly
@@ -30,12 +30,12 @@
   of the union)?
 
   E*/
-typedef union MPIR_Errhandler_fn {
+typedef union errhandler_fn {
    void (*C_Comm_Handler_function) ( MPI_Comm *, int *, ... );
    void (*F77_Handler_function) ( MPI_Fint *, MPI_Fint * );
    void (*C_Win_Handler_function) ( MPI_Win *, int *, ... );
    void (*C_File_Handler_function) ( MPI_File *, int *, ... );
-} MPIR_Errhandler_fn;
+} errhandler_fn;
 
 /*S
   MPIR_Errhandler - Description of the error handler structure
@@ -56,16 +56,16 @@ typedef union MPIR_Errhandler_fn {
   ErrHand-DS
   S*/
 typedef struct MPIR_Errhandler {
-  MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+  MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
   MPIR_Lang_t        language;
-  MPIR_Object_kind   kind;
-  MPIR_Errhandler_fn errfn;
+  MPII_Object_kind   kind;
+  errhandler_fn errfn;
   /* Other, device-specific information */
 #ifdef MPID_DEV_ERRHANDLER_DECL
     MPID_DEV_ERRHANDLER_DECL
 #endif
 } MPIR_Errhandler;
-extern MPIU_Object_alloc_t MPIR_Errhandler_mem;
+extern MPIR_Object_alloc_t MPIR_Errhandler_mem;
 /* Preallocated errhandler objects */
 extern MPIR_Errhandler MPIR_Errhandler_builtin[];
 extern MPIR_Errhandler MPIR_Errhandler_direct[];
@@ -74,17 +74,17 @@ extern MPIR_Errhandler MPIR_Errhandler_direct[];
  * we decide to reference count the other predefined objects.  If we get to the
  * point where we never reference count *any* of the builtin objects then we
  * should probably remove these checks and let them fall through to the checks
- * for BUILTIN down in the MPIU_Object_* routines. */
+ * for BUILTIN down in the MPIR_Object_* routines. */
 #define MPIR_Errhandler_add_ref( _errhand )                               \
     do {                                                                  \
         if (HANDLE_GET_KIND((_errhand)->handle) != HANDLE_KIND_BUILTIN) { \
-            MPIU_Object_add_ref( _errhand );                              \
+            MPIR_Object_add_ref( _errhand );                              \
         }                                                                 \
     } while (0)
 #define MPIR_Errhandler_release_ref( _errhand, _inuse )                   \
     do {                                                                  \
         if (HANDLE_GET_KIND((_errhand)->handle) != HANDLE_KIND_BUILTIN) { \
-            MPIU_Object_release_ref( (_errhand), (_inuse) );              \
+            MPIR_Object_release_ref( (_errhand), (_inuse) );              \
         }                                                                 \
         else {                                                            \
             *(_inuse) = 1;                                                \
diff --git a/src/include/mpir_func.h b/src/include/mpir_func.h
index d4a37f3..8dd38c5 100644
--- a/src/include/mpir_func.h
+++ b/src/include/mpir_func.h
@@ -20,18 +20,6 @@
  * 3. Additional memory validation of the memory arena (--enable-g=memarena)
  */
 
-/* state declaration macros */
-#if defined(MPL_USE_DBG_LOGGING) || defined(MPICH_DEBUG_MEMARENA)
-#define MPID_MPI_STATE_DECL(a)
-#define MPID_MPI_INIT_STATE_DECL(a)
-#define MPID_MPI_FINALIZE_STATE_DECL(a)
-#define MPIDI_STATE_DECL(a)
-
-/* Tell the package to define the rest of the enter/exit macros in
-   terms of these */
-#define NEEDS_FUNC_ENTER_EXIT_DEFS 1
-#endif /* MPL_USE_DBG_LOGGING || MPICH_DEBUG_MEMARENA */
-
 /* function enter and exit macros */
 #if defined(MPL_USE_DBG_LOGGING)
 #define MPIR_FUNC_ENTER(a) MPL_DBG_MSG(MPL_DBG_ROUTINE_ENTER,TYPICAL,"Entering "#a)
@@ -45,167 +33,97 @@
 #define MPIR_FUNC_EXIT(a) MPL_trvalid("Leaving " #a)
 #endif
 
+/* state declaration macros */
+#if defined(MPL_USE_DBG_LOGGING) || defined(MPICH_DEBUG_MEMARENA)
 
-#if defined(NEEDS_FUNC_ENTER_EXIT_DEFS)
-
-#define MPID_MPI_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
-#define MPID_MPI_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)	MPIR_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)	MPIR_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)	MPIR_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)	MPIR_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)	MPIR_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)	MPIR_FUNC_EXIT(a)
-#define MPID_MPI_COLL_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_COLL_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPID_MPI_RMA_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_RMA_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPID_MPI_INIT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_INIT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPID_MPI_FINALIZE_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_FINALIZE_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-
-/* device layer definitions */
-#define MPIDI_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
-#define MPIDI_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPIDI_RMA_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
-#define MPIDI_RMA_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
-
-/* evaporate the timing macros since timing is not selected */
-#define MPIU_Timer_init(rank, size)
-#define MPIU_Timer_finalize()
-
-#else   /* ! NEEDS_FUNC_ENTER_EXIT_DEFS */
-
-/* Possible values for timing */
-#define MPID_TIMING_KIND_NONE 0
-#define MPID_TIMING_KIND_TIME 1
-#define MPID_TIMING_KIND_LOG 2
-#define MPID_TIMING_KIND_LOG_DETAILED 3
-#define MPID_TIMING_KIND_ALL 4
-#define MPID_TIMING_KIND_RUNTIME 5
+#define MPIR_FUNC_TERSE_STATE_DECL(a)
+#define MPIR_FUNC_TERSE_INIT_STATE_DECL(a)
+#define MPIR_FUNC_TERSE_FINALIZE_STATE_DECL(a)
+#define MPIR_FUNC_TERSE_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_EXIT(a)			MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(a)	MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT_FRONT(a)	MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER_BACK(a)	MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER_BOTH(a)	MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT_BACK(a)	MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT_BOTH(a)	MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_COLL_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_COLL_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_RMA_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_RMA_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_INIT_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_INIT_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_FINALIZE_ENTER(a)	MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_FINALIZE_EXIT(a)        MPIR_FUNC_EXIT(a)
+
+#define MPIR_FUNC_VERBOSE_STATE_DECL(a)
+#define MPIR_FUNC_VERBOSE_ENTER(a)	        MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_VERBOSE_EXIT(a)	        MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_VERBOSE_PT2PT_ENTER(a)	MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_VERBOSE_PT2PT_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPIR_FUNC_VERBOSE_RMA_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPIR_FUNC_VERBOSE_RMA_EXIT(a)		MPIR_FUNC_EXIT(a)
+
+#define MPII_Timer_init(rank, size)
+#define MPII_Timer_finalize()
+
+#else   /* ! defined(MPL_USE_DBG_LOGGING) && ! defined(MPICH_DEBUG_MEMARENA) */
 
 /* Routine tracing (see --enable-timing for control of this) */
-#if defined(HAVE_TIMING) && (HAVE_TIMING == MPID_TIMING_KIND_LOG || \
-    HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || \
-    HAVE_TIMING == MPID_TIMING_KIND_ALL || \
-    HAVE_TIMING == MPID_TIMING_KIND_RUNTIME)
+#if defined(HAVE_TIMING) && (HAVE_TIMING == MPICH_TIMING_KIND__LOG || \
+    HAVE_TIMING == MPICH_TIMING_KIND__LOG_DETAILED || \
+    HAVE_TIMING == MPICH_TIMING_KIND__ALL || \
+    HAVE_TIMING == MPICH_TIMING_KIND__RUNTIME)
 
-/* This include file contains the static state definitions */
 #include "mpiallstates.h"
 
-/* Possible values for USE_LOGGING */
-#define MPID_LOGGING_NONE 0
-#define MPID_LOGGING_RLOG 1
-#define MPID_LOGGING_EXTERNAL 4
-
-/* Include the macros specific to the selected logging library */
-#if (USE_LOGGING == MPID_LOGGING_RLOG)
+#if (USE_LOGGING == MPICH_LOGGING__RLOG)
 #include "rlog_macros.h"
-#elif (USE_LOGGING == MPID_LOGGING_EXTERNAL)
+#elif (USE_LOGGING == MPICH_LOGGING__EXTERNAL)
 #include "mpilogging.h"
 #else
 #error You must select a logging library if timing is enabled
 #endif
 
-/* MPI layer definitions */
-#define MPID_MPI_STATE_DECL(a)                MPIDU_STATE_DECL(a)
-#define MPID_MPI_INIT_STATE_DECL(a)           MPIDU_INIT_STATE_DECL(a)
-#define MPID_MPI_FINALIZE_STATE_DECL(a)       MPIDU_FINALIZE_STATE_DECL(a)
-
-#define MPID_MPI_FUNC_ENTER(a)                MPIDU_FUNC_ENTER(a)
-#define MPID_MPI_FUNC_EXIT(a)                 MPIDU_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER(a)          MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT(a)           MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPID_MPI_COLL_FUNC_ENTER(a)           MPIDU_COLL_FUNC_ENTER(a)
-#define MPID_MPI_COLL_FUNC_EXIT(a)            MPIDU_COLL_FUNC_EXIT(a)
-#define MPID_MPI_RMA_FUNC_ENTER(a)            MPIDU_RMA_FUNC_ENTER(a)
-#define MPID_MPI_RMA_FUNC_EXIT(a)             MPIDU_RMA_FUNC_EXIT(a)
-#define MPID_MPI_INIT_FUNC_ENTER(a)           MPIDU_INIT_FUNC_ENTER(a)
-#define MPID_MPI_INIT_FUNC_EXIT(a)            MPIDU_INIT_FUNC_EXIT(a)
-#define MPID_MPI_FINALIZE_FUNC_ENTER(a)       MPIDU_FINALIZE_FUNC_ENTER(a)
-#define MPID_MPI_FINALIZE_FUNC_EXIT(a)        MPIDU_FINALIZE_FUNC_EXIT(a)
-
-#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)    MPIDU_PT2PT_FUNC_ENTER_FRONT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)     MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)     MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)      MPIDU_PT2PT_FUNC_EXIT_BACK(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)      MPIDU_PT2PT_FUNC_EXIT_BOTH(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)     MPIDU_PT2PT_FUNC_ENTER_BOTH(a)
-
-#if defined(HAVE_TIMING) && (HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || HAVE_TIMING == MPID_TIMING_KIND_ALL)
-
-/* device layer definitions */
-#define MPIDI_STATE_DECL(a)                MPIDU_STATE_DECL(a)
-#define MPIDI_FUNC_ENTER(a)                MPIDU_FUNC_ENTER(a)
-#define MPIDI_FUNC_EXIT(a)                 MPIDU_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER(a)          MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT(a)           MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPIDI_RMA_FUNC_ENTER(a)            MPIDU_RMA_FUNC_ENTER(a)
-#define MPIDI_RMA_FUNC_EXIT(a)             MPIDU_RMA_FUNC_EXIT(a)
-
-#else
-
-#define MPIDI_STATE_DECL(a)
-#define MPIDI_FUNC_ENTER(a)
-#define MPIDI_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT(a)
-#define MPIDI_RMA_FUNC_ENTER(a)
-#define MPIDI_RMA_FUNC_EXIT(a)
-
-#endif /* (HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || HAVE_TIMING == MPID_TIMING_KIND_ALL) */
-
-/* prototype the initialization/finalization functions */
-int MPIU_Timer_init(int rank, int size);
-int MPIU_Timer_finalize(void);
-int MPIR_Describe_timer_states(void);
-
-/* The original statistics macros (see the design documentation)
-   have been superceeded by the MPIR_T_PVAR_* macros (see mpit.h) */
-
 #else /* HAVE_TIMING and doing logging */
 
-/* evaporate all the timing macros if timing is not selected */
-#define MPIU_Timer_init(rank, size)
-#define MPIU_Timer_finalize()
-/* MPI layer */
-#define MPID_MPI_STATE_DECL(a)
-#define MPID_MPI_INIT_STATE_DECL(a)
-#define MPID_MPI_FINALIZE_STATE_DECL(a)
-#define MPID_MPI_FUNC_EXIT(a)
-#define MPID_MPI_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)
-#define MPID_MPI_COLL_FUNC_ENTER(a)
-#define MPID_MPI_COLL_FUNC_EXIT(a)
-#define MPID_MPI_RMA_FUNC_ENTER(a)
-#define MPID_MPI_RMA_FUNC_EXIT(a)
-#define MPID_MPI_INIT_FUNC_ENTER(a)
-#define MPID_MPI_INIT_FUNC_EXIT(a)
-#define MPID_MPI_FINALIZE_FUNC_ENTER(a)
-#define MPID_MPI_FINALIZE_FUNC_EXIT(a)
-/* device layer */
-#define MPIDI_STATE_DECL(a)
-#define MPIDI_FUNC_ENTER(a)
-#define MPIDI_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT(a)
-#define MPIDI_RMA_FUNC_ENTER(a)
-#define MPIDI_RMA_FUNC_EXIT(a)
+#define MPII_Timer_init(rank, size)
+#define MPII_Timer_finalize()
+
+#define MPIR_FUNC_TERSE_STATE_DECL(a)
+#define MPIR_FUNC_TERSE_INIT_STATE_DECL(a)
+#define MPIR_FUNC_TERSE_FINALIZE_STATE_DECL(a)
+#define MPIR_FUNC_TERSE_EXIT(a)
+#define MPIR_FUNC_TERSE_ENTER(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT_FRONT(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER_BACK(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER_BOTH(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT_BACK(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT_BOTH(a)
+#define MPIR_FUNC_TERSE_COLL_ENTER(a)
+#define MPIR_FUNC_TERSE_COLL_EXIT(a)
+#define MPIR_FUNC_TERSE_RMA_ENTER(a)
+#define MPIR_FUNC_TERSE_RMA_EXIT(a)
+#define MPIR_FUNC_TERSE_INIT_ENTER(a)
+#define MPIR_FUNC_TERSE_INIT_EXIT(a)
+#define MPIR_FUNC_TERSE_FINALIZE_ENTER(a)
+#define MPIR_FUNC_TERSE_FINALIZE_EXIT(a)
+
+#define MPIR_FUNC_VERBOSE_STATE_DECL(a)
+#define MPIR_FUNC_VERBOSE_ENTER(a)
+#define MPIR_FUNC_VERBOSE_EXIT(a)
+#define MPIR_FUNC_VERBOSE_PT2PT_ENTER(a)
+#define MPIR_FUNC_VERBOSE_PT2PT_EXIT(a)
+#define MPIR_FUNC_VERBOSE_RMA_ENTER(a)
+#define MPIR_FUNC_VERBOSE_RMA_EXIT(a)
 
 #endif /* HAVE_TIMING */
 
-#endif /* NEEDS_FUNC_ENTER_EXIT_DEFS */
+#endif /* ! defined(MPL_USE_DBG_LOGGING) && ! defined(MPICH_DEBUG_MEMARENA) */
 
 #endif /* MPIR_FUNC_H_INCLUDED */
diff --git a/src/include/mpir_group.h b/src/include/mpir_group.h
index 0744212..daa3f1e 100644
--- a/src/include/mpir_group.h
+++ b/src/include/mpir_group.h
@@ -15,11 +15,11 @@
  *---------------------------------------------------------------------------*/
 /* This structure is used to implement the group operations such as
    MPI_Group_translate_ranks */
-typedef struct MPIR_Group_pmap_t {
+typedef struct MPII_Group_pmap_t {
     int          lpid;      /* local process id, from VCONN */
     int          next_lpid; /* Index of next lpid (in lpid order) */
     int          flag;      /* marker, used to implement group operations */
-} MPIR_Group_pmap_t;
+} MPII_Group_pmap_t;
 
 /* Any changes in the MPIR_Group structure must be made to the
    predefined value in MPIR_Group_builtin for MPI_GROUP_EMPTY in
@@ -55,12 +55,12 @@ typedef struct MPIR_Group_pmap_t {
 
  S*/
 struct MPIR_Group {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
     int          size;           /* Size of a group */
     int          rank;           /* rank of this process relative to this
 				    group */
     int          idx_of_first_lpid;
-    MPIR_Group_pmap_t *lrank_to_lpid; /* Array mapping a local rank to local
+    MPII_Group_pmap_t *lrank_to_lpid; /* Array mapping a local rank to local
 					 process number */
     int          is_local_dense_monotonic; /* see NOTE-G1 */
 
@@ -83,7 +83,7 @@ struct MPIR_Group {
  * case for many MPI tool libraries, such as Scalasca.
  */
 
-extern MPIU_Object_alloc_t MPIR_Group_mem;
+extern MPIR_Object_alloc_t MPIR_Group_mem;
 /* Preallocated group objects */
 #define MPIR_GROUP_N_BUILTIN 1
 extern MPIR_Group MPIR_Group_builtin[MPIR_GROUP_N_BUILTIN];
@@ -93,12 +93,11 @@ extern MPIR_Group MPIR_Group_direct[];
 extern MPIR_Group * const MPIR_Group_empty;
 
 #define MPIR_Group_add_ref( _group ) \
-    do { MPIU_Object_add_ref( _group ); } while (0)
+    do { MPIR_Object_add_ref( _group ); } while (0)
 
 #define MPIR_Group_release_ref( _group, _inuse ) \
-     do { MPIU_Object_release_ref( _group, _inuse ); } while (0)
+     do { MPIR_Object_release_ref( _group, _inuse ); } while (0)
 
-void MPIR_Group_setup_lpid_list( MPIR_Group * );
 int MPIR_Group_create( int, MPIR_Group ** );
 int MPIR_Group_release(MPIR_Group *group_ptr);
 
@@ -116,4 +115,7 @@ int MPIR_Group_union_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_G
 int MPIR_Group_check_subset(MPIR_Group * group_ptr, MPIR_Comm * comm_ptr);
 int MPIR_Group_init(void);
 
+/* internal functions */
+void MPII_Group_setup_lpid_list( MPIR_Group * );
+
 #endif /* MPIR_GROUP_H_INCLUDED */
diff --git a/src/include/mpir_info.h b/src/include/mpir_info.h
index e43aa83..4fa2e85 100644
--- a/src/include/mpir_info.h
+++ b/src/include/mpir_info.h
@@ -22,8 +22,8 @@
 
   For simplicity, we have not abstracted the info data structures;
   routines that want to work with the linked list may do so directly.
-  Because the 'MPI_Info' type is a handle and not a pointer, an MPIU
-  (utility) routine is provided to handle the
+  Because the 'MPI_Info' type is a handle and not a pointer, an MPIR
+  routine is provided to handle the
   deallocation of 'MPIR_Info' elements.  See the implementation of
   'MPI_Info_create' for how an Info type is allocated.
 
@@ -50,7 +50,7 @@
 
   T*/
 /*S
-  MPIR_Info - Structure of an MPID info
+  MPIR_Info - Structure of an MPIR info
 
   Notes:
   There is no reference count because 'MPI_Info' values, unlike other MPI
@@ -82,12 +82,12 @@
   Info-DS
   S*/
 struct MPIR_Info {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
     struct MPIR_Info   *next;
     char               *key;
     char               *value;
 };
-extern MPIU_Object_alloc_t MPIR_Info_mem;
+extern MPIR_Object_alloc_t MPIR_Info_mem;
 /* Preallocated info objects */
 #define MPIR_INFO_N_BUILTIN 2
 extern MPIR_Info MPIR_Info_builtin[MPIR_INFO_N_BUILTIN];
@@ -99,7 +99,7 @@ int MPIR_Info_get_nthkey_impl(MPIR_Info *info, int n, char *key);
 void MPIR_Info_get_valuelen_impl(MPIR_Info *info_ptr, const char *key, int *valuelen, int *flag);
 int MPIR_Info_set_impl(MPIR_Info *info_ptr, const char *key, const char *value);
 int MPIR_Info_dup_impl(MPIR_Info *info_ptr, MPIR_Info **new_info_ptr);
-void MPIU_Info_free( MPIR_Info *info_ptr );
-int MPIU_Info_alloc(MPIR_Info **info_p_p);
+void MPIR_Info_free( MPIR_Info *info_ptr );
+int MPIR_Info_alloc(MPIR_Info **info_p_p);
 
 #endif /* MPIR_INFO_H_INCLUDED */
diff --git a/src/include/mpir_mem.h b/src/include/mpir_mem.h
index 58381fe..44c4d6a 100644
--- a/src/include/mpir_mem.h
+++ b/src/include/mpir_mem.h
@@ -61,7 +61,7 @@ extern MPL_dbg_class MPIR_DBG_STRING;
 
   Most memory should be allocated at the time that 'MPID_Init' is 
   called and released with 'MPID_Finalize' is called.  If at all possible,
-  no other MPID routine should fail because memory could not be allocated
+  no other routine should fail because memory could not be allocated
   (for example, because the user has allocated large arrays after 'MPI_Init').
   
   The implementation of the MPI routines will strive to avoid memory allocation
@@ -78,9 +78,9 @@ extern MPL_dbg_class MPIR_DBG_STRING;
 /* Define the string copy and duplication functions */
 /* ------------------------------------------------------------------------- */
 
-#define MPIU_Memcpy(dst, src, len)                \
+#define MPIR_Memcpy(dst, src, len)                \
     do {                                          \
-        MPIU_MEM_CHECK_MEMCPY((dst),(src),(len)); \
+        CHECK_MEMCPY((dst),(src),(len)); \
         memcpy((dst), (src), (len));              \
     } while (0)
 
@@ -107,12 +107,12 @@ extern MPL_dbg_class MPIR_DBG_STRING;
 /* Standard macro for generating error codes.  We set the error to be
  * recoverable by default, but this can be changed. */
 #ifdef HAVE_ERROR_CHECKING
-#define MPIU_CHKMEM_SETERR(rc_,nbytes_,name_) \
+#define MPIR_CHKMEM_SETERR(rc_,nbytes_,name_) \
      rc_=MPIR_Err_create_code( MPI_SUCCESS, \
           MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, \
           MPI_ERR_OTHER, "**nomem2", "**nomem2 %d %s", nbytes_, name_ )
 #else
-#define MPIU_CHKMEM_SETERR(rc_,nbytes_,name_) rc_=MPI_ERR_OTHER
+#define MPIR_CHKMEM_SETERR(rc_,nbytes_,name_) rc_=MPI_ERR_OTHER
 #endif
 
     /* CHKPMEM_REGISTER is used for memory allocated within another routine */
@@ -126,95 +126,95 @@ extern MPL_dbg_class MPIR_DBG_STRING;
 /* Define decl with a dummy definition to allow us to put a semi-colon
    after the macro without causing the declaration block to end (restriction
    imposed by C) */
-#define MPIU_CHKLMEM_DECL(n_) int dummy_ ATTRIBUTE((unused))
-#define MPIU_CHKLMEM_FREEALL()
-#define MPIU_CHKLMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_) \
+#define MPIR_CHKLMEM_DECL(n_) int dummy_ ATTRIBUTE((unused))
+#define MPIR_CHKLMEM_FREEALL()
+#define MPIR_CHKLMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_) \
 {pointer_ = (type_)alloca(nbytes_); \
     if (!(pointer_) && (nbytes_ > 0)) {	   \
-    MPIU_CHKMEM_SETERR(rc_,nbytes_,name_); \
+    MPIR_CHKMEM_SETERR(rc_,nbytes_,name_); \
     stmt_;\
 }}
 #else
-#define MPIU_CHKLMEM_DECL(n_) \
+#define MPIR_CHKLMEM_DECL(n_) \
  void *(mpiu_chklmem_stk_[n_]); \
  int mpiu_chklmem_stk_sp_=0;\
- MPIU_AssertDeclValue(const int mpiu_chklmem_stk_sz_,n_)
+ MPIR_AssertDeclValue(const int mpiu_chklmem_stk_sz_,n_)
 
-#define MPIU_CHKLMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_) \
+#define MPIR_CHKLMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_) \
 {pointer_ = (type_)MPL_malloc(nbytes_); \
 if (pointer_) { \
-    MPIU_Assert(mpiu_chklmem_stk_sp_<mpiu_chklmem_stk_sz_);\
+    MPIR_Assert(mpiu_chklmem_stk_sp_<mpiu_chklmem_stk_sz_);\
     mpiu_chklmem_stk_[mpiu_chklmem_stk_sp_++] = pointer_;\
  } else if (nbytes_ > 0) {				 \
-    MPIU_CHKMEM_SETERR(rc_,nbytes_,name_); \
+    MPIR_CHKMEM_SETERR(rc_,nbytes_,name_); \
     stmt_;\
 }}
-#define MPIU_CHKLMEM_FREEALL() \
+#define MPIR_CHKLMEM_FREEALL() \
     do { while (mpiu_chklmem_stk_sp_ > 0) {\
        MPL_free( mpiu_chklmem_stk_[--mpiu_chklmem_stk_sp_] ); } } while(0)
 #endif /* HAVE_ALLOCA */
-#define MPIU_CHKLMEM_MALLOC(pointer_,type_,nbytes_,rc_,name_) \
-    MPIU_CHKLMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_)
-#define MPIU_CHKLMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_) \
-    MPIU_CHKLMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,goto fn_fail)
+#define MPIR_CHKLMEM_MALLOC(pointer_,type_,nbytes_,rc_,name_) \
+    MPIR_CHKLMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_)
+#define MPIR_CHKLMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_) \
+    MPIR_CHKLMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,goto fn_fail)
 
 /* Persistent memory that we may want to recover if something goes wrong */
-#define MPIU_CHKPMEM_DECL(n_) \
+#define MPIR_CHKPMEM_DECL(n_) \
  void *(mpiu_chkpmem_stk_[n_]) = { NULL };     \
  int mpiu_chkpmem_stk_sp_=0;\
- MPIU_AssertDeclValue(const int mpiu_chkpmem_stk_sz_,n_)
-#define MPIU_CHKPMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_) \
+ MPIR_AssertDeclValue(const int mpiu_chkpmem_stk_sz_,n_)
+#define MPIR_CHKPMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_) \
 {pointer_ = (type_)MPL_malloc(nbytes_); \
 if (pointer_) { \
-    MPIU_Assert(mpiu_chkpmem_stk_sp_<mpiu_chkpmem_stk_sz_);\
+    MPIR_Assert(mpiu_chkpmem_stk_sp_<mpiu_chkpmem_stk_sz_);\
     mpiu_chkpmem_stk_[mpiu_chkpmem_stk_sp_++] = pointer_;\
  } else if (nbytes_ > 0) {				 \
-    MPIU_CHKMEM_SETERR(rc_,nbytes_,name_); \
+    MPIR_CHKMEM_SETERR(rc_,nbytes_,name_); \
     stmt_;\
 }}
-#define MPIU_CHKPMEM_REGISTER(pointer_) \
-    {MPIU_Assert(mpiu_chkpmem_stk_sp_<mpiu_chkpmem_stk_sz_);\
+#define MPIR_CHKPMEM_REGISTER(pointer_) \
+    {MPIR_Assert(mpiu_chkpmem_stk_sp_<mpiu_chkpmem_stk_sz_);\
     mpiu_chkpmem_stk_[mpiu_chkpmem_stk_sp_++] = pointer_;}
-#define MPIU_CHKPMEM_REAP() \
+#define MPIR_CHKPMEM_REAP() \
     { while (mpiu_chkpmem_stk_sp_ > 0) {\
        MPL_free( mpiu_chkpmem_stk_[--mpiu_chkpmem_stk_sp_] ); } }
-#define MPIU_CHKPMEM_COMMIT() \
+#define MPIR_CHKPMEM_COMMIT() \
     mpiu_chkpmem_stk_sp_ = 0
-#define MPIU_CHKPMEM_MALLOC(pointer_,type_,nbytes_,rc_,name_) \
-    MPIU_CHKPMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_)
-#define MPIU_CHKPMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_) \
-    MPIU_CHKPMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,goto fn_fail)
+#define MPIR_CHKPMEM_MALLOC(pointer_,type_,nbytes_,rc_,name_) \
+    MPIR_CHKPMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_)
+#define MPIR_CHKPMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_) \
+    MPIR_CHKPMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,goto fn_fail)
 
 /* now the CALLOC version for zeroed memory */
-#define MPIU_CHKPMEM_CALLOC(pointer_,type_,nbytes_,rc_,name_) \
-    MPIU_CHKPMEM_CALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_)
-#define MPIU_CHKPMEM_CALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_) \
-    MPIU_CHKPMEM_CALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,goto fn_fail)
-#define MPIU_CHKPMEM_CALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_) \
+#define MPIR_CHKPMEM_CALLOC(pointer_,type_,nbytes_,rc_,name_) \
+    MPIR_CHKPMEM_CALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_)
+#define MPIR_CHKPMEM_CALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_) \
+    MPIR_CHKPMEM_CALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,goto fn_fail)
+#define MPIR_CHKPMEM_CALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_) \
     do {                                                                   \
         pointer_ = (type_)MPL_calloc(1, (nbytes_));                       \
         if (pointer_) {                                                    \
-            MPIU_Assert(mpiu_chkpmem_stk_sp_<mpiu_chkpmem_stk_sz_);        \
+            MPIR_Assert(mpiu_chkpmem_stk_sp_<mpiu_chkpmem_stk_sz_);        \
             mpiu_chkpmem_stk_[mpiu_chkpmem_stk_sp_++] = pointer_;          \
         }                                                                  \
         else if (nbytes_ > 0) {                                            \
-            MPIU_CHKMEM_SETERR(rc_,nbytes_,name_);                         \
+            MPIR_CHKMEM_SETERR(rc_,nbytes_,name_);                         \
             stmt_;                                                         \
         }                                                                  \
     } while (0)
 
 /* A special version for routines that only allocate one item */
-#define MPIU_CHKPMEM_MALLOC1(pointer_,type_,nbytes_,rc_,name_,stmt_) \
+#define MPIR_CHKPMEM_MALLOC1(pointer_,type_,nbytes_,rc_,name_,stmt_) \
 {pointer_ = (type_)MPL_malloc(nbytes_); \
     if (!(pointer_) && (nbytes_ > 0)) {	   \
-    MPIU_CHKMEM_SETERR(rc_,nbytes_,name_); \
+    MPIR_CHKMEM_SETERR(rc_,nbytes_,name_); \
     stmt_;\
 }}
 
 /* Provides a easy way to use realloc safely and avoid the temptation to use
  * realloc unsafely (direct ptr assignment).  Zero-size reallocs returning NULL
  * are handled and are not considered an error. */
-#define MPIU_REALLOC_ORJUMP(ptr_,size_,rc_) do { \
+#define MPIR_REALLOC_ORJUMP(ptr_,size_,rc_) do { \
     void *realloc_tmp_ = MPL_realloc((ptr_), (size_)); \
     if (size_) \
         MPIR_ERR_CHKANDJUMP2(!realloc_tmp_,rc_,MPI_ERR_OTHER,"**nomem2","**nomem2 %d %s",(size_),MPL_QUOTE(ptr_)); \
@@ -222,9 +222,9 @@ if (pointer_) { \
 } while (0)
 
 #if defined(HAVE_STRNCASECMP)
-#   define MPIU_Strncasecmp strncasecmp
+#   define MPIR_Strncasecmp strncasecmp
 #elif defined(HAVE_STRNICMP)
-#   define MPIU_Strncasecmp strnicmp
+#   define MPIR_Strncasecmp strnicmp
 #else
 /* FIXME: Provide a fallback function ? */
 #   error "No function defined for case-insensitive strncmp"
@@ -232,29 +232,29 @@ if (pointer_) { \
 
 /* Evaluates to a boolean expression, true if the given byte ranges overlap,
  * false otherwise.  That is, true iff [a_,a_+a_len_) overlaps with [b_,b_+b_len_) */
-#define MPIU_MEM_RANGES_OVERLAP(a_,a_len_,b_,b_len_) \
+#define MPIR_MEM_RANGES_OVERLAP(a_,a_len_,b_,b_len_) \
     ( ((char *)(a_) >= (char *)(b_) && ((char *)(a_) < ((char *)(b_) + (b_len_)))) ||  \
       ((char *)(b_) >= (char *)(a_) && ((char *)(b_) < ((char *)(a_) + (a_len_)))) )
 #if (!defined(NDEBUG) && defined(HAVE_ERROR_CHECKING))
 
 /* May be used to perform sanity and range checking on memcpy and memcpy-like
-   function calls.  This macro will bail out much like an MPIU_Assert if any of
+   function calls.  This macro will bail out much like an MPIR_Assert if any of
    the checks fail. */
-#define MPIU_MEM_CHECK_MEMCPY(dst_,src_,len_)                                                                   \
+#define CHECK_MEMCPY(dst_,src_,len_)                                                                   \
     do {                                                                                                        \
         if (len_) {                                                                                             \
-            MPIU_Assert((dst_) != NULL);                                                                        \
-            MPIU_Assert((src_) != NULL);                                                                        \
+            MPIR_Assert((dst_) != NULL);                                                                        \
+            MPIR_Assert((src_) != NULL);                                                                        \
             MPL_VG_CHECK_MEM_IS_ADDRESSABLE((dst_),(len_));                                                     \
             MPL_VG_CHECK_MEM_IS_ADDRESSABLE((src_),(len_));                                                     \
-            if (MPIU_MEM_RANGES_OVERLAP((dst_),(len_),(src_),(len_))) {                                          \
-                MPIU_Assert_fmt_msg(FALSE,("memcpy argument memory ranges overlap, dst_=%p src_=%p len_=%ld\n", \
+            if (MPIR_MEM_RANGES_OVERLAP((dst_),(len_),(src_),(len_))) {                                          \
+                MPIR_Assert_fmt_msg(FALSE,("memcpy argument memory ranges overlap, dst_=%p src_=%p len_=%ld\n", \
                                            (dst_), (src_), (long)(len_)));                                      \
             }                                                                                                   \
         }                                                                                                       \
     } while (0)
 #else
-#define MPIU_MEM_CHECK_MEMCPY(dst_,src_,len_) do {} while(0)
+#define CHECK_MEMCPY(dst_,src_,len_) do {} while(0)
 #endif
 
 /* valgrind macros are now provided by MPL (via mpl.h included in mpiimpl.h) */
diff --git a/src/include/mpir_misc.h b/src/include/mpir_misc.h
index 022f8b5..f827888 100644
--- a/src/include/mpir_misc.h
+++ b/src/include/mpir_misc.h
@@ -48,21 +48,14 @@ typedef enum MPIR_Lang_t {
 #endif
 } MPIR_Lang_t;
 
-typedef enum MPIR_MPI_State_t {
-    MPICH_MPI_STATE__PRE_INIT=0,
-    MPICH_MPI_STATE__IN_INIT,
-    MPICH_MPI_STATE__POST_INIT,
-    MPICH_MPI_STATE__POST_FINALIZED
-} MPIR_MPI_State_t;
-
-extern const char MPIR_Version_string[];
-extern const char MPIR_Version_date[];
-extern const char MPIR_Version_configure[];
-extern const char MPIR_Version_device[];
-extern const char MPIR_Version_CC[];
-extern const char MPIR_Version_CXX[];
-extern const char MPIR_Version_F77[];
-extern const char MPIR_Version_FC[];
+extern const char MPII_Version_string[];
+extern const char MPII_Version_date[];
+extern const char MPII_Version_configure[];
+extern const char MPII_Version_device[];
+extern const char MPII_Version_CC[];
+extern const char MPII_Version_CXX[];
+extern const char MPII_Version_F77[];
+extern const char MPII_Version_FC[];
 
 int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
                    void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype);
@@ -84,11 +77,11 @@ Notes:
 void MPIR_Add_finalize( int (*routine)( void * ), void *extra, int priority );
 
 /* Routines for determining local and remote processes */
-int MPIU_Find_local_and_external(struct MPIR_Comm *comm, int *local_size_p, int *local_rank_p, int **local_ranks_p,
+int MPIR_Find_local_and_external(struct MPIR_Comm *comm, int *local_size_p, int *local_rank_p, int **local_ranks_p,
                                  int *external_size_p, int *external_rank_p, int **external_ranks_p,
                                  int **intranode_table, int **internode_table_p);
-int MPIU_Get_internode_rank(MPIR_Comm *comm_ptr, int r);
-int MPIU_Get_intranode_rank(MPIR_Comm *comm_ptr, int r);
+int MPIR_Get_internode_rank(MPIR_Comm *comm_ptr, int r);
+int MPIR_Get_intranode_rank(MPIR_Comm *comm_ptr, int r);
 
 int MPIR_Close_port_impl(const char *port_name);
 int MPIR_Open_port_impl(MPIR_Info *info_ptr, char *port_name);
diff --git a/src/include/mpir_nbc.h b/src/include/mpir_nbc.h
index 588172b..e90587d 100644
--- a/src/include/mpir_nbc.h
+++ b/src/include/mpir_nbc.h
@@ -8,18 +8,18 @@
 #define MPIR_NBC_H_INCLUDED
 
 /* This specifies the interface that must be exposed by the ADI in order to
- * support MPI-3 non-blocking collectives.  MPID_Sched_ routines are all
+ * support MPI-3 non-blocking collectives.  MPIR_Sched_ routines are all
  * permitted to be inlines.  They are not permitted to be macros.
  *
  * Most (currently all) devices will just use the default implementation that
  * lives in "src/mpid/common/sched" */
 
-/* The device must supply a typedef for MPID_Sched_t.  MPID_Sched_t is a handle
+/* The device must supply a typedef for MPIR_Sched_t.  MPIR_Sched_t is a handle
  * to the schedule (often a pointer under the hood), not the actual schedule.
  * This makes it easy to cheaply pass the schedule between functions.  Many
  *
  * The device must also define a constant (possibly a macro) for an invalid
- * schedule: MPID_SCHED_NULL */
+ * schedule: MPIR_SCHED_NULL */
 
 /* Context/tag strategy for send/recv ops:
  * -------------------------------
@@ -41,49 +41,49 @@
  * mechanism as well.
  */
 
-#define MPID_SCHED_NULL (NULL)
+#define MPIR_SCHED_NULL (NULL)
 
 /* Open question: should tag allocation be rolled into Sched_start?  Keeping it
  * separate potentially allows more parallelism in the future, but it also
  * pushes more work onto the clients of this interface. */
-int MPID_Sched_next_tag(MPIR_Comm *comm_ptr, int *tag);
+int MPIR_Sched_next_tag(MPIR_Comm *comm_ptr, int *tag);
 
-/* the device must provide a typedef for MPID_Sched_t in mpidpre.h */
+/* the device must provide a typedef for MPIR_Sched_t in mpidpre.h */
 
 /* creates a new opaque schedule object and returns a handle to it in (*sp) */
-int MPID_Sched_create(MPID_Sched_t *sp);
+int MPIR_Sched_create(MPIR_Sched_t *sp);
 /* clones orig and returns a handle to the new schedule in (*cloned) */
-int MPID_Sched_clone(MPID_Sched_t orig, MPID_Sched_t *cloned);
-/* sets (*sp) to MPID_SCHED_NULL and gives you back a request pointer in (*req).
+int MPIR_Sched_clone(MPIR_Sched_t orig, MPIR_Sched_t *cloned);
+/* sets (*sp) to MPIR_SCHED_NULL and gives you back a request pointer in (*req).
  * The caller is giving up ownership of the opaque schedule object.
  *
  * comm should be the primary (user) communicator with which this collective is
  * associated, even if other hidden communicators are used for a subset of the
  * operations.  It will be used for error handling and similar operations. */
-int MPID_Sched_start(MPID_Sched_t *sp, MPIR_Comm *comm, int tag, MPIR_Request **req);
+int MPIR_Sched_start(MPIR_Sched_t *sp, MPIR_Comm *comm, int tag, MPIR_Request **req);
 
 /* send and recv take a comm ptr to enable hierarchical collectives */
-int MPID_Sched_send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, MPIR_Comm *comm, MPID_Sched_t s);
-int MPID_Sched_recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int src, MPIR_Comm *comm, MPID_Sched_t s);
+int MPIR_Sched_send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, MPIR_Comm *comm, MPIR_Sched_t s);
+int MPIR_Sched_recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int src, MPIR_Comm *comm, MPIR_Sched_t s);
 
 /* just like MPI_Issend, can't complete until the matching recv is posted */
-int MPID_Sched_ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, MPIR_Comm *comm, MPID_Sched_t s);
+int MPIR_Sched_ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, MPIR_Comm *comm, MPIR_Sched_t s);
 
-int MPID_Sched_reduce(const void *inbuf, void *inoutbuf, MPI_Aint count, MPI_Datatype datatype, MPI_Op op, MPID_Sched_t s);
+int MPIR_Sched_reduce(const void *inbuf, void *inoutbuf, MPI_Aint count, MPI_Datatype datatype, MPI_Op op, MPIR_Sched_t s);
 /* packing/unpacking can be accomplished by passing MPI_PACKED as either intype
  * or outtype */
-int MPID_Sched_copy(const void *inbuf,  MPI_Aint incount,  MPI_Datatype intype,
-                    void *outbuf, MPI_Aint outcount, MPI_Datatype outtype, MPID_Sched_t s);
+int MPIR_Sched_copy(const void *inbuf,  MPI_Aint incount,  MPI_Datatype intype,
+                    void *outbuf, MPI_Aint outcount, MPI_Datatype outtype, MPIR_Sched_t s);
 /* require that all previously added ops are complete before subsequent ops
  * may begin to execute */
-int MPID_Sched_barrier(MPID_Sched_t s);
+int MPIR_Sched_barrier(MPIR_Sched_t s);
 
 /* A convenience macro for the extremely common case that "mpi_errno" is the
  * variable used for tracking error state and MPIR_ERR_POP is needed.  This
  * declutters the NBC code substantially. */
-#define MPID_SCHED_BARRIER(sched_)              \
+#define MPIR_SCHED_BARRIER(sched_)              \
     do {                                        \
-        mpi_errno = MPID_Sched_barrier(sched_); \
+        mpi_errno = MPIR_Sched_barrier(sched_); \
         if (mpi_errno) MPIR_ERR_POP(mpi_errno); \
     } while (0)
 
@@ -94,15 +94,15 @@ int MPID_Sched_barrier(MPID_Sched_t s);
  * A corresponding _recv_defer function is not currently provided because there
  * is no known use case.  The recv count is just an upper bound, not an exact
  * amount to be received, so an oversized recv is used instead of deferral. */
-int MPID_Sched_send_defer(const void *buf, const MPI_Aint *count, MPI_Datatype datatype, int dest, MPIR_Comm *comm, MPID_Sched_t s);
-/* Just like MPID_Sched_recv except it populates the given status object with
+int MPIR_Sched_send_defer(const void *buf, const MPI_Aint *count, MPI_Datatype datatype, int dest, MPIR_Comm *comm, MPIR_Sched_t s);
+/* Just like MPIR_Sched_recv except it populates the given status object with
  * the received count and error information, much like a normal recv.  Often
- * useful in conjunction with MPID_Sched_send_defer. */
-int MPID_Sched_recv_status(void *buf, MPI_Aint count, MPI_Datatype datatype, int src, MPIR_Comm *comm, MPI_Status *status, MPID_Sched_t s);
+ * useful in conjunction with MPIR_Sched_send_defer. */
+int MPIR_Sched_recv_status(void *buf, MPI_Aint count, MPI_Datatype datatype, int src, MPIR_Comm *comm, MPI_Status *status, MPIR_Sched_t s);
 
 /* buffer management, fancy reductions, etc */
-int MPID_Sched_cb(MPID_Sched_cb_t *cb_p, void *cb_state, MPID_Sched_t s);
-int MPID_Sched_cb2(MPID_Sched_cb2_t *cb_p, void *cb_state, void *cb_state2, MPID_Sched_t s);
+int MPIR_Sched_cb(MPIR_Sched_cb_t *cb_p, void *cb_state, MPIR_Sched_t s);
+int MPIR_Sched_cb2(MPIR_Sched_cb2_t *cb_p, void *cb_state, void *cb_state2, MPIR_Sched_t s);
 
 /* TODO: develop a caching infrastructure for use by the upper level as well,
  * hopefully s.t. uthash can be used somehow */
@@ -110,21 +110,21 @@ int MPID_Sched_cb2(MPID_Sched_cb2_t *cb_p, void *cb_state, void *cb_state2, MPID
 /* common callback utility functions */
 int MPIR_Sched_cb_free_buf(MPIR_Comm *comm, int tag, void *state);
 
-/* an upgraded version of MPIU_CHKPMEM_MALLOC/_DECL/_REAP/_COMMIT that adds
+/* an upgraded version of MPIR_CHKPMEM_MALLOC/_DECL/_REAP/_COMMIT that adds
  * corresponding cleanup callbacks to the given schedule at _COMMIT time */
 #define MPIR_SCHED_CHKPMEM_DECL(n_)                               \
     void *(mpir_sched_chkpmem_stk_[n_]) = { NULL };               \
     int mpir_sched_chkpmem_stk_sp_=0;                             \
-    MPIU_AssertDeclValue(const int mpir_sched_chkpmem_stk_sz_,n_)
+    MPIR_AssertDeclValue(const int mpir_sched_chkpmem_stk_sz_,n_)
 
 #define MPIR_SCHED_CHKPMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_)  \
     do {                                                                          \
         (pointer_) = (type_)MPL_malloc(nbytes_);                                 \
         if (pointer_) {                                                           \
-            MPIU_Assert(mpir_sched_chkpmem_stk_sp_ < mpir_sched_chkpmem_stk_sz_); \
+            MPIR_Assert(mpir_sched_chkpmem_stk_sp_ < mpir_sched_chkpmem_stk_sz_); \
             mpir_sched_chkpmem_stk_[mpir_sched_chkpmem_stk_sp_++] = (pointer_);   \
         } else if ((nbytes_) > 0) {                                               \
-            MPIU_CHKMEM_SETERR((rc_),(nbytes_),(name_));                          \
+            MPIR_CHKMEM_SETERR((rc_),(nbytes_),(name_));                          \
             stmt_;                                                                \
         }                                                                         \
     } while (0)
@@ -142,9 +142,9 @@ int MPIR_Sched_cb_free_buf(MPIR_Comm *comm, int tag, void *state);
 
 #define MPIR_SCHED_CHKPMEM_COMMIT(sched_)                                                      \
     do {                                                                                       \
-        MPID_SCHED_BARRIER(s);                                                                 \
+        MPIR_SCHED_BARRIER(s);                                                                 \
         while (mpir_sched_chkpmem_stk_sp_ > 0) {                                               \
-            mpi_errno = MPID_Sched_cb(&MPIR_Sched_cb_free_buf,                                 \
+            mpi_errno = MPIR_Sched_cb(&MPIR_Sched_cb_free_buf,                                 \
                                       (mpir_sched_chkpmem_stk_[--mpir_sched_chkpmem_stk_sp_]), \
                                       (sched_));                                               \
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);                                            \
diff --git a/src/include/mpir_objects.h b/src/include/mpir_objects.h
index a11ebc1..c11c397 100644
--- a/src/include/mpir_objects.h
+++ b/src/include/mpir_objects.h
@@ -17,13 +17,9 @@
   as integers; this makes implementation of the C/Fortran handle transfer
   calls (part of MPI-2) easy.
 
-  MPID objects (again with the possible exception of 'MPI_Request's)
+  MPIR objects
   are allocated by a common set of object allocation functions.
-  These are
-.vb
-    void *MPIU_Handle_obj_create( MPIU_Object_alloc_t *objmem )
-    void MPIU_Handle_obj_destroy( MPIU_Object_alloc_t *objmem, void *object )
-.ve
+
   where 'objmem' is a pointer to a memory allocation object that knows
   enough to allocate objects, including the
   size of the object and the location of preallocated memory, as well
@@ -48,8 +44,8 @@
   reference counts must be accessed and updated atomically.
   A reference count for
   `any` object can be incremented (atomically)
-  with 'MPIU_Object_add_ref(objptr)'
-  and decremented with 'MPIU_Object_release_ref(objptr,newval_ptr)'.
+  with 'MPIR_Object_add_ref(objptr)'
+  and decremented with 'MPIR_Object_release_ref(objptr,newval_ptr)'.
   These have been designed so that they can be implemented as inlined
   macros rather than function calls, even in the multithreaded case, and
   can use special processor instructions that guarantee atomicity to
@@ -59,11 +55,11 @@
   value otherwise.  If this value is zero, then the routine that decremented
   the
   reference count should free the object.  This may be as simple as
-  calling 'MPIU_Handle_obj_destroy' (for simple objects with no other allocated
+  calling 'destroy' (for simple objects with no other allocated
   storage) or may require calling a separate routine to destroy the object.
   Because MPI uses 'MPI_xxx_free' to both decrement the reference count and
   free the object if the reference count is zero, we avoid the use of 'free'
-  in the MPID routines.
+  in the MPIR destruction routines.
 
   The 'inuse_ptr' approach is used rather than requiring the post-decrement
   value because, for reference-count semantics, all that is necessary is
@@ -79,7 +75,7 @@
   Structure Definitions:
   The structure definitions in this document define `only` that part of
   a structure that may be used by code that is making use of the ADI.
-  Thus, some structures, such as 'MPID_Comm', have many defined fields;
+  Thus, some structures, such as 'MPIR_Comm', have many defined fields;
   these are used to support MPI routines such as 'MPI_Comm_size' and
   'MPI_Comm_remote_group'.  Other structures may have few or no defined
   members; these structures have no fields used outside of the ADI.
@@ -114,7 +110,7 @@
   rather than the handles themselves.  However, each structure contains an 
   'handle' field that is the corresponding integer handle for the MPI object.
 
-  MPID objects (objects used within the implementation of MPI) are not opaque.
+  MPIR objects are not opaque.
 
   T*/
 
@@ -122,7 +118,7 @@
    and for the handles.  This is a 4 bit value.  0 is reserved so
    that all-zero handles can be flagged as an error. */
 /*E
-  MPIR_Object_kind - Object kind (communicator, window, or file)
+  MPII_Object_kind - Object kind (communicator, window, or file)
 
   Notes:
   This enum is used by keyvals and errhandlers to indicate the type of
@@ -145,7 +141,7 @@
   Module:
   Attribute-DS
   E*/
-typedef enum MPIR_Object_kind {
+typedef enum MPII_Object_kind {
   MPIR_COMM       = 0x1,
   MPIR_GROUP      = 0x2,
   MPIR_DATATYPE   = 0x3,
@@ -160,7 +156,7 @@ typedef enum MPIR_Object_kind {
   MPIR_PROCGROUP  = 0xc,               /* These are internal device objects */
   MPIR_VCONN      = 0xd,
   MPIR_GREQ_CLASS = 0xf
-} MPIR_Object_kind;
+} MPII_Object_kind;
 
 
 #define HANDLE_MPI_KIND_SHIFT 26
@@ -168,7 +164,7 @@ typedef enum MPIR_Object_kind {
 #define HANDLE_SET_MPI_KIND(a,kind) ((a) | ((kind) << HANDLE_MPI_KIND_SHIFT))
 
 /* returns the name of the handle kind for debugging/logging purposes */
-const char *MPIU_Handle_get_kind_str(int kind);
+const char *MPIR_Handle_get_kind_str(int kind);
 
 /* Handle types.  These are really 2 bits */
 #define HANDLE_KIND_INVALID  0x0
@@ -220,160 +216,102 @@ extern MPL_dbg_class MPIR_DBG_HANDLE;
    as they are incremented */
 #ifdef MPICH_DEBUG_HANDLES
 #define MPICH_DEBUG_MAX_REFCOUNT 64
-#define MPIU_HANDLE_CHECK_REFCOUNT(objptr_,op_)                                                     \
+#define HANDLE_CHECK_REFCOUNT(objptr_,op_)                                                     \
     do {                                                                                            \
-        int local_ref_count_ = MPIU_Object_get_ref(objptr_);                                        \
+        int local_ref_count_ = MPIR_Object_get_ref(objptr_);                                        \
         if (local_ref_count_ > MPICH_DEBUG_MAX_REFCOUNT || local_ref_count_ < 0)                    \
         {                                                                                           \
             MPL_DBG_MSG_FMT(MPIR_DBG_HANDLE,TYPICAL,(MPL_DBG_FDEST,                                        \
                                              "Invalid refcount (%d) in %p (0x%08x) %s",             \
                                              local_ref_count_, (objptr_), (objptr_)->handle, op_)); \
         }                                                                                           \
-        MPIU_Assert(local_ref_count_ >= 0);                                                         \
+        MPIR_Assert(local_ref_count_ >= 0);                                                         \
     } while (0)
 #else
-#define MPIU_HANDLE_CHECK_REFCOUNT(objptr_,op_) \
-    MPIU_Assert(MPIU_Object_get_ref(objptr_) >= 0)
+#define HANDLE_CHECK_REFCOUNT(objptr_,op_) \
+    MPIR_Assert(MPIR_Object_get_ref(objptr_) >= 0)
 #endif
 
-#define MPIU_HANDLE_LOG_REFCOUNT_CHANGE(objptr_, action_str_)                                          \
+#define HANDLE_LOG_REFCOUNT_CHANGE(objptr_, action_str_)                                          \
     MPL_DBG_MSG_FMT(MPIR_DBG_HANDLE,TYPICAL,(MPL_DBG_FDEST,                                                   \
                                      "%s %p (0x%08x kind=%s) refcount to %d",                          \
                                      (action_str_),                                                    \
                                      (objptr_),                                                        \
                                      (objptr_)->handle,                                                \
-                                     MPIU_Handle_get_kind_str(HANDLE_GET_MPI_KIND((objptr_)->handle)), \
-                                     MPIU_Object_get_ref(objptr_)))
-
-
-/*M
-   MPIU_Object_add_ref - Increment the reference count for an MPI object
-
-   Synopsis:
-.vb
-    MPIU_Object_add_ref( MPIU_Object *ptr )
-.ve
-
-   Input Parameter:
-.  ptr - Pointer to the object.
-
-   Notes:
-   In an unthreaded implementation, this function will usually be implemented
-   as a single-statement macro.  In an 'MPI_THREAD_MULTIPLE' implementation,
-   this routine must implement an atomic increment operation, using, for
-   example, a lock on datatypes or special assembly code.
-M*/
-/*M
-   MPIU_Object_release_ref - Decrement the reference count for an MPI object
-
-   Synopsis:
-.vb
-   MPIU_Object_release_ref( MPIU_Object *ptr, int *inuse_ptr )
-.ve
-
-   Input Parameter:
-.  objptr - Pointer to the object.
-
-   Output Parameter:
-.  inuse_ptr - Pointer to the value of the reference count after decrementing.
-   This value is either zero or non-zero. See below for details.
-
-   Notes:
-   In an unthreaded implementation, this function will usually be implemented
-   as a single-statement macro.  In an 'MPI_THREAD_MULTIPLE' implementation,
-   this routine must implement an atomic decrement operation, using, for
-   example, a lock on datatypes or special assembly code.
-
-   Once the reference count is decremented to zero, it is an error to
-   change it.  A correct MPI program will never do that, but an incorrect one
-   (particularly a multithreaded program with a race condition) might.
-
-   The following code is `invalid`\:
-.vb
-   MPIU_Object_release_ref( datatype_ptr );
-   if (datatype_ptr->ref_count == 0) MPID_Datatype_free( datatype_ptr );
-.ve
-   In a multi-threaded implementation, the value of 'datatype_ptr->ref_count'
-   may have been changed by another thread, resulting in both threads calling
-   'MPID_Datatype_free'.  Instead, use
-.vb
-   MPIU_Object_release_ref( datatype_ptr, &inUse );
-   if (!inuse)
-       MPID_Datatype_free( datatype_ptr );
-.ve
-  M*/
+                                     MPIR_Handle_get_kind_str(HANDLE_GET_MPI_KIND((objptr_)->handle)), \
+                                     MPIR_Object_get_ref(objptr_)))
 
 /* The "_always" versions of these macros unconditionally manipulate the
  * reference count of the given object.  They exist to permit an optimization
  * of not reference counting predefined objects. */
 
-/* The MPIU_DBG... statements are macros that vanish unless
-   --enable-g=log is selected.  MPIU_HANDLE_CHECK_REFCOUNT is
+/* The MPL_DBG... statements are macros that vanish unless
+   --enable-g=log is selected.  HANDLE_CHECK_REFCOUNT is
    defined above, and adds an additional sanity check for the refcounts
 */
-#if MPIU_THREAD_REFCOUNT == MPIU_REFCOUNT_NONE
+#if MPICH_THREAD_REFCOUNT == MPICH_REFCOUNT__NONE
 
-typedef int MPIU_Handle_ref_count;
+typedef int Handle_ref_count;
 
-#define MPIU_Object_set_ref(objptr_,val)                 \
+#define MPIR_Object_set_ref(objptr_,val)                 \
     do {                                                 \
         (objptr_)->ref_count = val;                      \
-        MPIU_HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "set"); \
+        HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "set"); \
     } while (0)
 
 /* must be used with care, since there is no synchronization for this read */
-#define MPIU_Object_get_ref(objptr_) \
+#define MPIR_Object_get_ref(objptr_) \
     ((objptr_)->ref_count)
 
-#define MPIU_Object_add_ref_always(objptr_)               \
+#define Object_add_ref_always(objptr_)               \
     do {                                                  \
         (objptr_)->ref_count++;                           \
-        MPIU_HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "incr"); \
-        MPIU_HANDLE_CHECK_REFCOUNT(objptr_,"incr");       \
+        HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "incr"); \
+        HANDLE_CHECK_REFCOUNT(objptr_,"incr");       \
     } while (0)
-#define MPIU_Object_release_ref_always(objptr_,inuse_ptr) \
+#define MPIR_Object_release_ref_always(objptr_,inuse_ptr) \
     do {                                                  \
         *(inuse_ptr) = --((objptr_)->ref_count);          \
-        MPIU_HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "decr"); \
-        MPIU_HANDLE_CHECK_REFCOUNT(objptr_,"decr");       \
+        HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "decr"); \
+        HANDLE_CHECK_REFCOUNT(objptr_,"decr");       \
     } while (0)
 
-#elif MPIU_THREAD_REFCOUNT == MPIU_REFCOUNT_LOCKFREE
+#elif MPICH_THREAD_REFCOUNT == MPICH_REFCOUNT__LOCKFREE
 
 #include "opa_primitives.h"
-typedef OPA_int_t MPIU_Handle_ref_count;
+typedef OPA_int_t Handle_ref_count;
 
-#define MPIU_Object_set_ref(objptr_,val)                 \
+#define MPIR_Object_set_ref(objptr_,val)                 \
     do {                                                 \
         OPA_store_int(&(objptr_)->ref_count, val);       \
-        MPIU_HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "set"); \
+        HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "set"); \
     } while (0)
 
 /* must be used with care, since there is no synchronization for this read */
-#define MPIU_Object_get_ref(objptr_) \
+#define MPIR_Object_get_ref(objptr_) \
     (OPA_load_int(&(objptr_)->ref_count))
 
-#define MPIU_Object_add_ref_always(objptr_)               \
+#define Object_add_ref_always(objptr_)               \
     do {                                                  \
         OPA_incr_int(&((objptr_)->ref_count));            \
-        MPIU_HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "incr"); \
-        MPIU_HANDLE_CHECK_REFCOUNT(objptr_,"incr");       \
+        HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "incr"); \
+        HANDLE_CHECK_REFCOUNT(objptr_,"incr");       \
     } while (0)
-#define MPIU_Object_release_ref_always(objptr_,inuse_ptr)               \
+#define MPIR_Object_release_ref_always(objptr_,inuse_ptr)               \
     do {                                                                \
         int got_zero_ = OPA_decr_and_test_int(&((objptr_)->ref_count)); \
         *(inuse_ptr) = got_zero_ ? 0 : 1;                               \
-        MPIU_HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "decr");               \
-        MPIU_HANDLE_CHECK_REFCOUNT(objptr_,"decr");                     \
+        HANDLE_LOG_REFCOUNT_CHANGE(objptr_, "decr");               \
+        HANDLE_CHECK_REFCOUNT(objptr_,"decr");                     \
     } while (0)
 #else
-#error invalid value for MPIU_THREAD_REFCOUNT
+#error invalid value for MPICH_THREAD_REFCOUNT
 #endif
 
 /* TODO someday we should probably always suppress predefined object refcounting,
  * but we don't have total confidence in it yet.  So until we gain sufficient
  * confidence, this is a configurable option. */
-#if defined(MPIU_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS)
+#if defined(MPICH_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS)
 
 /* The assumption here is that objects with handles of type HANDLE_KIND_BUILTIN
  * will be created/destroyed only at MPI_Init/MPI_Finalize time and don't need
@@ -385,26 +323,26 @@ typedef OPA_int_t MPIU_Handle_ref_count;
  * HANDLE_SET_KIND(0, HANDLE_KIND_INVALID) */
 /* TODO profile and examine the assembly that is generated for this if() on Blue
  * Gene (and elsewhere).  We may need to mark it unlikely(). */
-#define MPIU_Object_add_ref(objptr_)                           \
+#define MPIR_Object_add_ref(objptr_)                           \
     do {                                                       \
         int handle_kind_ = HANDLE_GET_KIND((objptr_)->handle); \
         if (unlikely(handle_kind_ != HANDLE_KIND_BUILTIN)) {   \
-            MPIU_Object_add_ref_always((objptr_));             \
+            Object_add_ref_always((objptr_));             \
         }                                                      \
         else {                                                                                                 \
             MPL_DBG_MSG_FMT(MPIR_DBG_HANDLE,TYPICAL,(MPL_DBG_FDEST,                                                   \
                                              "skipping add_ref on %p (0x%08x kind=%s) refcount=%d",            \
                                              (objptr_),                                                        \
                                              (objptr_)->handle,                                                \
-                                             MPIU_Handle_get_kind_str(HANDLE_GET_MPI_KIND((objptr_)->handle)), \
-                                             MPIU_Object_get_ref(objptr_)))                                    \
+                                             MPIR_Handle_get_kind_str(HANDLE_GET_MPI_KIND((objptr_)->handle)), \
+                                             MPIR_Object_get_ref(objptr_)))                                    \
         }                                                                                                      \
     } while (0)
-#define MPIU_Object_release_ref(objptr_,inuse_ptr_)                  \
+#define MPIR_Object_release_ref(objptr_,inuse_ptr_)                  \
     do {                                                             \
         int handle_kind_ = HANDLE_GET_KIND((objptr_)->handle);       \
         if (unlikely(handle_kind_ != HANDLE_KIND_BUILTIN)) {         \
-            MPIU_Object_release_ref_always((objptr_), (inuse_ptr_)); \
+            MPIR_Object_release_ref_always((objptr_), (inuse_ptr_)); \
         }                                                            \
         else {                                                       \
             *(inuse_ptr_) = 1;                                       \
@@ -412,18 +350,18 @@ typedef OPA_int_t MPIU_Handle_ref_count;
                                              "skipping release_ref on %p (0x%08x kind=%s) refcount=%d",        \
                                              (objptr_),                                                        \
                                              (objptr_)->handle,                                                \
-                                             MPIU_Handle_get_kind_str(HANDLE_GET_MPI_KIND((objptr_)->handle)), \
-                                             MPIU_Object_get_ref(objptr_)))                                    \
+                                             MPIR_Handle_get_kind_str(HANDLE_GET_MPI_KIND((objptr_)->handle)), \
+                                             MPIR_Object_get_ref(objptr_)))                                    \
         }                                                            \
     } while (0)
 
-#else /* !defined(MPIU_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS) */
+#else /* !defined(MPICH_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS) */
 
 /* the base case, where we just always manipulate the reference counts */
-#define MPIU_Object_add_ref(objptr_) \
-    MPIU_Object_add_ref_always((objptr_))
-#define MPIU_Object_release_ref(objptr_,inuse_ptr_) \
-    MPIU_Object_release_ref_always((objptr_),(inuse_ptr_))
+#define MPIR_Object_add_ref(objptr_) \
+    Object_add_ref_always((objptr_))
+#define MPIR_Object_release_ref(objptr_,inuse_ptr_) \
+    MPIR_Object_release_ref_always((objptr_),(inuse_ptr_))
 
 #endif
 
@@ -438,39 +376,39 @@ typedef OPA_int_t MPIU_Handle_ref_count;
  *
  * All *active* (in use) objects have the handle as the first value; objects
  * with referene counts have the reference count as the second value.  See
- * MPIU_Object_add_ref and MPIU_Object_release_ref.
+ * MPIR_Object_add_ref and MPIR_Object_release_ref.
  *
  * NOTE: This macro *must* be invoked as the very first element of the structure! */
-#define MPIU_OBJECT_HEADER             \
+#define MPIR_OBJECT_HEADER             \
     int handle;                        \
-    MPIU_Handle_ref_count ref_count/*semicolon intentionally omitted*/
+    Handle_ref_count ref_count/*semicolon intentionally omitted*/
 
 /* ALL objects have the handle as the first value. */
 /* Inactive (unused and stored on the appropriate avail list) objects 
-   have MPIU_Handle_common as the head */
-typedef struct MPIU_Handle_common {
-    MPIU_OBJECT_HEADER;
+   have MPIR_Handle_common as the head */
+typedef struct MPIR_Handle_common {
+    MPIR_OBJECT_HEADER;
     void *next;   /* Free handles use this field to point to the next
                      free object */
-} MPIU_Handle_common;
+} MPIR_Handle_common;
 
 /* This type contains all of the data, except for the direct array,
    used by the object allocators. */
-typedef struct MPIU_Object_alloc_t {
-    MPIU_Handle_common *avail;          /* Next available object */
+typedef struct MPIR_Object_alloc_t {
+    MPIR_Handle_common *avail;          /* Next available object */
     int                initialized;     /* */
     void              *(*indirect)[];   /* Pointer to indirect object blocks */
     int                indirect_size;   /* Number of allocated indirect blocks */
-    MPIR_Object_kind   kind;            /* Kind of object this is for */
+    MPII_Object_kind   kind;            /* Kind of object this is for */
     int                size;            /* Size of an individual object */
     void               *direct;         /* Pointer to direct block, used 
                                            for allocation */
     int                direct_size;     /* Size of direct block */
-} MPIU_Object_alloc_t;
-extern void *MPIU_Handle_obj_alloc(MPIU_Object_alloc_t *);
-extern void *MPIU_Handle_obj_alloc_unsafe(MPIU_Object_alloc_t *);
-extern void MPIU_Handle_obj_free( MPIU_Object_alloc_t *, void * );
-void *MPIU_Handle_get_ptr_indirect( int, MPIU_Object_alloc_t * );
+} MPIR_Object_alloc_t;
+extern void *MPIR_Handle_obj_alloc(MPIR_Object_alloc_t *);
+extern void *MPIR_Handle_obj_alloc_unsafe(MPIR_Object_alloc_t *);
+extern void MPIR_Handle_obj_free( MPIR_Object_alloc_t *, void * );
+void *MPIR_Handle_get_ptr_indirect( int, MPIR_Object_alloc_t * );
 
 
 /* Convert Handles to objects for MPI types that have predefined objects */
@@ -488,7 +426,7 @@ void *MPIU_Handle_get_ptr_indirect( int, MPIU_Object_alloc_t * );
           break;                                                        \
       case HANDLE_KIND_INDIRECT:                                        \
           ptr=((MPIR_##kind*)                                           \
-               MPIU_Handle_get_ptr_indirect(a,&MPIR_##kind##_mem));     \
+               MPIR_Handle_get_ptr_indirect(a,&MPIR_##kind##_mem));     \
           break;                                                        \
       case HANDLE_KIND_INVALID:                                         \
       default:								\
@@ -507,7 +445,7 @@ void *MPIU_Handle_get_ptr_indirect( int, MPIU_Object_alloc_t * );
           break;							\
       case HANDLE_KIND_INDIRECT:					\
           ptr=((MPIR_##kind*)						\
-               MPIU_Handle_get_ptr_indirect(a,&MPIR_##kind##_mem));	\
+               MPIR_Handle_get_ptr_indirect(a,&MPIR_##kind##_mem));	\
           break;							\
       case HANDLE_KIND_INVALID:						\
       case HANDLE_KIND_BUILTIN:						\
@@ -527,24 +465,24 @@ void *MPIU_Handle_get_ptr_indirect( int, MPIU_Object_alloc_t * );
 #define MPIR_Win_get_ptr(a,ptr)        MPIR_Get_ptr(Win,a,ptr)
 #define MPIR_Request_get_ptr(a,ptr)    MPIR_Get_ptr(Request,a,ptr)
 #define MPIR_Grequest_class_get_ptr(a,ptr) MPIR_Get_ptr(Grequest_class,a,ptr)
-/* Keyvals have a special format. This is roughly MPID_Get_ptrb, but
+/* Keyvals have a special format. This is roughly MPIR_Get_ptrb, but
    the handle index is in a smaller bit field.  In addition,
    there is no storage for the builtin keyvals.
    For the indirect case, we mask off the part of the keyval that is
    in the bits normally used for the indirect block index.
 */
-#define MPIR_Keyval_get_ptr(a,ptr)     \
+#define MPII_Keyval_get_ptr(a,ptr)     \
 {                                                                       \
    switch (HANDLE_GET_KIND(a)) {                                        \
       case HANDLE_KIND_BUILTIN:                                         \
           ptr=0;                                                        \
           break;                                                        \
       case HANDLE_KIND_DIRECT:                                          \
-          ptr=MPIR_Keyval_direct+((a)&0x3fffff);                        \
+          ptr=MPII_Keyval_direct+((a)&0x3fffff);                        \
           break;                                                        \
       case HANDLE_KIND_INDIRECT:                                        \
-          ptr=((MPIR_Keyval*)                                           \
-             MPIU_Handle_get_ptr_indirect((a)&0xfc3fffff,&MPIR_Keyval_mem)); \
+          ptr=((MPII_Keyval*)                                           \
+             MPIR_Handle_get_ptr_indirect((a)&0xfc3fffff,&MPII_Keyval_mem)); \
           break;                                                        \
       case HANDLE_KIND_INVALID:                                         \
       default:								\
diff --git a/src/include/mpir_op.h b/src/include/mpir_op.h
index 7a22a7a..715dfe7 100644
--- a/src/include/mpir_op.h
+++ b/src/include/mpir_op.h
@@ -102,7 +102,7 @@ typedef union MPIR_User_function {
   Collective-DS
   S*/
 typedef struct MPIR_Op {
-     MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+     MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
      MPIR_Op_kind       kind;
      MPIR_Lang_t        language;
      MPIR_User_function function;
@@ -110,12 +110,12 @@ typedef struct MPIR_Op {
 #define MPIR_OP_N_BUILTIN 15
 extern MPIR_Op MPIR_Op_builtin[MPIR_OP_N_BUILTIN];
 extern MPIR_Op MPIR_Op_direct[];
-extern MPIU_Object_alloc_t MPIR_Op_mem;
+extern MPIR_Object_alloc_t MPIR_Op_mem;
 
 #define MPIR_Op_add_ref(_op) \
-    do { MPIU_Object_add_ref(_op); } while (0)
+    do { MPIR_Object_add_ref(_op); } while (0)
 #define MPIR_Op_release_ref( _op, _inuse ) \
-    do { MPIU_Object_release_ref( _op, _inuse ); } while (0)
+    do { MPIR_Object_release_ref( _op, _inuse ); } while (0)
 
 /* release and free-if-not-in-use helper */
 #define MPIR_Op_release(op_p_)                           \
@@ -123,7 +123,7 @@ extern MPIU_Object_alloc_t MPIR_Op_mem;
         int in_use_;                                     \
         MPIR_Op_release_ref((op_p_), &in_use_);          \
         if (!in_use_) {                                  \
-            MPIU_Handle_obj_free(&MPIR_Op_mem, (op_p_)); \
+            MPIR_Handle_obj_free(&MPIR_Op_mem, (op_p_)); \
         }                                                \
     } while (0)
 
diff --git a/src/include/mpir_pointers.h b/src/include/mpir_pointers.h
index 763f8cd..345d53d 100644
--- a/src/include/mpir_pointers.h
+++ b/src/include/mpir_pointers.h
@@ -29,7 +29,7 @@
  * -1 or similar, and the 0xecec... case when --enable-g=mem is used */
 #define MPIR_Comm_valid_ptr(ptr,err,ignore_rev) {     \
      MPIR_Valid_ptr_class(Comm,ptr,MPI_ERR_COMM,err); \
-     if ((ptr) && MPIU_Object_get_ref(ptr) <= 0) {    \
+     if ((ptr) && MPIR_Object_get_ref(ptr) <= 0) {    \
          MPIR_ERR_SET(err,MPI_ERR_COMM,"**comm");     \
          ptr = 0;                                     \
      } else if ((ptr) && (ptr)->revoked && !(ignore_rev)) {        \
@@ -41,7 +41,7 @@
 #define MPIR_Op_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Op,ptr,MPI_ERR_OP,err)
 #define MPIR_Errhandler_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Errhandler,ptr,MPI_ERR_ARG,err)
 #define MPIR_Request_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Request,ptr,MPI_ERR_REQUEST,err)
-#define MPIR_Keyval_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Keyval,ptr,MPI_ERR_KEYVAL,err)
+#define MPII_Keyval_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Keyval,ptr,MPI_ERR_KEYVAL,err)
 
 
 /* Assigns (src_) to (dst_), checking that (src_) fits in (dst_) without
@@ -51,10 +51,10 @@
  * promotion/truncation/conversion rules in mind.  A discussion of these issues
  * can be found in Chapter 5 of "Secure Coding in C and C++" by Robert Seacord.
  */
-#define MPIU_Assign_trunc(dst_,src_,dst_type_)                                         \
+#define MPIR_Assign_trunc(dst_,src_,dst_type_)                                         \
     do {                                                                               \
         /* will catch some of the cases if the expr_inttype macros aren't available */ \
-        MPIU_Assert((src_) == (dst_type_)(src_));                                      \
+        MPIR_Assert((src_) == (dst_type_)(src_));                                      \
         dst_ = (dst_type_)(src_);                                                      \
     } while (0)
 
@@ -64,8 +64,8 @@
  *
  * \param[in]  aint  Variable of type MPI_Aint
  */
-#define MPIU_Ensure_Aint_fits_in_int(aint) \
-  MPIU_Assert((aint) == (MPI_Aint)(int)(aint));
+#define MPIR_Ensure_Aint_fits_in_int(aint) \
+  MPIR_Assert((aint) == (MPI_Aint)(int)(aint));
 
 /*
  * Ensure an MPI_Aint value fits into a pointer.
@@ -73,7 +73,7 @@
  *
  * \param[in]  aint  Variable of type MPI_Aint
  */
-#define MPIU_Ensure_Aint_fits_in_pointer(aint) \
-  MPIU_Assert((aint) == (MPI_Aint)(uintptr_t) MPIU_AINT_CAST_TO_VOID_PTR(aint));
+#define MPIR_Ensure_Aint_fits_in_pointer(aint) \
+  MPIR_Assert((aint) == (MPI_Aint)(uintptr_t) MPIR_AINT_CAST_TO_VOID_PTR(aint));
 
 #endif /* MPIR_POINTERS_H_INCLUDED */
diff --git a/src/include/mpir_process.h b/src/include/mpir_process.h
index d70e89b..8c2dcab 100644
--- a/src/include/mpir_process.h
+++ b/src/include/mpir_process.h
@@ -19,7 +19,7 @@ typedef struct PreDefined_attrs {
     int wtime_is_global; /* Wtime is global over processes in COMM_WORLD */
 } PreDefined_attrs;
 
-typedef struct MPICH_PerProcess_t {
+typedef struct MPIR_Process_t {
     OPA_int_t mpich_state; /* State of MPICH. Use OPA_int_t to make MPI_Initialized() etc.
                               thread-safe per MPI-3.1.  See MPI-Forum ticket 357 */
     int               do_error_checks;  /* runtime error check control */
@@ -57,7 +57,7 @@ typedef struct MPICH_PerProcess_t {
        to specify the kind (comm,file,win) */
     void  (*cxx_call_errfn) ( int, int *, int *, void (*)(void) );
 #endif /* HAVE_CXX_BINDING */
-} MPICH_PerProcess_t;
-extern MPICH_PerProcess_t MPIR_Process;
+} MPIR_Process_t;
+extern MPIR_Process_t MPIR_Process;
 
 #endif /* MPIR_PROCESS_H_INCLUDED */
diff --git a/src/include/mpir_refcount.h b/src/include/mpir_refcount.h
index 5130abc..27d45d3 100644
--- a/src/include/mpir_refcount.h
+++ b/src/include/mpir_refcount.h
@@ -12,9 +12,9 @@
 
 #if MPICH_THREAD_LEVEL == MPI_THREAD_MULTIPLE
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL
 #include "mpir_refcount_global.h"
-#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
+#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
 #include "mpir_refcount_pobj.h"
 #endif
 
diff --git a/src/include/mpir_request.h b/src/include/mpir_request.h
index eb63831..9b90c13 100644
--- a/src/include/mpir_request.h
+++ b/src/include/mpir_request.h
@@ -59,7 +59,7 @@ struct MPIR_Grequest_fns {
 };
 
 typedef struct MPIR_Grequest_class {
-     MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+     MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
      MPI_Grequest_query_function *query_fn;
      MPI_Grequest_free_function *free_fn;
      MPI_Grequest_cancel_function *cancel_fn;
@@ -103,7 +103,7 @@ typedef struct MPIR_Grequest_class {
 
   S*/
 struct MPIR_Request {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
 
     MPIR_Request_kind_t kind;
 
@@ -170,7 +170,7 @@ struct MPIR_Request {
 
 #define MPIR_REQUEST_PREALLOC 8
 
-extern MPIU_Object_alloc_t MPIR_Request_mem;
+extern MPIR_Object_alloc_t MPIR_Request_mem;
 /* Preallocated request objects */
 extern MPIR_Request MPIR_Request_direct[];
 
@@ -178,12 +178,12 @@ static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind)
 {
     MPIR_Request *req;
 
-    req = MPIU_Handle_obj_alloc(&MPIR_Request_mem);
+    req = MPIR_Handle_obj_alloc(&MPIR_Request_mem);
     if (req != NULL) {
 	MPL_DBG_MSG_P(MPIR_DBG_REQUEST,VERBOSE,
                       "allocated request, handle=0x%08x", req->handle);
 #ifdef MPICH_DBG_OUTPUT
-	/*MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPIR_REQUEST);*/
+	/*MPIR_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPIR_REQUEST);*/
 	if (HANDLE_GET_MPI_KIND(req->handle) != MPIR_REQUEST)
 	{
 	    int mpi_errno;
@@ -199,7 +199,7 @@ static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind)
          * inheritance).  For example, do we really* want to set the
          * kind to UNDEFINED? And should the RMA values be set only
          * for RMA requests? */
-	MPIU_Object_set_ref(req, 1);
+	MPIR_Object_set_ref(req, 1);
 	req->kind = kind;
         MPIR_cc_set(&req->cc, 1);
 	req->cc_ptr		   = &req->cc;
@@ -230,10 +230,10 @@ static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind)
 }
 
 #define MPIR_Request_add_ref( _req ) \
-    do { MPIU_Object_add_ref( _req ); } while (0)
+    do { MPIR_Object_add_ref( _req ); } while (0)
 
 #define MPIR_Request_release_ref( _req, _inuse ) \
-    do { MPIU_Object_release_ref( _req, _inuse ); } while (0)
+    do { MPIR_Object_release_ref( _req, _inuse ); } while (0)
 
 static inline void MPIR_Request_free(MPIR_Request *req)
 {
@@ -277,7 +277,7 @@ static inline void MPIR_Request_free(MPIR_Request *req)
 
         MPID_Request_finalize(req);
 
-        MPIU_Handle_obj_free(&MPIR_Request_mem, req);
+        MPIR_Handle_obj_free(&MPIR_Request_mem, req);
     }
 }
 
diff --git a/src/include/mpir_strerror.h b/src/include/mpir_strerror.h
index c69c58b..51ac036 100644
--- a/src/include/mpir_strerror.h
+++ b/src/include/mpir_strerror.h
@@ -7,9 +7,9 @@
 #define MPIR_STRERROR_H_INCLUDED
 
 /*
- * MPIU_Sterror()
+ * MPIR_Strerror()
  *
  * Thread safe implementation of strerror(), whenever possible. */
-const char *MPIU_Strerror(int errnum);
+const char *MPIR_Strerror(int errnum);
 
 #endif /* !defined(MPIR_STRERROR_H_INCLUDED) */
diff --git a/src/include/mpir_thread.h b/src/include/mpir_thread.h
index 7d17382..ed2426f 100644
--- a/src/include/mpir_thread.h
+++ b/src/include/mpir_thread.h
@@ -42,7 +42,7 @@ extern MPIR_Thread_info_t MPIR_ThreadInfo;
 typedef struct {
     int op_errno;               /* For errors in predefined MPI_Ops */
 
-    /* error string storage for MPIU_Strerror */
+    /* error string storage for MPIR_Strerror */
     char strerrbuf[MPIR_STRERROR_BUF_SIZE];
 
 #if (MPICH_THREAD_LEVEL == MPI_THREAD_MULTIPLE)
@@ -66,12 +66,12 @@ extern MPID_Thread_tls_t MPIR_Per_thread_key;
 #define MPIR_THREAD_CHECK_END
 #endif /* MPICH_IS_THREADED */
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL || \
-    MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL || \
+    MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
 extern MPID_Thread_mutex_t MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX;
 #endif
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
 extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_HANDLE_MUTEX;
 extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_MSGQ_MUTEX;
 extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_COMPLETION_MUTEX;
diff --git a/src/include/mpir_topo.h b/src/include/mpir_topo.h
index 824e085..7541b7b 100644
--- a/src/include/mpir_topo.h
+++ b/src/include/mpir_topo.h
@@ -16,7 +16,7 @@
  * collective operations, provided in the MPIR_Collops structure).
  */
 
-typedef struct MPIR_TopoOps {
+typedef struct MPII_Topo_ops {
     int (*cartCreate)( const MPIR_Comm *, int, const int[], const int [],
 		       int, MPI_Comm * );
     int (*cartMap)   ( const MPIR_Comm *, int, const int[], const int [],
@@ -25,25 +25,25 @@ typedef struct MPIR_TopoOps {
 			int, MPI_Comm * );
     int (*graphMap)   ( const MPIR_Comm *, int, const int[], const int[],
 			int * );
-} MPIR_TopoOps;
+} MPII_Topo_ops;
 
 
-typedef struct MPIR_Graph_topology {
+typedef struct MPII_Graph_topology {
   int nnodes;
   int nedges;
   int *index;
   int *edges;
-} MPIR_Graph_topology;
+} MPII_Graph_topology;
 
-typedef struct MPIR_Cart_topology {
+typedef struct MPII_Cart_topology {
   int nnodes;     /* Product of dims[*], gives the size of the topology */
   int ndims;
   int *dims;
   int *periodic;
   int *position;
-} MPIR_Cart_topology;
+} MPII_Cart_topology;
 
-typedef struct MPIR_Dist_graph_topology {
+typedef struct MPII_Dist_graph_topology {
     int indegree;
     int *in;
     int *in_weights;
@@ -51,14 +51,14 @@ typedef struct MPIR_Dist_graph_topology {
     int *out;
     int *out_weights;
     int is_weighted;
-} MPIR_Dist_graph_topology;
+} MPII_Dist_graph_topology;
 
 struct MPIR_Topology {
   MPIR_Topo_type kind;
   union topo { 
-    MPIR_Graph_topology graph;
-    MPIR_Cart_topology  cart;
-    MPIR_Dist_graph_topology dist_graph;
+    MPII_Graph_topology graph;
+    MPII_Cart_topology  cart;
+    MPII_Dist_graph_topology dist_graph;
   } topo;
 };
 
diff --git a/src/include/mpir_type_defs.h b/src/include/mpir_type_defs.h
index 5bb9484..ec66410 100644
--- a/src/include/mpir_type_defs.h
+++ b/src/include/mpir_type_defs.h
@@ -46,7 +46,7 @@
 
 #include "mpl.h"
 
-/* Use the MPIU_PtrToXXX macros to convert pointers to and from integer types */
+/* Use the MPIR_PtrToXXX macros to convert pointers to and from integer types */
 
 /* The Microsoft compiler will not allow casting of different sized types
  * without
@@ -57,25 +57,25 @@
 
 /* PtrToAint converts a pointer to an MPI_Aint type, truncating bits if necessary */
 #ifdef HAVE_PTRTOAINT
-#define MPIU_PtrToAint(a) ((MPI_Aint)(INT_PTR) (a))
+#define MPIR_Ptr_to_aint(a) ((MPI_Aint)(INT_PTR) (a))
 #else
 /* An MPI_Aint may be *larger* than a pointer.  By using 2 casts, we can
    keep some compilers from complaining about converting a pointer to an
    integer of a different size */
-#define MPIU_PtrToAint(a) ((MPI_Aint)(uintptr_t)(a))
+#define MPIR_Ptr_to_aint(a) ((MPI_Aint)(uintptr_t)(a))
 #endif
 
 /* AintToPtr converts an MPI_Aint to a pointer type, extending bits if necessary */
 #ifdef HAVE_AINTTOPTR
-#define MPIU_AintToPtr(a) ((VOID *)(INT_PTR)((MPI_Aint)a))
+#define MPIR_Aint_to_ptr(a) ((VOID *)(INT_PTR)((MPI_Aint)a))
 #else
-#define MPIU_AintToPtr(a) (void*)(a)
+#define MPIR_Aint_to_ptr(a) (void*)(a)
 #endif
 
 /* Adding the 32-bit compute/64-bit I/O related type-casts in here as
  * they are not a part of the MPI standard yet. */
-#define MPIU_AINT_CAST_TO_VOID_PTR (void *)(intptr_t)
-#define MPIU_VOID_PTR_CAST_TO_MPI_AINT (MPI_Aint)(uintptr_t)
-#define MPIU_PTR_DISP_CAST_TO_MPI_AINT (MPI_Aint)(intptr_t)
+#define MPIR_AINT_CAST_TO_VOID_PTR (void *)(intptr_t)
+#define MPIR_VOID_PTR_CAST_TO_MPI_AINT (MPI_Aint)(uintptr_t)
+#define MPIR_PTR_DISP_CAST_TO_MPI_AINT (MPI_Aint)(intptr_t)
 
 #endif /* !defined(MPIR_TYPE_DEFS_H_INCLUDED) */
diff --git a/src/include/mpir_win.h b/src/include/mpir_win.h
index 12d3233..c19db03 100644
--- a/src/include/mpir_win.h
+++ b/src/include/mpir_win.h
@@ -41,7 +41,7 @@
 
   S*/
 struct MPIR_Win {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
     MPID_Thread_mutex_t mutex;
     MPIR_Errhandler *errhandler;  /* Pointer to the error handler structure */
     void *base;
@@ -83,7 +83,7 @@ struct MPIR_Win {
     MPID_DEV_WIN_DECL
 #endif
 };
-extern MPIU_Object_alloc_t MPIR_Win_mem;
+extern MPIR_Object_alloc_t MPIR_Win_mem;
 /* Preallocated win objects */
 extern MPIR_Win MPIR_Win_direct[];
 
diff --git a/src/include/mpit.h b/src/include/mpit.h
index 51bc5d2..6757358 100644
--- a/src/include/mpit.h
+++ b/src/include/mpit.h
@@ -45,7 +45,7 @@ static inline cvar_table_entry_t * LOOKUP_CVAR_BY_NAME(const char* cvar_name)
     int cvar_idx;
     name2index_hash_t *hash_entry;
     HASH_FIND_STR(cvar_hash, cvar_name, hash_entry);
-    MPIU_Assert(hash_entry != NULL);
+    MPIR_Assert(hash_entry != NULL);
     cvar_idx = hash_entry->idx;
     return (cvar_table_entry_t *)utarray_eltptr(cvar_table, cvar_idx);
 }
@@ -105,7 +105,7 @@ static inline cvar_table_entry_t * LOOKUP_CVAR_BY_NAME(const char* cvar_name)
 #define MPIR_T_CVAR_REGISTER_STATIC(dtype_, name_, addr_, count_, verb_, \
             scope_, default_, cat_, desc_) \
     do { \
-        MPIU_Assert(count_ > 0); \
+        MPIR_Assert(count_ > 0); \
         MPIR_T_CVAR_REGISTER_impl(dtype_, #name_, addr_, count_, MPI_T_ENUM_NULL, \
             verb_, MPI_T_BIND_NO_OBJECT, scope_, NULL, NULL, default_, cat_, desc_); \
     } while (0)
@@ -117,8 +117,8 @@ static inline cvar_table_entry_t * LOOKUP_CVAR_BY_NAME(const char* cvar_name)
 #define MPIR_T_CVAR_REGISTER_DYNAMIC(dtype_, name_, addr_, count_, etype_, \
             verb_, bind_, scope_, get_addr_, get_count_, default_, cat_, desc_) \
     do { \
-        MPIU_Assert(addr_ != NULL || get_addr_ != NULL); \
-        MPIU_Assert(count_ > 0 || get_count_ != NULL); \
+        MPIR_Assert(addr_ != NULL || get_addr_ != NULL); \
+        MPIR_Assert(count_ > 0 || get_count_ != NULL); \
         MPIR_T_CVAR_REGISTER_impl(dtype_, #name_, addr_, count_, etype_, \
             verb_, bind_, scope_, get_addr_, get_count_, default_, cat_, desc_); \
     } while (0)
diff --git a/src/include/mpitimpl.h b/src/include/mpitimpl.h
index 6789fe0..a3af890 100644
--- a/src/include/mpitimpl.h
+++ b/src/include/mpitimpl.h
@@ -376,12 +376,12 @@ extern void MPIR_T_PVAR_REGISTER_impl(
     do { \
         void *addr_; \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_INT); \
+        MPIR_Assert((dtype_) == MPI_INT); \
         /* Double check if dtype_ and name_ match */ \
-        MPIU_Assert(sizeof(PVAR_STATE_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
-        MPIU_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
+        MPIR_Assert(sizeof(PVAR_STATE_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
+        MPIR_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
         /* State pvars should be describled further by an enum */ \
-        MPIU_Assert((etype_) != MPI_T_ENUM_NULL); \
+        MPIR_Assert((etype_) != MPI_T_ENUM_NULL); \
         PVAR_STATE_##name_ = (initval_); \
         addr_ = &PVAR_STATE_##name_; \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_STATE, dtype_, #name_, \
@@ -393,10 +393,10 @@ extern void MPIR_T_PVAR_REGISTER_impl(
             etype_, verb_, bind_, flags_, get_value_, get_count_, cat_, desc_) \
     do { \
         /* Allowable datatypes */ \
-        MPIU_Assert((dtype_) == MPI_INT); \
-        MPIU_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
-        MPIU_Assert((etype_) != MPI_T_ENUM_NULL); \
-        MPIU_Assert((addr_) != NULL || (get_value_) != NULL); \
+        MPIR_Assert((dtype_) == MPI_INT); \
+        MPIR_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
+        MPIR_Assert((etype_) != MPI_T_ENUM_NULL); \
+        MPIR_Assert((addr_) != NULL || (get_value_) != NULL); \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_STATE, dtype_, #name_, \
             addr_, count_, etype_, verb_, bind_, flags_, get_value_, cat_, desc_); \
     } while (0)
@@ -430,11 +430,11 @@ extern void MPIR_T_PVAR_REGISTER_impl(
     do { \
         void *addr_; \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
         /* Double check if dtype_ and name_ match */ \
-        MPIU_Assert(sizeof(PVAR_LEVEL_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
-        MPIU_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
+        MPIR_Assert(sizeof(PVAR_LEVEL_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
+        MPIR_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
         PVAR_LEVEL_##name_ = (initval_); \
         addr_ = &PVAR_LEVEL_##name_; \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_LEVEL, dtype_, #name_, \
@@ -446,10 +446,10 @@ extern void MPIR_T_PVAR_REGISTER_impl(
             addr_, count_, verb_, bind_, flags_, get_value_, get_count, cat_, desc_) \
     do { \
         /* Allowable datatypes */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
-        MPIU_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
-        MPIU_Assert((addr_) != NULL || (get_value_) != NULL); \
+        MPIR_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
+        MPIR_Assert((addr_) != NULL || (get_value_) != NULL); \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_LEVEL, dtype_, #name_, \
             addr_, count_, MPI_T_ENUM_NULL, verb_, bind_, flags_, get_value_, \
             get_count, cat_, desc_); \
@@ -476,11 +476,11 @@ extern void MPIR_T_PVAR_REGISTER_impl(
     do { \
         void *addr_; \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
         /* Double check if dtype_ and name_ match */ \
-        MPIU_Assert(sizeof(PVAR_SIZE_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
-        MPIU_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
+        MPIR_Assert(sizeof(PVAR_SIZE_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
+        MPIR_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
         PVAR_SIZE_##name_ = (initval_); \
         addr_ = &PVAR_SIZE_##name_; \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_SIZE, dtype_, #name_, \
@@ -492,10 +492,10 @@ extern void MPIR_T_PVAR_REGISTER_impl(
             addr_, count_, verb_, bind_, flags_, get_value_, get_count_, cat_, desc_) \
     do { \
         /* Allowable datatypes */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
-        MPIU_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
-        MPIU_Assert((addr_) != NULL || (get_value_) != NULL); \
+        MPIR_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
+        MPIR_Assert((addr_) != NULL || (get_value_) != NULL); \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_SIZE, dtype_, #name_, \
             addr_, count_, MPI_T_ENUM_NULL, verb_, bind_, flags_, get_value_, \
             get_count_, cat_, desc_); \
@@ -508,7 +508,7 @@ extern void MPIR_T_PVAR_REGISTER_impl(
 /* Interfaces through pointer or name */
 #define MPIR_T_PVAR_PERCENTAGE_SET_VAR_impl(ptr_, val_) \
     do { \
-        MPIU_Assert(0.0 <= (val_) && (val_) <= 1.0); \
+        MPIR_Assert(0.0 <= (val_) && (val_) <= 1.0); \
         *(ptr_) = (val_); \
     } while (0)
 #define MPIR_T_PVAR_PERCENTAGE_GET_VAR_impl(ptr_) \
@@ -525,10 +525,10 @@ extern void MPIR_T_PVAR_REGISTER_impl(
     do { \
         void *addr_; \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_DOUBLE); \
+        MPIR_Assert((dtype_) == MPI_DOUBLE); \
         /* Double check if dtype_ and name_ match */ \
-        MPIU_Assert(sizeof(PVAR_PERCENTAGE_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
-        MPIU_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
+        MPIR_Assert(sizeof(PVAR_PERCENTAGE_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
+        MPIR_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
         addr_ = &PVAR_PERCENTAGE_##name_; \
         PVAR_PERCENTAGE_##name_ = (initval_); \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_PERCENTAGE, dtype_, #name_, \
@@ -540,9 +540,9 @@ extern void MPIR_T_PVAR_REGISTER_impl(
             addr_, count_, verb_, bind_, flags_, get_value_, get_count_, cat_, desc_) \
     do { \
         /* Allowable datatypes */ \
-        MPIU_Assert((dtype_) == MPI_DOUBLE); \
-        MPIU_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
-        MPIU_Assert((addr_) != NULL || (get_value_) != NULL); \
+        MPIR_Assert((dtype_) == MPI_DOUBLE); \
+        MPIR_Assert((flags_) & MPIR_T_PVAR_FLAG_CONTINUOUS); \
+        MPIR_Assert((addr_) != NULL || (get_value_) != NULL); \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_PERCENTAGE, dtype_, #name_, \
             addr_, count_, MPI_T_ENUM_NULL, verb_, bind_, flags_, get_value_, \
             get_count_, cat_, desc_); \
@@ -576,10 +576,10 @@ extern void MPIR_T_PVAR_REGISTER_impl(
     do { \
         void *addr_; \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG); \
         /* Double check if dtype_ and name_ match*/ \
-        MPIU_Assert(sizeof(PVAR_COUNTER_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
+        MPIR_Assert(sizeof(PVAR_COUNTER_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
         PVAR_COUNTER_##name_ = 0; \
         addr_ = &PVAR_COUNTER_##name_; \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_COUNTER, dtype_, #name_, \
@@ -591,9 +591,9 @@ extern void MPIR_T_PVAR_REGISTER_impl(
             addr_, count_, verb_, bind_, flags_, get_value_, get_count_, cat_, desc_) \
     do { \
         /* Allowable datatypes */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG); \
-        MPIU_Assert((addr_) != NULL || (get_value_) != NULL); \
+        MPIR_Assert((addr_) != NULL || (get_value_) != NULL); \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_COUNTER, dtype_, #name_, \
             addr_, count_, MPI_T_ENUM_NULL, verb_, bind_, flags_, get_value_, \
             get_count_, cat_, desc_); \
@@ -630,10 +630,10 @@ extern void MPIR_T_PVAR_REGISTER_impl(
         void *addr_; \
         int count_;  \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG); \
         /* Double check if dtype_ and name_ match */ \
-        MPIU_Assert(sizeof(PVAR_COUNTER_##name_[0]) == MPID_Datatype_get_basic_size(dtype_)); \
+        MPIR_Assert(sizeof(PVAR_COUNTER_##name_[0]) == MPID_Datatype_get_basic_size(dtype_)); \
         addr_ = PVAR_COUNTER_##name_; \
         MPIR_T_PVAR_COUNTER_ARRAY_INIT_impl(name_); \
         count_ = sizeof(PVAR_COUNTER_##name_)/sizeof(mpit_pvar_##name_[0]); \
@@ -668,10 +668,10 @@ extern void MPIR_T_PVAR_REGISTER_impl(
     do { \
         void *addr; \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
         /* Double check if dtype_ and name_ match*/ \
-        MPIU_Assert(sizeof(PVAR_AGGREGATE_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
+        MPIR_Assert(sizeof(PVAR_AGGREGATE_##name_) == MPID_Datatype_get_basic_size(dtype_)); \
         PVAR_AGGREGATE_##name_ = 0; \
         addr_ = &PVAR_AGGREGATE_##name_; \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_AGGREGATE, dtype_, #name_, \
@@ -683,9 +683,9 @@ extern void MPIR_T_PVAR_REGISTER_impl(
             addr_, count_, verb_, bind_, flags_, get_value_, get_count_, cat_, desc_) \
     do { \
         /* Allowable datatypes */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
-        MPIU_Assert((addr_) != NULL || (get_value_) != NULL); \
+        MPIR_Assert((addr_) != NULL || (get_value_) != NULL); \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_AGGREGATE, dtype_, #name_, \
             addr_, count_, MPI_T_ENUM_NULL, verb_, bind_, flags_, get_value_, \
             get_count_, cat_, desc_); \
@@ -745,7 +745,7 @@ void get_timer_time_in_double(MPIR_T_pvar_timer_t *timer, void *obj_handle,
         void *addr_; \
         void *count_addr_; \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_DOUBLE); \
+        MPIR_Assert((dtype_) == MPI_DOUBLE); \
         MPIR_T_PVAR_TIMER_INIT_impl(name_); \
         addr_ = &PVAR_TIMER_##name_; \
         count_addr_ = &(PVAR_TIMER_##name_.count); \
@@ -894,7 +894,7 @@ void get_timer_time_in_double(MPIR_T_pvar_timer_t *timer, void *obj_handle,
     do { \
         void *addr_; \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
         switch (dtype_) { \
         case MPI_UNSIGNED: \
@@ -918,9 +918,9 @@ void get_timer_time_in_double(MPIR_T_pvar_timer_t *timer, void *obj_handle,
             addr_, count_, verb_, bind_, flags_, get_value_, get_count_, cat_, desc_) \
     do { \
         /* Allowable datatypes */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
-        MPIU_Assert((addr_) != NULL || (get_value_) != NULL); \
+        MPIR_Assert((addr_) != NULL || (get_value_) != NULL); \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_HIGHWATERMARK, dtype_, #name_, \
             addr_, count_, MPI_T_ENUM_NULL, verb_, bind_, flags_, get_value_, \
             get_count_, cat_, desc_); \
@@ -1059,7 +1059,7 @@ void get_timer_time_in_double(MPIR_T_pvar_timer_t *timer, void *obj_handle,
     do { \
         void *addr_; \
         /* Allowable datatypes only */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
         switch (dtype_) { \
         case MPI_UNSIGNED: \
@@ -1083,9 +1083,9 @@ void get_timer_time_in_double(MPIR_T_pvar_timer_t *timer, void *obj_handle,
             addr_, count_, verb_, bind_, flags_, get_value_, get_count_, cat_, desc_) \
     do { \
         /* Allowable datatypes */ \
-        MPIU_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
+        MPIR_Assert((dtype_) == MPI_UNSIGNED || (dtype_) == MPI_UNSIGNED_LONG || \
                     (dtype_) == MPI_UNSIGNED_LONG_LONG || (dtype_) == MPI_DOUBLE); \
-        MPIU_Assert((addr_) != NULL || (get_value_) != NULL); \
+        MPIR_Assert((addr_) != NULL || (get_value_) != NULL); \
         MPIR_T_PVAR_REGISTER_impl(MPI_T_PVAR_CLASS_LOWWATERMARK, dtype_, #name_, \
             addr_, count_, MPI_T_ENUM_NULL, verb_, bind_, flags_, get_value_, \
             get_count_, cat_, desc_); \
@@ -1178,7 +1178,7 @@ extern MPID_Thread_mutex_t mpi_t_mutex;
         int err_; \
         MPIR_T_THREAD_CHECK_BEGIN \
         MPID_Thread_mutex_create(&mpi_t_mutex, &err_); \
-        MPIU_Assert(err_ == 0); \
+        MPIR_Assert(err_ == 0); \
         MPIR_T_THREAD_CHECK_END \
     } while (0)
 
@@ -1187,7 +1187,7 @@ extern MPID_Thread_mutex_t mpi_t_mutex;
         int err_; \
         MPIR_T_THREAD_CHECK_BEGIN \
         MPID_Thread_mutex_destroy(&mpi_t_mutex, &err_); \
-        MPIU_Assert(err_ == 0); \
+        MPIR_Assert(err_ == 0); \
         MPIR_T_THREAD_CHECK_END \
     } while (0)
 
diff --git a/src/include/rlog_macros.h b/src/include/rlog_macros.h
index 1bf00ce..465b20d 100644
--- a/src/include/rlog_macros.h
+++ b/src/include/rlog_macros.h
@@ -13,29 +13,34 @@
 #error Failed to select a defintion for MPIDM_Wtime_todouble
 #endif
 
+/* prototype the initialization/finalization functions */
+int MPII_Timer_init(int rank, int size);
+int MPII_Timer_finalize(void);
+int MPII_Describe_timer_states(void);
+
 /* structures, global variables */
 /* FIXME: All global names should follow the prefix rules to ensure that 
    there are no collisions with user-defined global names.  g_pRLOG should be
-   RLOG_something */
-extern RLOG_Struct *g_pRLOG;
+   RLOGI_something */
+extern RLOGI_Struct *g_pRLOG;
 
 /* state declaration macros */
-#define MPIDU_STATE_DECL(a) MPID_Time_t time_stamp_in##a , time_stamp_out##a
-#define MPIDU_INIT_STATE_DECL(a)
-#define MPIDU_FINALIZE_STATE_DECL(a)
+#define RLOG_STATE_DECL(a) MPID_Time_t time_stamp_in##a , time_stamp_out##a
+#define RLOG_INIT_STATE_DECL(a)
+#define RLOG_FINALIZE_STATE_DECL(a)
 
 /* function enter and exit macros */
-#define MPIDU_FUNC_ENTER(a) \
+#define RLOG_FUNC_ENTER(a) \
 if (g_pRLOG) \
 { \
     g_pRLOG->nRecursion++; \
     MPID_Wtime( &time_stamp_in##a ); \
 }
 
-#define RLOG_MACRO_HEADER_CAST() ((RLOG_HEADER*)g_pRLOG->pOutput->pCurHeader)
-#define RLOG_MACRO_EVENT_CAST()  ((RLOG_EVENT*)((char*)g_pRLOG->pOutput->pCurHeader + sizeof(RLOG_HEADER)))
+#define RLOGI_MACRO_HEADER_CAST() ((RLOGI_HEADER*)g_pRLOG->pOutput->pCurHeader)
+#define RLOGI_MACRO_EVENT_CAST()  ((RLOGI_EVENT*)((char*)g_pRLOG->pOutput->pCurHeader + sizeof(RLOGI_HEADER)))
 
-#define MPIDU_FUNC_EXIT(a) \
+#define RLOG_FUNC_EXIT(a) \
 if (g_pRLOG) \
 { \
     if (g_pRLOG->bLogging) \
@@ -45,73 +50,122 @@ if (g_pRLOG) \
 	MPIDM_Wtime_todouble( ( &time_stamp_in##a ), &d1); \
 	MPIDM_Wtime_todouble( ( &time_stamp_out##a ), &d2); \
 	g_pRLOG->nRecursion--; \
-	if (g_pRLOG->pOutput->pCurHeader + sizeof(RLOG_HEADER) + sizeof(RLOG_EVENT) > g_pRLOG->pOutput->pEnd) \
+	if (g_pRLOG->pOutput->pCurHeader + sizeof(RLOGI_HEADER) + sizeof(RLOGI_EVENT) > g_pRLOG->pOutput->pEnd) \
 	{ \
 	    WriteCurrentDataAndLogEvent(g_pRLOG, a , d1, d2, g_pRLOG->nRecursion); \
 	} \
 	else \
 	{ \
-	    RLOG_MACRO_HEADER_CAST()->type = RLOG_EVENT_TYPE; \
-	    RLOG_MACRO_HEADER_CAST()->length = sizeof(RLOG_HEADER) + sizeof(RLOG_EVENT); \
-	    RLOG_MACRO_EVENT_CAST()->rank = g_pRLOG->nRank; \
-	    RLOG_MACRO_EVENT_CAST()->end_time = d2 - g_pRLOG->dFirstTimestamp; \
-	    RLOG_MACRO_EVENT_CAST()->start_time = d1 - g_pRLOG->dFirstTimestamp; \
-	    RLOG_MACRO_EVENT_CAST()->event = a ; \
-	    RLOG_MACRO_EVENT_CAST()->recursion = g_pRLOG->nRecursion; \
+	    RLOGI_MACRO_HEADER_CAST()->type = RLOGI_EVENT_TYPE; \
+	    RLOGI_MACRO_HEADER_CAST()->length = sizeof(RLOGI_HEADER) + sizeof(RLOGI_EVENT); \
+	    RLOGI_MACRO_EVENT_CAST()->rank = g_pRLOG->nRank; \
+	    RLOGI_MACRO_EVENT_CAST()->end_time = d2 - g_pRLOG->dFirstTimestamp; \
+	    RLOGI_MACRO_EVENT_CAST()->start_time = d1 - g_pRLOG->dFirstTimestamp; \
+	    RLOGI_MACRO_EVENT_CAST()->event = a ; \
+	    RLOGI_MACRO_EVENT_CAST()->recursion = g_pRLOG->nRecursion; \
 	    /* advance the current position pointer */ \
-	    g_pRLOG->pOutput->pCurHeader += sizeof(RLOG_HEADER) + sizeof(RLOG_EVENT); \
+	    g_pRLOG->pOutput->pCurHeader += sizeof(RLOGI_HEADER) + sizeof(RLOGI_EVENT); \
 	} \
     } \
 }
 
-#define MPIDU_PT2PT_FUNC_ENTER(a)     MPIDU_FUNC_ENTER(a)
-#define MPIDU_PT2PT_FUNC_EXIT(a)      MPIDU_FUNC_EXIT(a)
-#define MPIDU_COLL_FUNC_ENTER(a)      MPIDU_FUNC_ENTER(a)
-#define MPIDU_COLL_FUNC_EXIT(a)       MPIDU_FUNC_EXIT(a)
-#define MPIDU_RMA_FUNC_ENTER(a)       MPIDU_FUNC_ENTER(a)
-#define MPIDU_RMA_FUNC_EXIT(a)        MPIDU_FUNC_EXIT(a)
-#define MPIDU_INIT_FUNC_ENTER(a)
-#define MPIDU_INIT_FUNC_EXIT(a)
-#define MPIDU_FINALIZE_FUNC_ENTER(a)
-#define MPIDU_FINALIZE_FUNC_EXIT(a)
+#define RLOG_PT2PT_FUNC_ENTER(a)     RLOG_FUNC_ENTER(a)
+#define RLOG_PT2PT_FUNC_EXIT(a)      RLOG_FUNC_EXIT(a)
+#define RLOG_COLL_FUNC_ENTER(a)      RLOG_FUNC_ENTER(a)
+#define RLOG_COLL_FUNC_EXIT(a)       RLOG_FUNC_EXIT(a)
+#define RLOG_RMA_FUNC_ENTER(a)       RLOG_FUNC_ENTER(a)
+#define RLOG_RMA_FUNC_EXIT(a)        RLOG_FUNC_EXIT(a)
+#define RLOG_INIT_FUNC_ENTER(a)
+#define RLOG_INIT_FUNC_EXIT(a)
+#define RLOG_FINALIZE_FUNC_ENTER(a)
+#define RLOG_FINALIZE_FUNC_EXIT(a)
 
 /* arrow generating enter and exit macros */
-#define MPIDU_PT2PT_FUNC_ENTER_FRONT(a) \
+#define RLOG_PT2PT_FUNC_ENTER_FRONT(a) \
 if (g_pRLOG) \
 { \
     g_pRLOG->nRecursion++; \
     MPID_Wtime( &time_stamp_in##a ); \
-    RLOG_LogSend( g_pRLOG, dest, tag, count ); \
+    RLOGI_LogSend( g_pRLOG, dest, tag, count ); \
 }
 
-#define MPIDU_PT2PT_FUNC_ENTER_BACK(a) \
+#define RLOG_PT2PT_FUNC_ENTER_BACK(a) \
 if (g_pRLOG) \
 { \
     g_pRLOG->nRecursion++; \
     MPID_Wtime( &time_stamp_in##a ); \
-    RLOG_LogRecv( g_pRLOG, source, tag, count ); \
+    RLOGI_LogRecv( g_pRLOG, source, tag, count ); \
 }
 
-#define MPIDU_PT2PT_FUNC_ENTER_BOTH(a) \
+#define RLOG_PT2PT_FUNC_ENTER_BOTH(a) \
 if (g_pRLOG) \
 { \
     g_pRLOG->nRecursion++; \
     MPID_Wtime( &time_stamp_in##a ); \
-    RLOG_LogSend( g_pRLOG, dest, sendtag, sendcount ); \
+    RLOGI_LogSend( g_pRLOG, dest, sendtag, sendcount ); \
 }
 
-#define MPIDU_PT2PT_FUNC_EXIT_BACK(a) \
+#define RLOG_PT2PT_FUNC_EXIT_BACK(a) \
 if (g_pRLOG) \
 { \
-    RLOG_LogRecv( g_pRLOG, source, tag, count ); \
-    MPIDU_PT2PT_FUNC_EXIT(a) \
+    RLOGI_LogRecv( g_pRLOG, source, tag, count ); \
+    RLOG_PT2PT_FUNC_EXIT(a) \
 }
 
-#define MPIDU_PT2PT_FUNC_EXIT_BOTH(a) \
+#define RLOG_PT2PT_FUNC_EXIT_BOTH(a) \
 if (g_pRLOG) \
 { \
-    RLOG_LogRecv( g_pRLOG, source, recvtag, recvcount ); \
-    MPIDU_PT2PT_FUNC_EXIT(a) \
+    RLOGI_LogRecv( g_pRLOG, source, recvtag, recvcount ); \
+    RLOG_PT2PT_FUNC_EXIT(a) \
 }
 
+
+/* MPI layer definitions */
+#define MPIR_FUNC_TERSE_STATE_DECL(a)                RLOG_STATE_DECL(a)
+#define MPIR_FUNC_TERSE_INIT_STATE_DECL(a)           RLOG_INIT_STATE_DECL(a)
+#define MPIR_FUNC_TERSE_FINALIZE_STATE_DECL(a)       RLOG_FINALIZE_STATE_DECL(a)
+
+#define MPIR_FUNC_TERSE_ENTER(a)                RLOG_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_EXIT(a)                 RLOG_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER(a)          RLOG_PT2PT_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT(a)           RLOG_PT2PT_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_COLL_ENTER(a)           RLOG_COLL_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_COLL_EXIT(a)            RLOG_COLL_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_RMA_ENTER(a)            RLOG_RMA_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_RMA_EXIT(a)             RLOG_RMA_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_INIT_ENTER(a)           RLOG_INIT_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_INIT_EXIT(a)            RLOG_INIT_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_FINALIZE_ENTER(a)       RLOG_FINALIZE_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_FINALIZE_EXIT(a)        RLOG_FINALIZE_FUNC_EXIT(a)
+
+#define MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(a)    RLOG_PT2PT_FUNC_ENTER_FRONT(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT_FRONT(a)     RLOG_PT2PT_FUNC_EXIT(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER_BACK(a)     RLOG_PT2PT_FUNC_ENTER(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT_BACK(a)      RLOG_PT2PT_FUNC_EXIT_BACK(a)
+#define MPIR_FUNC_TERSE_PT2PT_EXIT_BOTH(a)      RLOG_PT2PT_FUNC_EXIT_BOTH(a)
+#define MPIR_FUNC_TERSE_PT2PT_ENTER_BOTH(a)     RLOG_PT2PT_FUNC_ENTER_BOTH(a)
+
+#if defined(HAVE_TIMING) && (HAVE_TIMING == MPICH_TIMING_KIND__LOG_DETAILED || HAVE_TIMING == MPICH_TIMING_KIND__ALL)
+
+/* device layer definitions */
+#define MPIR_FUNC_VERBOSE_STATE_DECL(a)                RLOG_STATE_DECL(a)
+#define MPIR_FUNC_VERBOSE_ENTER(a)                RLOG_FUNC_ENTER(a)
+#define MPIR_FUNC_VERBOSE_EXIT(a)                 RLOG_FUNC_EXIT(a)
+#define MPIR_FUNC_VERBOSE_PT2PT_ENTER(a)          RLOG_PT2PT_FUNC_ENTER(a)
+#define MPIR_FUNC_VERBOSE_PT2PT_EXIT(a)           RLOG_PT2PT_FUNC_EXIT(a)
+#define MPIR_FUNC_VERBOSE_RMA_ENTER(a)            RLOG_RMA_FUNC_ENTER(a)
+#define MPIR_FUNC_VERBOSE_RMA_EXIT(a)             RLOG_RMA_FUNC_EXIT(a)
+
+#else
+
+#define MPIR_FUNC_VERBOSE_STATE_DECL(a)
+#define MPIR_FUNC_VERBOSE_ENTER(a)
+#define MPIR_FUNC_VERBOSE_EXIT(a)
+#define MPIR_FUNC_VERBOSE_PT2PT_ENTER(a)
+#define MPIR_FUNC_VERBOSE_PT2PT_EXIT(a)
+#define MPIR_FUNC_VERBOSE_RMA_ENTER(a)
+#define MPIR_FUNC_VERBOSE_RMA_EXIT(a)
+
+#endif /* (HAVE_TIMING == MPICH_TIMING_KIND__LOG_DETAILED || HAVE_TIMING == MPICH_TIMING_KIND__ALL) */
+
 #endif
diff --git a/src/mpi/attr/attr.h b/src/mpi/attr/attr.h
index 1189382..6a9c0fb 100644
--- a/src/mpi/attr/attr.h
+++ b/src/mpi/attr/attr.h
@@ -16,9 +16,9 @@
 /*
   Keyval and attribute storage
  */
-extern MPIU_Object_alloc_t MPIR_Keyval_mem;
-extern MPIU_Object_alloc_t MPID_Attr_mem;
-extern MPIR_Keyval MPIR_Keyval_direct[];
+extern MPIR_Object_alloc_t MPII_Keyval_mem;
+extern MPIR_Object_alloc_t MPID_Attr_mem;
+extern MPII_Keyval MPII_Keyval_direct[];
 
 extern int MPIR_Attr_dup_list( int, MPIR_Attribute *, MPIR_Attribute ** );
 extern int MPIR_Attr_delete_list( int, MPIR_Attribute ** );
diff --git a/src/mpi/attr/attr_delete.c b/src/mpi/attr/attr_delete.c
index 662eb1c..989e5ba 100644
--- a/src/mpi/attr/attr_delete.c
+++ b/src/mpi/attr/attr_delete.c
@@ -57,13 +57,13 @@ int MPI_Attr_delete(MPI_Comm comm, int keyval)
     static const char FCNAME[] = "MPI_Attr_delete";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPIR_Keyval *keyval_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ATTR_DELETE);
+    MPII_Keyval *keyval_ptr;
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ATTR_DELETE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ATTR_DELETE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ATTR_DELETE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -78,7 +78,7 @@ int MPI_Attr_delete(MPI_Comm comm, int keyval)
     
     /* Convert MPI object handles to object pointers */
     MPIR_Comm_get_ptr( comm, comm_ptr );
-    MPIR_Keyval_get_ptr( keyval, keyval_ptr );
+    MPII_Keyval_get_ptr( keyval, keyval_ptr );
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -89,7 +89,7 @@ int MPI_Attr_delete(MPI_Comm comm, int keyval)
             MPIR_Comm_valid_ptr( comm_ptr, mpi_errno, TRUE );
 	    /* If comm_ptr is not valid, it will be reset to null */
             /* Validate keyval_ptr */
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
         }
         MPID_END_ERROR_CHECKS;
@@ -104,7 +104,7 @@ int MPI_Attr_delete(MPI_Comm comm, int keyval)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ATTR_DELETE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ATTR_DELETE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/attr_get.c b/src/mpi/attr/attr_get.c
index bea3e0e..d69d750 100644
--- a/src/mpi/attr/attr_get.c
+++ b/src/mpi/attr/attr_get.c
@@ -76,12 +76,12 @@ int MPI_Attr_get(MPI_Comm comm, int keyval, void *attribute_val, int *flag)
     static const char FCNAME[] = "MPI_Attr_get";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ATTR_GET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ATTR_GET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ATTR_GET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ATTR_GET);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -123,13 +123,13 @@ int MPI_Attr_get(MPI_Comm comm, int keyval, void *attribute_val, int *flag)
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIR_CommGetAttr( comm, keyval, attribute_val, flag, MPIR_ATTR_PTR);
+    mpi_errno = MPII_Comm_get_attr( comm, keyval, attribute_val, flag, MPIR_ATTR_PTR);
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
 
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ATTR_GET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ATTR_GET);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/attr_put.c b/src/mpi/attr/attr_put.c
index b8d8870..cd154af 100644
--- a/src/mpi/attr/attr_put.c
+++ b/src/mpi/attr/attr_put.c
@@ -75,12 +75,12 @@ int MPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val)
     static const char FCNAME[] = "MPI_Attr_put";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ATTR_PUT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ATTR_PUT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ATTR_PUT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ATTR_PUT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -120,7 +120,7 @@ int MPI_Attr_put(MPI_Comm comm, int keyval, void *attribute_val)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ATTR_PUT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ATTR_PUT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/attrutil.c b/src/mpi/attr/attrutil.c
index e84274a..22fc93c 100644
--- a/src/mpi/attr/attrutil.c
+++ b/src/mpi/attr/attrutil.c
@@ -27,10 +27,10 @@
 #endif
 
 /* Preallocated keyval objects */
-MPIR_Keyval MPIR_Keyval_direct[MPID_KEYVAL_PREALLOC] = { {0} };
-MPIU_Object_alloc_t MPIR_Keyval_mem = { 0, 0, 0, 0, MPIR_KEYVAL,
-					    sizeof(MPIR_Keyval),
-					    MPIR_Keyval_direct,
+MPII_Keyval MPII_Keyval_direct[MPID_KEYVAL_PREALLOC] = { {0} };
+MPIR_Object_alloc_t MPII_Keyval_mem = { 0, 0, 0, 0, MPIR_KEYVAL,
+					    sizeof(MPII_Keyval),
+					    MPII_Keyval_direct,
 					    MPID_KEYVAL_PREALLOC, };
 
 #ifndef MPIR_ATTR_PREALLOC
@@ -39,7 +39,7 @@ MPIU_Object_alloc_t MPIR_Keyval_mem = { 0, 0, 0, 0, MPIR_KEYVAL,
 
 /* Preallocated keyval objects */
 MPIR_Attribute MPID_Attr_direct[MPIR_ATTR_PREALLOC] = { {0} };
-MPIU_Object_alloc_t MPID_Attr_mem = { 0, 0, 0, 0, MPIR_ATTR,
+MPIR_Object_alloc_t MPID_Attr_mem = { 0, 0, 0, 0, MPIR_ATTR,
 					    sizeof(MPIR_Attribute),
 					    MPID_Attr_direct,
 					    MPIR_ATTR_PREALLOC, };
@@ -47,17 +47,17 @@ MPIU_Object_alloc_t MPID_Attr_mem = { 0, 0, 0, 0, MPIR_ATTR,
 /* Provides a way to trap all attribute allocations when debugging leaks. */
 MPIR_Attribute *MPID_Attr_alloc(void)
 {
-    MPIR_Attribute *attr = (MPIR_Attribute *)MPIU_Handle_obj_alloc(&MPID_Attr_mem);
+    MPIR_Attribute *attr = (MPIR_Attribute *)MPIR_Handle_obj_alloc(&MPID_Attr_mem);
     /* attributes don't have refcount semantics, but let's keep valgrind and
      * the debug logging pacified */
-    MPIU_Assert(attr != NULL);
-    MPIU_Object_set_ref(attr, 0);
+    MPIR_Assert(attr != NULL);
+    MPIR_Object_set_ref(attr, 0);
     return attr;
 }
 
 void MPID_Attr_free(MPIR_Attribute *attr_ptr)
 {
-    MPIU_Handle_obj_free(&MPID_Attr_mem, attr_ptr);
+    MPIR_Handle_obj_free(&MPID_Attr_mem, attr_ptr);
 }
 
 #undef FUNCNAME
@@ -81,7 +81,7 @@ int MPIR_Call_attr_delete( int handle, MPIR_Attribute *attr_p )
 {
     int rc;
     int mpi_errno = MPI_SUCCESS;
-    MPIR_Keyval* kv = attr_p->keyval;
+    MPII_Keyval* kv = attr_p->keyval;
 
     if(kv->delfn.user_function == NULL)
         goto fn_exit;
@@ -96,7 +96,7 @@ int MPIR_Call_attr_delete( int handle, MPIR_Attribute *attr_p )
                 );
     /* --BEGIN ERROR HANDLING-- */
     if(rc != 0){
-#if MPICH_ERROR_MSG_LEVEL < MPICH_ERROR_MSG_ALL
+#if MPICH_ERROR_MSG_LEVEL < MPICH_ERROR_MSG__ALL
 	/* If rc is a valid error class, then return that.  
 	   Note that it may be a dynamic error class */
 	/* AMBIGUOUS: This is an ambiguity in the MPI standard: What is the
@@ -138,7 +138,7 @@ int MPIR_Call_attr_copy( int handle, MPIR_Attribute *attr_p, void** value_copy,
 {
     int mpi_errno = MPI_SUCCESS;
     int rc;
-    MPIR_Keyval* kv = attr_p->keyval;
+    MPII_Keyval* kv = attr_p->keyval;
 
     if(kv->copyfn.user_function == NULL)
         goto fn_exit;
@@ -156,7 +156,7 @@ int MPIR_Call_attr_copy( int handle, MPIR_Attribute *attr_p, void** value_copy,
 
     /* --BEGIN ERROR HANDLING-- */
     if(rc != 0){
-#if MPICH_ERROR_MSG_LEVEL < MPICH_ERROR_MSG_ALL
+#if MPICH_ERROR_MSG_LEVEL < MPICH_ERROR_MSG__ALL
 	mpi_errno = rc;
 #else
         mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**user", "**usercopy %d", rc);
@@ -217,13 +217,13 @@ int MPIR_Attr_dup_list( int handle, MPIR_Attribute *old_attrs,
 
         new_p->keyval = p->keyval;
         /* Remember that we need this keyval */
-        MPIR_Keyval_add_ref(p->keyval);
+        MPII_Keyval_add_ref(p->keyval);
 
         new_p->attrType         = p->attrType;
         new_p->pre_sentinal     = 0;
 	/* FIXME: This is not correct in some cases (size(MPI_Aint)>
 	 sizeof(intptr_t)) */
-        new_p->value            = (MPIR_AttrVal_t)(intptr_t)new_value;
+        new_p->value            = (MPII_Attr_val_t)(intptr_t)new_value;
         new_p->post_sentinal    = 0;
         new_p->next             = 0;
 
@@ -277,13 +277,13 @@ int MPIR_Attr_delete_list( int handle, MPIR_Attribute **attr )
 	{
 	    int in_use;
 	    /* Decrement the use of the keyval */
-	    MPIR_Keyval_release_ref( p->keyval, &in_use);
+	    MPII_Keyval_release_ref( p->keyval, &in_use);
 	    if (!in_use) {
-		MPIU_Handle_obj_free( &MPIR_Keyval_mem, p->keyval );
+		MPIR_Handle_obj_free( &MPII_Keyval_mem, p->keyval );
 	    }
 	}
 	
-	MPIU_Handle_obj_free( &MPID_Attr_mem, p );
+	MPIR_Handle_obj_free( &MPID_Attr_mem, p );
 	
 	p = new_p;
     }
@@ -302,12 +302,12 @@ int MPIR_Attr_delete_list( int handle, MPIR_Attribute **attr )
 }
 
 int
-MPIR_Attr_copy_c_proxy(
+MPII_Attr_copy_c_proxy(
     MPI_Comm_copy_attr_function* user_function,
     int handle,
     int keyval,
     void* extra_state,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void** attrib_copy,
     int* flag
@@ -317,7 +317,7 @@ MPIR_Attr_copy_c_proxy(
     int ret;
 
     /* Make sure that the attribute value is delieverd as a pointer */
-    if (MPIR_ATTR_KIND(attrib_type) == MPIR_ATTR_KIND(MPIR_ATTR_INT)){
+    if (MPII_ATTR_KIND(attrib_type) == MPII_ATTR_KIND(MPIR_ATTR_INT)){
         attrib_val = &attrib;
     }
     else{
@@ -336,11 +336,11 @@ MPIR_Attr_copy_c_proxy(
 
 
 int
-MPIR_Attr_delete_c_proxy(
+MPII_Attr_delete_c_proxy(
     MPI_Comm_delete_attr_function* user_function,
     int handle,
     int keyval,
-    MPIR_AttrType attrib_type,
+    MPIR_Attr_type attrib_type,
     void* attrib,
     void* extra_state
     )
@@ -349,7 +349,7 @@ MPIR_Attr_delete_c_proxy(
     int ret;
 
     /* Make sure that the attribute value is delieverd as a pointer */
-    if (MPIR_ATTR_KIND(attrib_type) == MPIR_ATTR_KIND(MPIR_ATTR_INT))
+    if (MPII_ATTR_KIND(attrib_type) == MPII_ATTR_KIND(MPIR_ATTR_INT))
         attrib_val = &attrib;
     else
         attrib_val = attrib;
@@ -366,14 +366,14 @@ MPIR_Attr_delete_c_proxy(
 
 /* FIXME: Missing routine description */
 void
-MPIR_Keyval_set_proxy(
+MPII_Keyval_set_proxy(
     int keyval,
-    MPIR_Attr_copy_proxy copy_proxy,
-    MPIR_Attr_delete_proxy delete_proxy
+    MPII_Attr_copy_proxy copy_proxy,
+    MPII_Attr_delete_proxy delete_proxy
     )
 {
-    MPIR_Keyval*  keyval_ptr;
-    MPIR_Keyval_get_ptr( keyval, keyval_ptr );
+    MPII_Keyval*  keyval_ptr;
+    MPII_Keyval_get_ptr( keyval, keyval_ptr );
     if(keyval_ptr == NULL)
         return;
 
diff --git a/src/mpi/attr/comm_create_keyval.c b/src/mpi/attr/comm_create_keyval.c
index 251052b..bb4a155 100644
--- a/src/mpi/attr/comm_create_keyval.c
+++ b/src/mpi/attr/comm_create_keyval.c
@@ -46,9 +46,9 @@ int MPIR_Comm_create_keyval_impl(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
                                  int *comm_keyval, void *extra_state)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIR_Keyval *keyval_ptr;
+    MPII_Keyval *keyval_ptr;
         
-    keyval_ptr = (MPIR_Keyval *)MPIU_Handle_obj_alloc( &MPIR_Keyval_mem );
+    keyval_ptr = (MPII_Keyval *)MPIR_Handle_obj_alloc( &MPII_Keyval_mem );
     MPIR_ERR_CHKANDJUMP(!keyval_ptr, mpi_errno, MPI_ERR_OTHER,"**nomem");
 
     /* Initialize the attribute dup function */
@@ -61,14 +61,14 @@ int MPIR_Comm_create_keyval_impl(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
        field */
     keyval_ptr->handle           = (keyval_ptr->handle & ~(0x03c00000)) |
 	                           (MPIR_COMM << 22);
-    MPIU_Object_set_ref(keyval_ptr,1);
+    MPIR_Object_set_ref(keyval_ptr,1);
     keyval_ptr->was_freed        = 0;
     keyval_ptr->kind	         = MPIR_COMM;
     keyval_ptr->extra_state      = extra_state;
     keyval_ptr->copyfn.user_function = comm_copy_attr_fn;
-    keyval_ptr->copyfn.proxy = MPIR_Attr_copy_c_proxy;
+    keyval_ptr->copyfn.proxy = MPII_Attr_copy_c_proxy;
     keyval_ptr->delfn.user_function = comm_delete_attr_fn;
-    keyval_ptr->delfn.proxy = MPIR_Attr_delete_c_proxy;
+    keyval_ptr->delfn.proxy = MPII_Attr_delete_c_proxy;
 
     MPIR_OBJ_PUBLISH_HANDLE(*comm_keyval, keyval_ptr->handle);
 
@@ -126,12 +126,12 @@ int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
 			   int *comm_keyval, void *extra_state)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_CREATE_KEYVAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_CREATE_KEYVAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_CREATE_KEYVAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_CREATE_KEYVAL);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -152,7 +152,7 @@ int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_CREATE_KEYVAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_CREATE_KEYVAL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/comm_delete_attr.c b/src/mpi/attr/comm_delete_attr.c
index f35c18d..3ec8476 100644
--- a/src/mpi/attr/comm_delete_attr.c
+++ b/src/mpi/attr/comm_delete_attr.c
@@ -30,7 +30,7 @@ int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval) __attribute__((weak,ali
 #define FUNCNAME MPIR_Comm_delete_attr_impl
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Comm_delete_attr_impl(MPIR_Comm *comm_ptr, MPIR_Keyval *keyval_ptr)
+int MPIR_Comm_delete_attr_impl(MPIR_Comm *comm_ptr, MPII_Keyval *keyval_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Attribute *p, **old_p;
@@ -67,9 +67,9 @@ int MPIR_Comm_delete_attr_impl(MPIR_Comm *comm_ptr, MPIR_Keyval *keyval_ptr)
         /* We found the attribute.  Remove it from the list */
         *old_p = p->next;
         /* Decrement the use of the keyval */
-        MPIR_Keyval_release_ref( p->keyval, &in_use);
+        MPII_Keyval_release_ref( p->keyval, &in_use);
         if (!in_use) {
-            MPIU_Handle_obj_free( &MPIR_Keyval_mem, p->keyval );
+            MPIR_Handle_obj_free( &MPII_Keyval_mem, p->keyval );
         }
         MPID_Attr_free(p);
     }
@@ -109,13 +109,13 @@ int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPIR_Keyval *keyval_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_DELETE_ATTR);
+    MPII_Keyval *keyval_ptr;
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_DELETE_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_DELETE_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_DELETE_ATTR);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -132,7 +132,7 @@ int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval)
 
     /* Convert MPI object handles to object pointers */
     MPIR_Comm_get_ptr( comm, comm_ptr );
-    MPIR_Keyval_get_ptr( comm_keyval, keyval_ptr );
+    MPII_Keyval_get_ptr( comm_keyval, keyval_ptr );
     
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -143,7 +143,7 @@ int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval)
             MPIR_Comm_valid_ptr( comm_ptr, mpi_errno, TRUE );
 	    /* If comm_ptr is not valid, it will be reset to null */
             /* Validate keyval_ptr */
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
 	}
         MPID_END_ERROR_CHECKS;
@@ -158,7 +158,7 @@ int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_DELETE_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_DELETE_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/comm_free_keyval.c b/src/mpi/attr/comm_free_keyval.c
index 6fa4922..289ccf8 100644
--- a/src/mpi/attr/comm_free_keyval.c
+++ b/src/mpi/attr/comm_free_keyval.c
@@ -33,14 +33,14 @@ int MPI_Comm_free_keyval(int *comm_keyval) __attribute__((weak,alias("PMPI_Comm_
 void MPIR_Comm_free_keyval_impl(int keyval)
 {
     int in_use;
-    MPIR_Keyval *keyval_ptr;
+    MPII_Keyval *keyval_ptr;
     
-    MPIR_Keyval_get_ptr(keyval, keyval_ptr);
+    MPII_Keyval_get_ptr(keyval, keyval_ptr);
     if (!keyval_ptr->was_freed) {
         keyval_ptr->was_freed = 1;
-        MPIR_Keyval_release_ref( keyval_ptr, &in_use);
+        MPII_Keyval_release_ref( keyval_ptr, &in_use);
         if (!in_use) {
-            MPIU_Handle_obj_free( &MPIR_Keyval_mem, keyval_ptr );
+            MPIR_Handle_obj_free( &MPII_Keyval_mem, keyval_ptr );
         }
     }
     return;
@@ -74,12 +74,12 @@ Key values are global (they can be used with any and all communicators)
 int MPI_Comm_free_keyval(int *comm_keyval)
 {
     int          mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_FREE_KEYVAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_FREE_KEYVAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_FREE_KEYVAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_FREE_KEYVAL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -99,12 +99,12 @@ int MPI_Comm_free_keyval(int *comm_keyval)
     {
         MPID_BEGIN_ERROR_CHECKS;
         {
-            MPIR_Keyval *keyval_ptr = NULL;
+            MPII_Keyval *keyval_ptr = NULL;
 
             /* Convert MPI object handles to object pointers */
-            MPIR_Keyval_get_ptr( *comm_keyval, keyval_ptr );
+            MPII_Keyval_get_ptr( *comm_keyval, keyval_ptr );
 
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
         }
         MPID_END_ERROR_CHECKS;
@@ -121,7 +121,7 @@ int MPI_Comm_free_keyval(int *comm_keyval)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_FREE_KEYVAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_FREE_KEYVAL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/comm_get_attr.c b/src/mpi/attr/comm_get_attr.c
index 8fd2d5d..0a93c92 100644
--- a/src/mpi/attr/comm_get_attr.c
+++ b/src/mpi/attr/comm_get_attr.c
@@ -26,7 +26,7 @@ int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval, void *attribute_val, int *
 #define MPI_Comm_get_attr PMPI_Comm_get_attr
 
 #undef FUNCNAME
-#define FUNCNAME MPIR_CommGetAttr
+#define FUNCNAME MPII_Comm_get_attr
 
 /* Find the requested attribute.  If it exists, return either the attribute
    entry or the address of the entry, based on whether the request is for 
@@ -36,19 +36,19 @@ int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval, void *attribute_val, int *
    If the attribute has the same type as the request, it is returned as-is.
    Otherwise, the address of the attribute is returned.
 */
-int MPIR_CommGetAttr( MPI_Comm comm, int comm_keyval, void *attribute_val, 
-		      int *flag, MPIR_AttrType outAttrType )
+int MPII_Comm_get_attr( MPI_Comm comm, int comm_keyval, void *attribute_val,
+		      int *flag, MPIR_Attr_type outAttrType )
 {
-    static const char FCNAME[] = "MPIR_CommGetAttr";
+    static const char FCNAME[] = "MPII_Comm_get_attr";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     static PreDefined_attrs attr_copy;    /* Used to provide a copy of the
 					     predefined attributes */
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_GET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_GET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_GET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_GET_ATTR);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -275,7 +275,7 @@ int MPIR_CommGetAttr( MPI_Comm comm, int comm_keyval, void *attribute_val,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_GET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_GET_ATTR);
     return mpi_errno;
 
   fn_fail:
@@ -294,14 +294,14 @@ int MPIR_CommGetAttr( MPI_Comm comm, int comm_keyval, void *attribute_val,
 
 /* This function is called by the fortran bindings. */
 /* FIXME: There is no reason to have this routine since it unnecessarily 
-   duplicates the MPIR_CommGetAttr interface. */
-int MPIR_CommGetAttr_fort(MPI_Comm comm, int comm_keyval, void *attribute_val,
-                          int *flag, MPIR_AttrType outAttrType )
+   duplicates the MPII_Comm_get_attr interface. */
+int MPII_Comm_get_attr_fort(MPI_Comm comm, int comm_keyval, void *attribute_val,
+                          int *flag, MPIR_Attr_type outAttrType )
 {
     int mpi_errno;
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    mpi_errno = MPIR_CommGetAttr(comm, comm_keyval, attribute_val, flag, outAttrType);
+    mpi_errno = MPII_Comm_get_attr(comm, comm_keyval, attribute_val, flag, outAttrType);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
     return mpi_errno;
@@ -351,21 +351,21 @@ int MPI_Comm_get_attr(MPI_Comm comm, int comm_keyval, void *attribute_val,
 		      int *flag)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_GET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_GET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_GET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_GET_ATTR);
 
     /* Instead, ask for a desired type. */
-    mpi_errno = MPIR_CommGetAttr( comm, comm_keyval, attribute_val, flag, 
+    mpi_errno = MPII_Comm_get_attr( comm, comm_keyval, attribute_val, flag,
 				  MPIR_ATTR_PTR );
     if (mpi_errno) goto fn_fail;
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_GET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_GET_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/comm_set_attr.c b/src/mpi/attr/comm_set_attr.c
index ae7ff50..db47c40 100644
--- a/src/mpi/attr/comm_set_attr.c
+++ b/src/mpi/attr/comm_set_attr.c
@@ -31,10 +31,10 @@ int MPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val) __att
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Comm_set_attr_impl(MPIR_Comm *comm_ptr, int comm_keyval, void *attribute_val,
-                            MPIR_AttrType attrType)
+                            MPIR_Attr_type attrType)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIR_Keyval *keyval_ptr = NULL;
+    MPII_Keyval *keyval_ptr = NULL;
     MPIR_Attribute *p;
 
     MPIR_ERR_CHKANDJUMP(comm_keyval == MPI_KEYVAL_INVALID, mpi_errno, MPI_ERR_KEYVAL, "**keyvalinvalid");
@@ -44,8 +44,8 @@ int MPIR_Comm_set_attr_impl(MPIR_Comm *comm_ptr, int comm_keyval, void *attribut
        a simple linear list algorithm because few applications use more than a 
        handful of attributes */
 
-    MPIR_Keyval_get_ptr( comm_keyval, keyval_ptr );
-    MPIU_Assert(keyval_ptr != NULL);
+    MPII_Keyval_get_ptr( comm_keyval, keyval_ptr );
+    MPIR_Assert(keyval_ptr != NULL);
 
     /* printf( "Setting attr val to %x\n", attribute_val ); */
     p     = comm_ptr->attributes;
@@ -65,7 +65,7 @@ int MPIR_Comm_set_attr_impl(MPIR_Comm *comm_ptr, int comm_keyval, void *attribut
 	       dual casts are a sign that this is faulty. This will 
 	       need to be fixed in the type/win set_attr routines as 
 	       well. */
-	    p->value    = (MPIR_AttrVal_t)(intptr_t)attribute_val;
+	    p->value    = (MPII_Attr_val_t)(intptr_t)attribute_val;
 	    /* printf( "Updating attr at %x\n", &p->value ); */
 	    /* Does not change the reference count on the keyval */
 	    break;
@@ -81,10 +81,10 @@ int MPIR_Comm_set_attr_impl(MPIR_Comm *comm_ptr, int comm_keyval, void *attribut
 	new_p->attrType      = attrType;
 	new_p->pre_sentinal  = 0;
 	/* FIXME: See the comment above on this dual cast. */
-	new_p->value	     = (MPIR_AttrVal_t)(intptr_t)attribute_val;
+	new_p->value	     = (MPII_Attr_val_t)(intptr_t)attribute_val;
 	new_p->post_sentinal = 0;
 	new_p->next	     = comm_ptr->attributes;
-	MPIR_Keyval_add_ref( keyval_ptr );
+	MPII_Keyval_add_ref( keyval_ptr );
 	comm_ptr->attributes = new_p;
 	/* printf( "Creating attr at %x\n", &new_p->value ); */
     }
@@ -103,20 +103,20 @@ int MPIR_Comm_set_attr_impl(MPIR_Comm *comm_ptr, int comm_keyval, void *attribut
 
 
 #undef FUNCNAME
-#define FUNCNAME MPIR_CommSetAttr
+#define FUNCNAME MPII_Comm_set_attr
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_CommSetAttr( MPI_Comm comm, int comm_keyval, void *attribute_val, 
-		      MPIR_AttrType attrType )
+int MPII_Comm_set_attr( MPI_Comm comm, int comm_keyval, void *attribute_val,
+		      MPIR_Attr_type attrType )
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_SET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_SET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_SET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_SET_ATTR);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -139,14 +139,14 @@ int MPIR_CommSetAttr( MPI_Comm comm, int comm_keyval, void *attribute_val,
     {
         MPID_BEGIN_ERROR_CHECKS;
         {
-            MPIR_Keyval *keyval_ptr = NULL;
+            MPII_Keyval *keyval_ptr = NULL;
 
             /* Validate comm_ptr */
             MPIR_Comm_valid_ptr( comm_ptr, mpi_errno, TRUE );
 	    /* If comm_ptr is not valid, it will be reset to null */
 	    /* Validate keyval_ptr */
-            MPIR_Keyval_get_ptr( comm_keyval, keyval_ptr );
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+            MPII_Keyval_get_ptr( comm_keyval, keyval_ptr );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
 	}
         MPID_END_ERROR_CHECKS;
@@ -160,7 +160,7 @@ int MPIR_CommSetAttr( MPI_Comm comm, int comm_keyval, void *attribute_val,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_SET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_SET_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
@@ -220,10 +220,10 @@ int MPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_SET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_SET_ATTR);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_SET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_SET_ATTR);
 
      /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -246,14 +246,14 @@ int MPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val)
     {
         MPID_BEGIN_ERROR_CHECKS;
         {
-            MPIR_Keyval *keyval_ptr = NULL;
+            MPII_Keyval *keyval_ptr = NULL;
 
             /* Validate comm_ptr */
             MPIR_Comm_valid_ptr( comm_ptr, mpi_errno, TRUE );
 	    /* If comm_ptr is not valid, it will be reset to null */
 	    /* Validate keyval_ptr */
-            MPIR_Keyval_get_ptr( comm_keyval, keyval_ptr );
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+            MPII_Keyval_get_ptr( comm_keyval, keyval_ptr );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
 	}
         MPID_END_ERROR_CHECKS;
@@ -266,7 +266,7 @@ int MPI_Comm_set_attr(MPI_Comm comm, int comm_keyval, void *attribute_val)
     /* ... end of body of routine ... */
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_SET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_SET_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/keyval_create.c b/src/mpi/attr/keyval_create.c
index 0b90d3f..1e0282c 100644
--- a/src/mpi/attr/keyval_create.c
+++ b/src/mpi/attr/keyval_create.c
@@ -72,12 +72,12 @@ int MPI_Keyval_create(MPI_Copy_function *copy_fn,
 {
     static const char FCNAME[] = "MPI_Keyval_create";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_KEYVAL_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_KEYVAL_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_KEYVAL_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_KEYVAL_CREATE);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -98,7 +98,7 @@ int MPI_Keyval_create(MPI_Copy_function *copy_fn,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_KEYVAL_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_KEYVAL_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/keyval_free.c b/src/mpi/attr/keyval_free.c
index 26ddf07..b221a64 100644
--- a/src/mpi/attr/keyval_free.c
+++ b/src/mpi/attr/keyval_free.c
@@ -59,12 +59,12 @@ int MPI_Keyval_free(int *keyval)
 {
     static const char FCNAME[] = "MPI_Keyval_free";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_KEYVAL_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_KEYVAL_FREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_KEYVAL_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_KEYVAL_FREE);
 #   ifdef HAVE_ERROR_CHECKING
     {
         MPID_BEGIN_ERROR_CHECKS;
@@ -80,12 +80,12 @@ int MPI_Keyval_free(int *keyval)
     {
         MPID_BEGIN_ERROR_CHECKS;
         {
-            MPIR_Keyval *keyval_ptr = NULL;
+            MPII_Keyval *keyval_ptr = NULL;
 
             /* Convert MPI object handles to object pointers */
-            MPIR_Keyval_get_ptr( *keyval, keyval_ptr );
+            MPII_Keyval_get_ptr( *keyval, keyval_ptr );
 
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
         }
         MPID_END_ERROR_CHECKS;
@@ -100,7 +100,7 @@ int MPI_Keyval_free(int *keyval)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_KEYVAL_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_KEYVAL_FREE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/type_create_keyval.c b/src/mpi/attr/type_create_keyval.c
index 0d53c54..6a95b31 100644
--- a/src/mpi/attr/type_create_keyval.c
+++ b/src/mpi/attr/type_create_keyval.c
@@ -76,13 +76,13 @@ int MPI_Type_create_keyval(MPI_Type_copy_attr_function *type_copy_attr_fn,
 {
     static const char FCNAME[] = "MPI_Type_create_keyval";
     int mpi_errno = MPI_SUCCESS;
-    MPIR_Keyval *keyval_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_KEYVAL);
+    MPII_Keyval *keyval_ptr;
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_KEYVAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_KEYVAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_KEYVAL);
     
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -97,7 +97,7 @@ int MPI_Type_create_keyval(MPI_Type_copy_attr_function *type_copy_attr_fn,
 
     /* ... body of routine ...  */
     
-    keyval_ptr = (MPIR_Keyval *)MPIU_Handle_obj_alloc( &MPIR_Keyval_mem );
+    keyval_ptr = (MPII_Keyval *)MPIR_Handle_obj_alloc( &MPII_Keyval_mem );
     MPIR_ERR_CHKANDJUMP(!keyval_ptr,mpi_errno,MPI_ERR_OTHER,"**nomem");
 
     /* Initialize the attribute dup function */
@@ -110,24 +110,24 @@ int MPI_Type_create_keyval(MPI_Type_copy_attr_function *type_copy_attr_fn,
        field */
     keyval_ptr->handle           = (keyval_ptr->handle & ~(0x03c00000)) |
 	(MPIR_DATATYPE << 22);
-    MPIU_Object_set_ref(keyval_ptr,1);
+    MPIR_Object_set_ref(keyval_ptr,1);
     keyval_ptr->was_freed        = 0;
     keyval_ptr->kind	         = MPIR_DATATYPE;
     keyval_ptr->extra_state      = extra_state;
     keyval_ptr->copyfn.user_function = type_copy_attr_fn;
-    keyval_ptr->copyfn.proxy = MPIR_Attr_copy_c_proxy;
+    keyval_ptr->copyfn.proxy = MPII_Attr_copy_c_proxy;
     keyval_ptr->delfn.user_function = type_delete_attr_fn;
-    keyval_ptr->delfn.proxy = MPIR_Attr_delete_c_proxy;
+    keyval_ptr->delfn.proxy = MPII_Attr_delete_c_proxy;
 
     /* Tell finalize to check for attributes on permenant types */
-    MPIR_DatatypeAttrFinalize();
+    MPII_Datatype_attr_finalize();
     
     MPIR_OBJ_PUBLISH_HANDLE(*type_keyval, keyval_ptr->handle);
 
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_KEYVAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_KEYVAL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/type_delete_attr.c b/src/mpi/attr/type_delete_attr.c
index 6a174b8..5010ede 100644
--- a/src/mpi/attr/type_delete_attr.c
+++ b/src/mpi/attr/type_delete_attr.c
@@ -54,15 +54,15 @@ int MPI_Type_delete_attr(MPI_Datatype datatype, int type_keyval)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Datatype *type_ptr = NULL;
     MPIR_Attribute *p, **old_p;
-    MPIR_Keyval *keyval_ptr = 0;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_DELETE_ATTR);
+    MPII_Keyval *keyval_ptr = 0;
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_DELETE_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     /* The thread lock prevents a valid attr delete on the same datatype
        but in a different thread from causing problems */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_DELETE_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_DELETE_ATTR);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -79,7 +79,7 @@ int MPI_Type_delete_attr(MPI_Datatype datatype, int type_keyval)
 
     /* Validate parameters and objects (post conversion) */
     MPID_Datatype_get_ptr( datatype, type_ptr );
-    MPIR_Keyval_get_ptr( type_keyval, keyval_ptr );
+    MPII_Keyval_get_ptr( type_keyval, keyval_ptr );
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -129,10 +129,10 @@ int MPI_Type_delete_attr(MPI_Datatype datatype, int type_keyval)
 	    /* We found the attribute.  Remove it from the list */
 	    *old_p = p->next;
 	    /* Decrement the use of the keyval */
-	    MPIR_Keyval_release_ref( p->keyval, &in_use);
+	    MPII_Keyval_release_ref( p->keyval, &in_use);
 	    if (!in_use)
 	    {
-		MPIU_Handle_obj_free( &MPIR_Keyval_mem, p->keyval );
+		MPIR_Handle_obj_free( &MPII_Keyval_mem, p->keyval );
 	    }
 	    MPID_Attr_free(p);
 	}
@@ -144,7 +144,7 @@ int MPI_Type_delete_attr(MPI_Datatype datatype, int type_keyval)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_DELETE_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_DELETE_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/type_free_keyval.c b/src/mpi/attr/type_free_keyval.c
index 1fdbc86..17e524f 100644
--- a/src/mpi/attr/type_free_keyval.c
+++ b/src/mpi/attr/type_free_keyval.c
@@ -51,15 +51,15 @@ int MPI_Type_free_keyval(int *type_keyval)
 #ifdef HAVE_ERROR_CHECKING
     static const char FCNAME[] = "MPI_Type_free_keyval";
 #endif
-    MPIR_Keyval *keyval_ptr = NULL;
+    MPII_Keyval *keyval_ptr = NULL;
     int          in_use;
     int          mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_FREE_KEYVAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_FREE_KEYVAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_FREE_KEYVAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_FREE_KEYVAL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -75,14 +75,14 @@ int MPI_Type_free_keyval(int *type_keyval)
 #   endif
     
     /* Convert MPI object handles to object pointers */
-    MPIR_Keyval_get_ptr( *type_keyval, keyval_ptr );
+    MPII_Keyval_get_ptr( *type_keyval, keyval_ptr );
     
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
     {
         MPID_BEGIN_ERROR_CHECKS;
         {
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
         }
         MPID_END_ERROR_CHECKS;
@@ -93,9 +93,9 @@ int MPI_Type_free_keyval(int *type_keyval)
     
     if (!keyval_ptr->was_freed) {
         keyval_ptr->was_freed = 1;
-        MPIR_Keyval_release_ref( keyval_ptr, &in_use);
+        MPII_Keyval_release_ref( keyval_ptr, &in_use);
         if (!in_use) {
-            MPIU_Handle_obj_free( &MPIR_Keyval_mem, keyval_ptr );
+            MPIR_Handle_obj_free( &MPII_Keyval_mem, keyval_ptr );
         }
     }
     *type_keyval = MPI_KEYVAL_INVALID;
@@ -105,7 +105,7 @@ int MPI_Type_free_keyval(int *type_keyval)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_FREE_KEYVAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_FREE_KEYVAL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/type_get_attr.c b/src/mpi/attr/type_get_attr.c
index 2da9d25..49cba5b 100644
--- a/src/mpi/attr/type_get_attr.c
+++ b/src/mpi/attr/type_get_attr.c
@@ -27,10 +27,10 @@ int MPI_Type_get_attr(MPI_Datatype datatype, int type_keyval, void *attribute_va
 #define MPI_Type_get_attr PMPI_Type_get_attr
 
 #undef FUNCNAME
-#define FUNCNAME MPIR_TypeGetAttr
+#define FUNCNAME MPII_Type_get_attr
 
-int MPIR_TypeGetAttr( MPI_Datatype datatype, int type_keyval, void *attribute_val,
-		      int *flag, MPIR_AttrType outAttrType )
+int MPII_Type_get_attr( MPI_Datatype datatype, int type_keyval, void *attribute_val,
+		      int *flag, MPIR_Attr_type outAttrType )
 {
 #ifdef HAVE_ERROR_CHECKING
     static const char FCNAME[] = "MPI_Type_get_attr";
@@ -38,12 +38,12 @@ int MPIR_TypeGetAttr( MPI_Datatype datatype, int type_keyval, void *attribute_va
     int mpi_errno = MPI_SUCCESS;
     MPIR_Datatype *type_ptr = NULL;
     MPIR_Attribute *p;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_TYPE_GET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_TYPE_GET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_TYPE_GET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_TYPE_GET_ATTR);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -129,7 +129,7 @@ int MPIR_TypeGetAttr( MPI_Datatype datatype, int type_keyval, void *attribute_va
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_TYPE_GET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_TYPE_GET_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
@@ -189,21 +189,21 @@ int MPI_Type_get_attr(MPI_Datatype datatype, int type_keyval, void *attribute_va
 		      int *flag)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_GET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_GET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_GET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_GET_ATTR);
     
     /* ... body of routine ...  */
-    mpi_errno = MPIR_TypeGetAttr( datatype, type_keyval, attribute_val, flag,
+    mpi_errno = MPII_Type_get_attr( datatype, type_keyval, attribute_val, flag,
 				  MPIR_ATTR_PTR );
     if (mpi_errno) goto fn_fail;
     
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_GET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_GET_ATTR);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/attr/type_set_attr.c b/src/mpi/attr/type_set_attr.c
index 571fc46..0673df9 100644
--- a/src/mpi/attr/type_set_attr.c
+++ b/src/mpi/attr/type_set_attr.c
@@ -27,23 +27,23 @@ int MPI_Type_set_attr(MPI_Datatype datatype, int type_keyval, void *attribute_va
 #define MPI_Type_set_attr PMPI_Type_set_attr
 
 #undef FUNCNAME
-#define FUNCNAME MPIR_TypeSetAttr
-int MPIR_TypeSetAttr(MPI_Datatype datatype, int type_keyval, void *attribute_val,
-		     MPIR_AttrType attrType )
+#define FUNCNAME MPII_Type_set_attr
+int MPII_Type_set_attr(MPI_Datatype datatype, int type_keyval, void *attribute_val,
+		     MPIR_Attr_type attrType )
 {
-    static const char FCNAME[] = "MPIR_TypeSetAttr";
+    static const char FCNAME[] = "MPII_Type_set_attr";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Datatype *type_ptr = NULL;
-    MPIR_Keyval *keyval_ptr = NULL;
+    MPII_Keyval *keyval_ptr = NULL;
     MPIR_Attribute *p, **old_p;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_TYPE_SET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_TYPE_SET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     /* The thread lock prevents a valid attr delete on the same datatype
        but in a different thread from causing problems */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_TYPE_SET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_TYPE_SET_ATTR);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -60,7 +60,7 @@ int MPIR_TypeSetAttr(MPI_Datatype datatype, int type_keyval, void *attribute_val
 
     /* Convert MPI object handles to object pointers */
     MPID_Datatype_get_ptr( datatype, type_ptr );
-    MPIR_Keyval_get_ptr( type_keyval, keyval_ptr );
+    MPII_Keyval_get_ptr( type_keyval, keyval_ptr );
     
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -71,7 +71,7 @@ int MPIR_TypeSetAttr(MPI_Datatype datatype, int type_keyval, void *attribute_val
             MPIR_Datatype_valid_ptr( type_ptr, mpi_errno );
 	    /* If type_ptr is not valid, it will be reset to null */
 	    /* Validate keyval_ptr */
-		MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+		MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
         }
         MPID_END_ERROR_CHECKS;
@@ -95,7 +95,7 @@ int MPIR_TypeSetAttr(MPI_Datatype datatype, int type_keyval, void *attribute_val
 		goto fn_fail;
 	    }
 	    /* --END ERROR HANDLING-- */
-	    p->value    = (MPIR_AttrVal_t)(intptr_t)attribute_val;
+	    p->value    = (MPII_Attr_val_t)(intptr_t)attribute_val;
 	    p->attrType = attrType;
 	    break;
 	}
@@ -106,10 +106,10 @@ int MPIR_TypeSetAttr(MPI_Datatype datatype, int type_keyval, void *attribute_val
 	    new_p->keyval	 = keyval_ptr;
 	    new_p->attrType      = attrType;
 	    new_p->pre_sentinal	 = 0;
-	    new_p->value	 = (MPIR_AttrVal_t)(intptr_t)attribute_val;
+	    new_p->value	 = (MPII_Attr_val_t)(intptr_t)attribute_val;
 	    new_p->post_sentinal = 0;
 	    new_p->next		 = p->next;
-	    MPIR_Keyval_add_ref( keyval_ptr );
+	    MPII_Keyval_add_ref( keyval_ptr );
 	    p->next		 = new_p;
 	    break;
 	}
@@ -125,10 +125,10 @@ int MPIR_TypeSetAttr(MPI_Datatype datatype, int type_keyval, void *attribute_val
 	new_p->keyval	     = keyval_ptr;
 	new_p->attrType      = attrType;
 	new_p->pre_sentinal  = 0;
-	new_p->value	     = (MPIR_AttrVal_t)(intptr_t)attribute_val;
+	new_p->value	     = (MPII_Attr_val_t)(intptr_t)attribute_val;
 	new_p->post_sentinal = 0;
 	new_p->next	     = 0;
-	MPIR_Keyval_add_ref( keyval_ptr );
+	MPII_Keyval_add_ref( keyval_ptr );
 	*old_p		     = new_p;
     }
     
@@ -140,7 +140,7 @@ int MPIR_TypeSetAttr(MPI_Datatype datatype, int type_keyval, void *attribute_val
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_TYPE_SET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_TYPE_SET_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
@@ -190,18 +190,18 @@ int MPI_Type_set_attr(MPI_Datatype datatype, int type_keyval, void *attribute_va
 {
     static const char FCNAME[] = "MPI_Type_set_attr";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_SET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_SET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_SET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_SET_ATTR);
 
-    mpi_errno = MPIR_TypeSetAttr( datatype, type_keyval, attribute_val,
+    mpi_errno = MPII_Type_set_attr( datatype, type_keyval, attribute_val,
 				  MPIR_ATTR_PTR );
     if (mpi_errno) goto fn_fail;
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_SET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_SET_ATTR);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/attr/win_create_keyval.c b/src/mpi/attr/win_create_keyval.c
index d945cb2..e0cd788 100644
--- a/src/mpi/attr/win_create_keyval.c
+++ b/src/mpi/attr/win_create_keyval.c
@@ -76,13 +76,13 @@ int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn,
 {
     static const char FCNAME[] = "MPI_Win_create_keyval";
     int mpi_errno = MPI_SUCCESS;
-    MPIR_Keyval *keyval_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_CREATE_KEYVAL);
+    MPII_Keyval *keyval_ptr;
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_CREATE_KEYVAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_CREATE_KEYVAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_CREATE_KEYVAL);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -97,9 +97,9 @@ int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn,
 
     /* ... body of routine ...  */
     
-    keyval_ptr = (MPIR_Keyval *)MPIU_Handle_obj_alloc( &MPIR_Keyval_mem );
+    keyval_ptr = (MPII_Keyval *)MPIR_Handle_obj_alloc( &MPII_Keyval_mem );
     MPIR_ERR_CHKANDJUMP1(!keyval_ptr,mpi_errno,MPI_ERR_OTHER,"**nomem",
-			 "**nomem %s", "MPIR_Keyval" );
+			 "**nomem %s", "MPII_Keyval" );
     /* Initialize the attribute dup function */
     if (!MPIR_Process.attr_dup) {
 	MPIR_Process.attr_dup  = MPIR_Attr_dup_list;
@@ -110,20 +110,20 @@ int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn,
        field */
     keyval_ptr->handle           = (keyval_ptr->handle & ~(0x03c00000)) |
 	(MPIR_WIN << 22);
-    MPIU_Object_set_ref(keyval_ptr,1);
+    MPIR_Object_set_ref(keyval_ptr,1);
     keyval_ptr->was_freed        = 0;
     keyval_ptr->kind	         = MPIR_WIN;
     keyval_ptr->extra_state      = extra_state;
     keyval_ptr->copyfn.user_function = win_copy_attr_fn;
-    keyval_ptr->copyfn.proxy = MPIR_Attr_copy_c_proxy;
+    keyval_ptr->copyfn.proxy = MPII_Attr_copy_c_proxy;
     keyval_ptr->delfn.user_function = win_delete_attr_fn;
-    keyval_ptr->delfn.proxy = MPIR_Attr_delete_c_proxy;
+    keyval_ptr->delfn.proxy = MPII_Attr_delete_c_proxy;
     
     MPIR_OBJ_PUBLISH_HANDLE(*win_keyval, keyval_ptr->handle);
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_CREATE_KEYVAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_CREATE_KEYVAL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/win_delete_attr.c b/src/mpi/attr/win_delete_attr.c
index caf1211..029b55f 100644
--- a/src/mpi/attr/win_delete_attr.c
+++ b/src/mpi/attr/win_delete_attr.c
@@ -55,15 +55,15 @@ int MPI_Win_delete_attr(MPI_Win win, int win_keyval)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     MPIR_Attribute *p, **old_p;
-    MPIR_Keyval *keyval_ptr=0;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_DELETE_ATTR);
+    MPII_Keyval *keyval_ptr=0;
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_DELETE_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     /* The thread lock prevents a valid attr delete on the same window
        but in a different thread from causing problems */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_DELETE_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_DELETE_ATTR);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -80,7 +80,7 @@ int MPI_Win_delete_attr(MPI_Win win, int win_keyval)
     
     /* Convert MPI object handles to object pointers */
     MPIR_Win_get_ptr( win, win_ptr );
-    MPIR_Keyval_get_ptr( win_keyval, keyval_ptr );
+    MPII_Keyval_get_ptr( win_keyval, keyval_ptr );
     
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -90,7 +90,7 @@ int MPI_Win_delete_attr(MPI_Win win, int win_keyval)
             MPIR_Win_valid_ptr( win_ptr, mpi_errno );
 	    /* If win_ptr is not valid, it will be reset to null */
 	    /* Validate keyval_ptr */
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
         }
         MPID_END_ERROR_CHECKS;
@@ -128,10 +128,10 @@ int MPI_Win_delete_attr(MPI_Win win, int win_keyval)
 	    /* We found the attribute.  Remove it from the list */
 	    *old_p = p->next;
 	    /* Decrement the use of the keyval */
-	    MPIR_Keyval_release_ref( p->keyval, &in_use);
+	    MPII_Keyval_release_ref( p->keyval, &in_use);
 	    if (!in_use)
 	    {
-		MPIU_Handle_obj_free( &MPIR_Keyval_mem, p->keyval );
+		MPIR_Handle_obj_free( &MPII_Keyval_mem, p->keyval );
 	    }
 	    MPID_Attr_free(p);
 	}
@@ -143,7 +143,7 @@ int MPI_Win_delete_attr(MPI_Win win, int win_keyval)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_DELETE_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_DELETE_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/win_free_keyval.c b/src/mpi/attr/win_free_keyval.c
index 18d8e18..60a0898 100644
--- a/src/mpi/attr/win_free_keyval.c
+++ b/src/mpi/attr/win_free_keyval.c
@@ -53,14 +53,14 @@ int MPI_Win_free_keyval(int *win_keyval)
     static const char FCNAME[] = "MPI_Win_free_keyval";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPIR_Keyval *keyval_ptr = NULL;
+    MPII_Keyval *keyval_ptr = NULL;
     int          in_use;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_FREE_KEYVAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_FREE_KEYVAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_FREE_KEYVAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_FREE_KEYVAL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -76,14 +76,14 @@ int MPI_Win_free_keyval(int *win_keyval)
 #   endif
 
     /* Convert MPI object handles to object pointers */
-    MPIR_Keyval_get_ptr( *win_keyval, keyval_ptr );
+    MPII_Keyval_get_ptr( *win_keyval, keyval_ptr );
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
     {
         MPID_BEGIN_ERROR_CHECKS;
         {
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
         }
         MPID_END_ERROR_CHECKS;
@@ -94,9 +94,9 @@ int MPI_Win_free_keyval(int *win_keyval)
     
     if (!keyval_ptr->was_freed) {
         keyval_ptr->was_freed = 1;
-        MPIR_Keyval_release_ref( keyval_ptr, &in_use);
+        MPII_Keyval_release_ref( keyval_ptr, &in_use);
         if (!in_use) {
-            MPIU_Handle_obj_free( &MPIR_Keyval_mem, keyval_ptr );
+            MPIR_Handle_obj_free( &MPII_Keyval_mem, keyval_ptr );
         }
     }
     *win_keyval = MPI_KEYVAL_INVALID;
@@ -106,7 +106,7 @@ int MPI_Win_free_keyval(int *win_keyval)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_FREE_KEYVAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_FREE_KEYVAL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/attr/win_get_attr.c b/src/mpi/attr/win_get_attr.c
index 44f8405..456ce73 100644
--- a/src/mpi/attr/win_get_attr.c
+++ b/src/mpi/attr/win_get_attr.c
@@ -26,20 +26,20 @@ int MPI_Win_get_attr(MPI_Win win, int win_keyval, void *attribute_val, int *flag
 #define MPI_Win_get_attr PMPI_Win_get_attr
 
 #undef FUNCNAME
-#define FUNCNAME MPIR_WinGetAttr
+#define FUNCNAME MPII_Win_get_attr
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_WinGetAttr( MPI_Win win, int win_keyval, void *attribute_val, 
-		     int *flag, MPIR_AttrType outAttrType )
+int MPII_Win_get_attr( MPI_Win win, int win_keyval, void *attribute_val,
+		     int *flag, MPIR_Attr_type outAttrType )
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_WIN_GET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_WIN_GET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_WIN_GET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_WIN_GET_ATTR);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -124,30 +124,30 @@ int MPIR_WinGetAttr( MPI_Win win, int win_keyval, void *attribute_val,
 	    *attr_val_p = &win_ptr->copyModel;
 	    break;
 #ifdef HAVE_FORTRAN_BINDING
-	case MPIR_ATTR_C_TO_FORTRAN(MPI_WIN_BASE):
+	case MPII_ATTR_C_TO_FORTRAN(MPI_WIN_BASE):
 	    /* The Fortran routine that matches this routine should
 	       provide an address-sized integer, not an MPI_Fint */
-	    *attr_int = MPIU_VOID_PTR_CAST_TO_MPI_AINT(win_ptr->base);
+	    *attr_int = MPIR_VOID_PTR_CAST_TO_MPI_AINT(win_ptr->base);
 	    break;
-        case MPIR_ATTR_C_TO_FORTRAN(MPI_WIN_SIZE):
+        case MPII_ATTR_C_TO_FORTRAN(MPI_WIN_SIZE):
 	    /* We do not need to copy because we return the value,
 	       not a pointer to the value */
 	    *attr_int = win_ptr->size;
 	    break;
-	case MPIR_ATTR_C_TO_FORTRAN(MPI_WIN_DISP_UNIT):
+	case MPII_ATTR_C_TO_FORTRAN(MPI_WIN_DISP_UNIT):
 	    /* We do not need to copy because we return the value,
 	       not a pointer to the value */
 	    *attr_int = win_ptr->disp_unit;
 	    break;
-	case MPIR_ATTR_C_TO_FORTRAN(MPI_WIN_CREATE_FLAVOR):
+	case MPII_ATTR_C_TO_FORTRAN(MPI_WIN_CREATE_FLAVOR):
 	    *attr_int = win_ptr->create_flavor;
 	    break;
-	case MPIR_ATTR_C_TO_FORTRAN(MPI_WIN_MODEL):
+	case MPII_ATTR_C_TO_FORTRAN(MPI_WIN_MODEL):
 	    *attr_int = win_ptr->model;
 	    break;
 #endif
         default:
-            MPIU_Assert(FALSE);
+            MPIR_Assert(FALSE);
             break;
 	}
     }
@@ -197,7 +197,7 @@ int MPIR_WinGetAttr( MPI_Win win, int win_keyval, void *attribute_val,
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_WIN_GET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_WIN_GET_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
@@ -257,20 +257,20 @@ int MPI_Win_get_attr(MPI_Win win, int win_keyval, void *attribute_val,
 #ifdef HAVE_ERROR_CHECKING
     MPIR_Win *win_ptr = NULL;
 #endif
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_GET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_GET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_GET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_GET_ATTR);
 
     /* ... body of routine ...  */
-    mpi_errno = MPIR_WinGetAttr( win, win_keyval, attribute_val, flag, 
+    mpi_errno = MPII_Win_get_attr( win, win_keyval, attribute_val, flag,
 				 MPIR_ATTR_PTR );
     if (mpi_errno) goto fn_fail;
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_GET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_GET_ATTR);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/attr/win_set_attr.c b/src/mpi/attr/win_set_attr.c
index 2509d43..d9fa7c7 100644
--- a/src/mpi/attr/win_set_attr.c
+++ b/src/mpi/attr/win_set_attr.c
@@ -27,23 +27,23 @@ int MPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val) __attribu
 #define MPI_Win_set_attr PMPI_Win_set_attr
 
 #undef FUNCNAME
-#define FUNCNAME MPIR_WinSetAttr
-int MPIR_WinSetAttr( MPI_Win win, int win_keyval, void *attribute_val, 
-		     MPIR_AttrType attrType )
+#define FUNCNAME MPII_Win_set_attr
+int MPII_Win_set_attr( MPI_Win win, int win_keyval, void *attribute_val,
+		     MPIR_Attr_type attrType )
 {
     static const char FCNAME[] = "MPI_Win_set_attr";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPIR_Keyval *keyval_ptr = NULL;
+    MPII_Keyval *keyval_ptr = NULL;
     MPIR_Attribute *p, **old_p;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_WIN_SET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_WIN_SET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     /* The thread lock prevents a valid attr delete on the same window
        but in a different thread from causing problems */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_WIN_SET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_WIN_SET_ATTR);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -60,7 +60,7 @@ int MPIR_WinSetAttr( MPI_Win win, int win_keyval, void *attribute_val,
 
     /* Convert MPI object handles to object pointers */
     MPIR_Win_get_ptr( win, win_ptr );
-    MPIR_Keyval_get_ptr( win_keyval, keyval_ptr );
+    MPII_Keyval_get_ptr( win_keyval, keyval_ptr );
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -71,7 +71,7 @@ int MPIR_WinSetAttr( MPI_Win win, int win_keyval, void *attribute_val,
             MPIR_Win_valid_ptr( win_ptr, mpi_errno );
 	    /* If win_ptr is not valid, it will be reset to null */
 	    /* Validate keyval */
-	    MPIR_Keyval_valid_ptr( keyval_ptr, mpi_errno );
+	    MPII_Keyval_valid_ptr( keyval_ptr, mpi_errno );
             if (mpi_errno) goto fn_fail;
 	}
         MPID_END_ERROR_CHECKS;
@@ -100,7 +100,7 @@ int MPIR_WinSetAttr( MPI_Win win, int win_keyval, void *attribute_val,
 		goto fn_fail;
 	    }
 	    /* --END ERROR HANDLING-- */
-	    p->value    = (MPIR_AttrVal_t)(intptr_t)attribute_val;
+	    p->value    = (MPII_Attr_val_t)(intptr_t)attribute_val;
 	    p->attrType = attrType;
 	    /* Does not change the reference count on the keyval */
 	    break;
@@ -112,10 +112,10 @@ int MPIR_WinSetAttr( MPI_Win win, int win_keyval, void *attribute_val,
 	    new_p->keyval	 = keyval_ptr;
 	    new_p->attrType      = attrType;
 	    new_p->pre_sentinal	 = 0;
-	    new_p->value	 = (MPIR_AttrVal_t)(intptr_t)attribute_val;
+	    new_p->value	 = (MPII_Attr_val_t)(intptr_t)attribute_val;
 	    new_p->post_sentinal = 0;
 	    new_p->next		 = p->next;
-	    MPIR_Keyval_add_ref( keyval_ptr );
+	    MPII_Keyval_add_ref( keyval_ptr );
 	    p->next		 = new_p;
 	    break;
 	}
@@ -131,10 +131,10 @@ int MPIR_WinSetAttr( MPI_Win win, int win_keyval, void *attribute_val,
 	new_p->attrType      = attrType;
 	new_p->keyval	     = keyval_ptr;
 	new_p->pre_sentinal  = 0;
-	new_p->value	     = (MPIR_AttrVal_t)(intptr_t)attribute_val;
+	new_p->value	     = (MPII_Attr_val_t)(intptr_t)attribute_val;
 	new_p->post_sentinal = 0;
 	new_p->next	     = 0;
-	MPIR_Keyval_add_ref( keyval_ptr );
+	MPII_Keyval_add_ref( keyval_ptr );
 	*old_p		     = new_p;
     }
     
@@ -146,7 +146,7 @@ int MPIR_WinSetAttr( MPI_Win win, int win_keyval, void *attribute_val,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_WIN_SET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_WIN_SET_ATTR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); 
     return mpi_errno;
 
@@ -199,19 +199,19 @@ corresponding keyval was created) will be called.
 int MPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_SET_ATTR);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_SET_ATTR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_SET_ATTR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_SET_ATTR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     /* ... body of routine ...  */
-    mpi_errno = MPIR_WinSetAttr( win, win_keyval, attribute_val, 
+    mpi_errno = MPII_Win_set_attr( win, win_keyval, attribute_val,
 				 MPIR_ATTR_PTR );
     if (mpi_errno) goto fn_fail;
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_SET_ATTR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_SET_ATTR);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/coll/allgather.c b/src/mpi/coll/allgather.c
index 99b5ddf..0208db5 100644
--- a/src/mpi/coll/allgather.c
+++ b/src/mpi/coll/allgather.c
@@ -135,7 +135,7 @@ int MPIR_Allgather_intra (
     int position, tmp_buf_size, nbytes;
 #endif
 
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     if (((sendcount == 0) && (sendbuf != MPI_IN_PLACE)) || (recvcount == 0))
         return MPI_SUCCESS;
@@ -147,7 +147,7 @@ int MPIR_Allgather_intra (
     MPID_Datatype_get_size_macro( recvtype, type_size );
 
     /* This is the largest offset we add to recvbuf */
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
 				     (comm_size * recvcount * recvtype_extent));
 
     tot_bytes = (MPI_Aint)recvcount * comm_size * type_size;
@@ -316,7 +316,7 @@ int MPIR_Allgather_intra (
             
             MPIR_Pack_size_impl(recvcount*comm_size, recvtype, &tmp_buf_size);
             
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void*, tmp_buf_size, mpi_errno, "tmp_buf");
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void*, tmp_buf_size, mpi_errno, "tmp_buf");
             
             /* calculate the value of nbytes, the number of bytes in packed
                representation that each process contributes. We can't simply divide
@@ -488,7 +488,7 @@ int MPIR_Allgather_intra (
         recvbuf_extent = recvcount * comm_size *
             (MPL_MAX(recvtype_true_extent, recvtype_extent));
 
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void*, recvbuf_extent, mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void*, recvbuf_extent, mpi_errno, "tmp_buf");
             
         /* adjust for potential negative lower bound in datatype */
         tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);
@@ -622,7 +622,7 @@ int MPIR_Allgather_intra (
     }
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -663,7 +663,7 @@ int MPIR_Allgather_inter (
     void *tmp_buf=NULL;
     MPIR_Comm *newcomm_ptr = NULL;
 
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     local_size = comm_ptr->local_size; 
     remote_size = comm_ptr->remote_size;
@@ -677,8 +677,8 @@ int MPIR_Allgather_inter (
         MPID_Datatype_get_extent_macro( sendtype, send_extent );
         extent = MPL_MAX(send_extent, true_extent);
 
-	MPIU_Ensure_Aint_fits_in_pointer(extent * sendcount * local_size);
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void*, extent*sendcount*local_size, mpi_errno, "tmp_buf");
+	MPIR_Ensure_Aint_fits_in_pointer(extent * sendcount * local_size);
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void*, extent*sendcount*local_size, mpi_errno, "tmp_buf");
 
         /* adjust for potential negative lower bound in datatype */
         tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -686,7 +686,7 @@ int MPIR_Allgather_inter (
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm)
-	MPIR_Setup_intercomm_localcomm( comm_ptr );
+	MPII_Setup_intercomm_localcomm( comm_ptr );
 
     newcomm_ptr = comm_ptr->local_comm;
 
@@ -759,7 +759,7 @@ int MPIR_Allgather_inter (
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -898,12 +898,12 @@ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLGATHER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ALLGATHER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLGATHER);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_ALLGATHER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -983,7 +983,7 @@ int MPI_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLGATHER);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_ALLGATHER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/allgatherv.c b/src/mpi/coll/allgatherv.c
index dff4ed2..ef108ee 100644
--- a/src/mpi/coll/allgatherv.c
+++ b/src/mpi/coll/allgatherv.c
@@ -115,7 +115,7 @@ int MPIR_Allgatherv_intra (
 #ifdef MPID_HAS_HETERO
     int tmp_buf_size, nbytes;
 #endif
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
     
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -146,9 +146,9 @@ int MPIR_Allgatherv_intra (
 
             MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &recvtype_true_extent);
 
-            MPIU_Ensure_Aint_fits_in_pointer(total_count *
+            MPIR_Ensure_Aint_fits_in_pointer(total_count *
                            (MPL_MAX(recvtype_true_extent, recvtype_extent)));
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, total_count*(MPL_MAX(recvtype_true_extent,recvtype_extent)), mpi_errno, "tmp_buf");
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void *, total_count*(MPL_MAX(recvtype_true_extent,recvtype_extent)), mpi_errno, "tmp_buf");
 
             /* adjust for potential negative lower bound in datatype */
             tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);
@@ -344,7 +344,7 @@ int MPIR_Allgatherv_intra (
         else {
             /* heterogeneous. need to use temp. buffer. */
             MPIR_Pack_size_impl(total_count, recvtype, &tmp_buf_size);
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
             
             /* calculate the value of nbytes, the number of bytes in packed
                representation corresponding to a single recvtype. Since
@@ -536,12 +536,12 @@ int MPIR_Allgatherv_intra (
         /* get true extent of recvtype */
         MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &recvtype_true_extent);
             
-        MPIU_Ensure_Aint_fits_in_pointer(total_count *
+        MPIR_Ensure_Aint_fits_in_pointer(total_count *
                         MPL_MAX(recvtype_true_extent, recvtype_extent));
         recvbuf_extent = total_count *
             (MPL_MAX(recvtype_true_extent, recvtype_extent));
 
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, recvbuf_extent, mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, recvbuf_extent, mpi_errno, "tmp_buf");
             
         /* adjust for potential negative lower bound in datatype */
         tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);
@@ -737,7 +737,7 @@ int MPIR_Allgatherv_intra (
     }
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -838,7 +838,7 @@ int MPIR_Allgatherv_inter (
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm) {
-	mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
+	mpi_errno = MPII_Setup_intercomm_localcomm( comm_ptr );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
@@ -1009,12 +1009,12 @@ int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLGATHERV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ALLGATHERV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLGATHERV);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_ALLGATHERV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -1105,7 +1105,7 @@ int MPI_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLGATHERV);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_ALLGATHERV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/allred_group.c b/src/mpi/coll/allred_group.c
index cb4a29a..8fcba3c 100644
--- a/src/mpi/coll/allred_group.c
+++ b/src/mpi/coll/allred_group.c
@@ -18,7 +18,7 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
         int gr_tmp_ = (gr_);                                                                                  \
         mpi_errno = MPIR_Group_translate_ranks_impl(group_ptr, 1, &(gr_tmp_), comm_ptr->local_group, &(cr_)); \
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);                                                               \
-        MPIU_Assert((cr_) != MPI_UNDEFINED);                                                                  \
+        MPIR_Assert((cr_) != MPI_UNDEFINED);                                                                  \
     } while (0)
 
 #undef FUNCNAME
@@ -39,11 +39,11 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
     void *tmp_buf;
     int group_rank, group_size;
     int cdst, csrc;
-    MPIU_CHKLMEM_DECL(3);
+    MPIR_CHKLMEM_DECL(3);
 
 #ifdef MPID_HAS_HETERO
     if (comm_ptr->is_hetero)
-        MPIU_Assert_fmt_msg(FALSE,("heterogeneous support for Allreduce_group_intra not yet implemented"));
+        MPIR_Assert_fmt_msg(FALSE,("heterogeneous support for Allreduce_group_intra not yet implemented"));
 #endif
 
     /* homogeneous case */
@@ -58,8 +58,8 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
     MPID_Datatype_get_extent_macro(datatype, extent);
 
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
-    MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
 
     /* adjust for potential negative lower bound in datatype */
     tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -197,8 +197,8 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
                each process receives and the displacement within
                the buffer */
 
-            MPIU_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
-            MPIU_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
+            MPIR_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
+            MPIR_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
 
             for (i=0; i<(pof2-1); i++)
                 cnts[i] = count/pof2;
@@ -349,7 +349,7 @@ int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
diff --git a/src/mpi/coll/allreduce.c b/src/mpi/coll/allreduce.c
index 27d9460..61f9190 100644
--- a/src/mpi/coll/allreduce.c
+++ b/src/mpi/coll/allreduce.c
@@ -198,7 +198,7 @@ int MPIR_Allreduce_intra (
         send_idx, recv_idx, last_idx, send_cnt, recv_cnt, *cnts, *disps; 
     MPI_Aint true_extent, true_lb, extent;
     void *tmp_buf;
-    MPIU_CHKLMEM_DECL(3);
+    MPIR_CHKLMEM_DECL(3);
     
     if (count == 0) goto fn_exit;
 
@@ -312,8 +312,8 @@ int MPIR_Allreduce_intra (
         MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
         MPID_Datatype_get_extent_macro(datatype, extent);
 
-        MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
+        MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
 	
         /* adjust for potential negative lower bound in datatype */
         tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -445,8 +445,8 @@ int MPIR_Allreduce_intra (
                    each process receives and the displacement within
                    the buffer */
 
-		MPIU_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
-		MPIU_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
+		MPIR_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
+		MPIR_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
 
                 for (i=0; i<(pof2-1); i++) 
                     cnts[i] = count/pof2;
@@ -595,7 +595,7 @@ int MPIR_Allreduce_intra (
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     return (mpi_errno);
@@ -630,23 +630,23 @@ int MPIR_Allreduce_inter (
     MPI_Aint true_extent, true_lb, extent;
     void *tmp_buf=NULL;
     MPIR_Comm *newcomm_ptr = NULL;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     if (comm_ptr->rank == 0) {
         MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
         MPID_Datatype_get_extent_macro(datatype, extent);
         /* I think this is the worse case, so we can avoid an assert()
          * inside the for loop */
-        /* Should MPIU_CHKLMEM_MALLOC do this? */
-        MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
+        /* Should MPIR_CHKLMEM_MALLOC do this? */
+        MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
         /* adjust for potential negative lower bound in datatype */
         tmp_buf = (void *)((char*)tmp_buf - true_lb);
     }
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm)
-        MPIR_Setup_intercomm_localcomm( comm_ptr );
+        MPII_Setup_intercomm_localcomm( comm_ptr );
 
     newcomm_ptr = comm_ptr->local_comm;
 
@@ -684,7 +684,7 @@ int MPIR_Allreduce_inter (
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -808,12 +808,12 @@ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLREDUCE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ALLREDUCE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLREDUCE);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_ALLREDUCE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -886,7 +886,7 @@ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLREDUCE);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_ALLREDUCE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/alltoall.c b/src/mpi/coll/alltoall.c
index 2a65c9c..309a6c3 100644
--- a/src/mpi/coll/alltoall.c
+++ b/src/mpi/coll/alltoall.c
@@ -149,7 +149,7 @@ int MPIR_Alltoall_intra(
     void *tmp_buf;
     MPIR_Request **reqarray;
     MPI_Status *starray;
-    MPIU_CHKLMEM_DECL(6);
+    MPIR_CHKLMEM_DECL(6);
 
     if (recvcount == 0) return MPI_SUCCESS;
 
@@ -214,7 +214,7 @@ int MPIR_Alltoall_intra(
 
         /* allocate temporary buffer */
         MPIR_Pack_size_impl(recvcount*comm_size, recvtype, &pack_size);
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, pack_size, mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, pack_size, mpi_errno, "tmp_buf");
 
         /* Do Phase 1 of the algorithim. Shift the data blocks on process i
          * upwards by a distance of i blocks. Store the result in recvbuf. */
@@ -238,7 +238,7 @@ int MPIR_Alltoall_intra(
         /* allocate displacements array for indexed datatype used in
            communication */
 
-        MPIU_CHKLMEM_MALLOC(displs, int *, comm_size * sizeof(int), mpi_errno, "displs");
+        MPIR_CHKLMEM_MALLOC(displs, int *, comm_size * sizeof(int), mpi_errno, "displs");
 
         pof2 = 1;
         while (pof2 < comm_size) {
@@ -291,7 +291,7 @@ int MPIR_Alltoall_intra(
 
         recvbuf_extent = recvcount * comm_size *
             (MPL_MAX(recvtype_true_extent, recvtype_extent));
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, recvbuf_extent, mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, recvbuf_extent, mpi_errno, "tmp_buf");
         /* adjust for potential negative lower bound in datatype */
         tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);
 
@@ -335,9 +335,9 @@ int MPIR_Alltoall_intra(
         bblock = MPIR_CVAR_ALLTOALL_THROTTLE;
         if (bblock == 0) bblock = comm_size;
 
-        MPIU_CHKLMEM_MALLOC(reqarray, MPIR_Request **, 2*bblock*sizeof(MPIR_Request*), mpi_errno, "reqarray");
+        MPIR_CHKLMEM_MALLOC(reqarray, MPIR_Request **, 2*bblock*sizeof(MPIR_Request*), mpi_errno, "reqarray");
 
-        MPIU_CHKLMEM_MALLOC(starray, MPI_Status *, 2*bblock*sizeof(MPI_Status), mpi_errno, "starray");
+        MPIR_CHKLMEM_MALLOC(starray, MPI_Status *, 2*bblock*sizeof(MPI_Status), mpi_errno, "starray");
 
         for (ii=0; ii<comm_size; ii+=bblock) {
             ss = comm_size-ii < bblock ? comm_size-ii : bblock;
@@ -436,7 +436,7 @@ int MPIR_Alltoall_intra(
     }
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -493,9 +493,9 @@ int MPIR_Alltoall_inter(
     
     /* Do the pairwise exchanges */
     max_size = MPL_MAX(local_size, remote_size);
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
 				     max_size*recvcount*recvtype_extent);
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
 				     max_size*sendcount*sendtype_extent);
     for (i=0; i<max_size; i++) {
         src = (rank - i + max_size) % max_size;
@@ -636,12 +636,12 @@ int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ALLTOALL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLTOALL);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_ALLTOALL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -715,7 +715,7 @@ int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLTOALL);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_ALLTOALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/alltoallv.c b/src/mpi/coll/alltoallv.c
index 276b6aa..46f1809 100644
--- a/src/mpi/coll/alltoallv.c
+++ b/src/mpi/coll/alltoallv.c
@@ -76,7 +76,7 @@ int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcounts, const int *
     int ii, ss, bblock;
     int type_size;
 
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -135,8 +135,8 @@ int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcounts, const int *
 
         MPID_Datatype_get_extent_macro(sendtype, send_extent);
 
-        MPIU_CHKLMEM_MALLOC(starray,  MPI_Status*,  2*bblock*sizeof(MPI_Status),  mpi_errno, "starray");
-        MPIU_CHKLMEM_MALLOC(reqarray, MPIR_Request**, 2*bblock*sizeof(MPIR_Request *), mpi_errno, "reqarray");
+        MPIR_CHKLMEM_MALLOC(starray,  MPI_Status*,  2*bblock*sizeof(MPI_Status),  mpi_errno, "starray");
+        MPIR_CHKLMEM_MALLOC(reqarray, MPIR_Request**, 2*bblock*sizeof(MPIR_Request *), mpi_errno, "reqarray");
 
         /* post only bblock isends/irecvs at a time as suggested by Tony Ladd */
         for (ii=0; ii<comm_size; ii+=bblock) {
@@ -149,7 +149,7 @@ int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcounts, const int *
                 if (recvcounts[dst]) {
                     MPID_Datatype_get_size_macro(recvtype, type_size);
                     if (type_size) {
-                        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+                        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                                          rdispls[dst]*recv_extent);
                         mpi_errno = MPIC_Irecv((char *)recvbuf+rdispls[dst]*recv_extent,
                                                   recvcounts[dst], recvtype, dst,
@@ -171,7 +171,7 @@ int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcounts, const int *
                 if (sendcounts[dst]) {
                     MPID_Datatype_get_size_macro(sendtype, type_size);
                     if (type_size) {
-                        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+                        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                                          sdispls[dst]*send_extent);
                         mpi_errno = MPIC_Isend((char *)sendbuf+sdispls[dst]*send_extent,
                                                   sendcounts[dst], sendtype, dst,
@@ -210,7 +210,7 @@ int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcounts, const int *
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
 
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -274,7 +274,7 @@ int MPIR_Alltoallv_inter(const void *sendbuf, const int *sendcounts, const int *
             recvcount = 0;
         }
         else {
-            MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+            MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
 					     rdispls[src]*recv_extent);
             recvaddr = (char *)recvbuf + rdispls[src]*recv_extent;
             recvcount = recvcounts[src];
@@ -285,7 +285,7 @@ int MPIR_Alltoallv_inter(const void *sendbuf, const int *sendcounts, const int *
             sendcount = 0;
         }
         else {
-            MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+            MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
 					     sdispls[dst]*send_extent);
             sendaddr = (char *)sendbuf + sdispls[dst]*send_extent;
             sendcount = sendcounts[dst];
@@ -427,12 +427,12 @@ int MPI_Alltoallv(const void *sendbuf, const int *sendcounts,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALLV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ALLTOALLV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLTOALLV);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_ALLTOALLV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -522,7 +522,7 @@ int MPI_Alltoallv(const void *sendbuf, const int *sendcounts,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLTOALLV);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_ALLTOALLV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/alltoallw.c b/src/mpi/coll/alltoallw.c
index fb625a8..02eba11 100644
--- a/src/mpi/coll/alltoallw.c
+++ b/src/mpi/coll/alltoallw.c
@@ -69,7 +69,7 @@ int MPIR_Alltoallw_intra(const void *sendbuf, const int sendcounts[], const int
     int outstanding_requests;
     int ii, ss, bblock;
     int type_size;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -122,8 +122,8 @@ int MPIR_Alltoallw_intra(const void *sendbuf, const int sendcounts[], const int
         bblock = MPIR_CVAR_ALLTOALL_THROTTLE;
         if (bblock == 0) bblock = comm_size;
 
-        MPIU_CHKLMEM_MALLOC(starray,  MPI_Status*,  2*bblock*sizeof(MPI_Status),  mpi_errno, "starray");
-        MPIU_CHKLMEM_MALLOC(reqarray, MPIR_Request**, 2*bblock*sizeof(MPIR_Request *), mpi_errno, "reqarray");
+        MPIR_CHKLMEM_MALLOC(starray,  MPI_Status*,  2*bblock*sizeof(MPI_Status),  mpi_errno, "starray");
+        MPIR_CHKLMEM_MALLOC(reqarray, MPIR_Request**, 2*bblock*sizeof(MPIR_Request *), mpi_errno, "reqarray");
 
         /* post only bblock isends/irecvs at a time as suggested by Tony Ladd */
         for (ii=0; ii<comm_size; ii+=bblock) {
@@ -213,7 +213,7 @@ int MPIR_Alltoallw_intra(const void *sendbuf, const int sendcounts[], const int
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -422,12 +422,12 @@ int MPI_Alltoallw(const void *sendbuf, const int sendcounts[],
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLTOALLW);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ALLTOALLW);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLTOALLW);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_ALLTOALLW);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -528,7 +528,7 @@ int MPI_Alltoallw(const void *sendbuf, const int sendcounts[],
     
 
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLTOALLW);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_ALLTOALLW);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/barrier.c b/src/mpi/coll/barrier.c
index b52efd6..54347c5 100644
--- a/src/mpi/coll/barrier.c
+++ b/src/mpi/coll/barrier.c
@@ -72,7 +72,7 @@ static int barrier_smp_intra(MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag)
     int mpi_errno=MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
 
-    MPIU_Assert(MPIR_CVAR_ENABLE_SMP_COLLECTIVES && MPIR_CVAR_ENABLE_SMP_BARRIER &&
+    MPIR_Assert(MPIR_CVAR_ENABLE_SMP_COLLECTIVES && MPIR_CVAR_ENABLE_SMP_BARRIER &&
                 MPIR_Comm_is_node_aware(comm_ptr));
 
     /* do the intranode barrier on all nodes */
@@ -195,7 +195,7 @@ int MPIR_Barrier_inter( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag )
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm) {
-	mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
+	mpi_errno = MPII_Setup_intercomm_localcomm( comm_ptr );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
@@ -365,12 +365,12 @@ int MPI_Barrier( MPI_Comm comm )
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_BARRIER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_BARRIER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_BARRIER);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_BARRIER);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -407,7 +407,7 @@ int MPI_Barrier( MPI_Comm comm )
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_BARRIER);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_BARRIER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/bcast.c b/src/mpi/coll/bcast.c
index a82b545..b024d5e 100644
--- a/src/mpi/coll/bcast.c
+++ b/src/mpi/coll/bcast.c
@@ -141,7 +141,7 @@ static int MPIR_Bcast_binomial(
     MPI_Aint position;
     void *tmp_buf=NULL;
     MPIR_Datatype *dtp;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -182,7 +182,7 @@ static int MPIR_Bcast_binomial(
 
     if (!is_contig || !is_homogeneous)
     {
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf");
 
         /* TODO: Pipeline the packing and communication */
         position = 0;
@@ -302,7 +302,7 @@ static int MPIR_Bcast_binomial(
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     /* --BEGIN ERROR HANDLING-- */
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -493,7 +493,7 @@ static int MPIR_Bcast_scatter_doubling_allgather(
     int relative_dst, dst_tree_root, my_tree_root, send_offset;
     int recv_offset, tree_root, nprocs_completed, offset;
     MPI_Aint position;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
     MPIR_Datatype *dtp;
     MPI_Aint true_extent, true_lb;
     void *tmp_buf;
@@ -545,7 +545,7 @@ static int MPIR_Bcast_scatter_doubling_allgather(
     }
     else
     {
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf");
 
         /* TODO: Pipeline the packing and communication */
         position = 0;
@@ -750,7 +750,7 @@ static int MPIR_Bcast_scatter_doubling_allgather(
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     /* --BEGIN ERROR HANDLING-- */
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -803,7 +803,7 @@ static int MPIR_Bcast_scatter_ring_allgather(
     MPI_Status status;
     MPIR_Datatype *dtp;
     MPI_Aint true_extent, true_lb;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -851,7 +851,7 @@ static int MPIR_Bcast_scatter_ring_allgather(
     }
     else
     {
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf");
 
         /* TODO: Pipeline the packing and communication */
         position = 0;
@@ -939,7 +939,7 @@ static int MPIR_Bcast_scatter_ring_allgather(
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     /* --BEGIN ERROR HANDLING-- */
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -1004,9 +1004,9 @@ static int MPIR_SMP_Bcast(
     int recvd_size;
 
     if (!MPIR_CVAR_ENABLE_SMP_COLLECTIVES || !MPIR_CVAR_ENABLE_SMP_BCAST) {
-        MPIU_Assert(0);
+        MPIR_Assert(0);
     }
-    MPIU_Assert(MPIR_Comm_is_node_aware(comm_ptr));
+    MPIR_Assert(MPIR_Comm_is_node_aware(comm_ptr));
 
     is_homogeneous = 1;
 #ifdef MPID_HAS_HETERO
@@ -1036,7 +1036,7 @@ static int MPIR_SMP_Bcast(
     {
         /* send to intranode-rank 0 on the root's node */
         if (comm_ptr->node_comm != NULL &&
-            MPIU_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */ 
+            MPIR_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */
         {                                                /* and is on our node (!-1) */
             if (root == comm_ptr->rank) {
                 mpi_errno = MPIC_Send(buffer,count,datatype,0,
@@ -1049,7 +1049,7 @@ static int MPIR_SMP_Bcast(
                 }
             }
             else if (0 == comm_ptr->node_comm->rank) {
-                mpi_errno = MPIC_Recv(buffer,count,datatype,MPIU_Get_intranode_rank(comm_ptr, root),
+                mpi_errno = MPIC_Recv(buffer,count,datatype,MPIR_Get_intranode_rank(comm_ptr, root),
                                          MPIR_BCAST_TAG,comm_ptr->node_comm, &status, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
@@ -1077,7 +1077,7 @@ static int MPIR_SMP_Bcast(
         {
             MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno_ret,
                                       buffer, count, datatype,
-                                      MPIU_Get_internode_rank(comm_ptr, root),
+                                      MPIR_Get_internode_rank(comm_ptr, root),
                                       comm_ptr->node_roots_comm, errflag);
         }
 
@@ -1099,14 +1099,14 @@ static int MPIR_SMP_Bcast(
 
             /* perform the intranode broadcast on the root's node */
             if (comm_ptr->node_comm != NULL &&
-                MPIU_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */ 
+                MPIR_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */
             {                                                /* and is on our node (!-1) */
                 /* FIXME binomial may not be the best algorithm for on-node
                    bcast.  We need a more comprehensive system for selecting the
                    right algorithms here. */
                 MPIR_Bcast_fn_or_override(MPIR_Bcast_binomial, mpi_errno_ret,
                                           buffer, count, datatype,
-                                          MPIU_Get_intranode_rank(comm_ptr, root),
+                                          MPIR_Get_intranode_rank(comm_ptr, root),
                                           comm_ptr->node_comm, errflag);
             }
 
@@ -1117,21 +1117,21 @@ static int MPIR_SMP_Bcast(
                 {
                     MPIR_Bcast_fn_or_override(MPIR_Bcast_scatter_doubling_allgather, mpi_errno_ret,
                                               buffer, count, datatype,
-                                              MPIU_Get_internode_rank(comm_ptr, root),
+                                              MPIR_Get_internode_rank(comm_ptr, root),
                                               comm_ptr->node_roots_comm, errflag);
                 }
                 else
                 {
                     MPIR_Bcast_fn_or_override(MPIR_Bcast_scatter_ring_allgather, mpi_errno_ret,
                                               buffer, count, datatype,
-                                              MPIU_Get_internode_rank(comm_ptr, root),
+                                              MPIR_Get_internode_rank(comm_ptr, root),
                                               comm_ptr->node_roots_comm, errflag);
                 }
             }
 
             /* perform the intranode broadcast on all except for the root's node */
             if (comm_ptr->node_comm != NULL &&
-                MPIU_Get_intranode_rank(comm_ptr, root) <= 0) /* 0 if root was local root too */
+                MPIR_Get_intranode_rank(comm_ptr, root) <= 0) /* 0 if root was local root too */
             {                                                 /* -1 if different node than root */
                 /* FIXME binomial may not be the best algorithm for on-node
                    bcast.  We need a more comprehensive system for selecting the
@@ -1229,9 +1229,9 @@ int MPIR_Bcast_intra (
     int nbytes=0;
     int is_homogeneous;
     MPI_Aint type_size;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_BCAST);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_BCAST);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_BCAST);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_BCAST);
 
     if (count == 0) goto fn_exit;
 
@@ -1313,7 +1313,7 @@ int MPIR_Bcast_intra (
     }
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_BCAST);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_BCAST);
 
     /* --BEGIN ERROR HANDLING-- */
     if (mpi_errno_ret)
@@ -1349,9 +1349,9 @@ int MPIR_Bcast_inter (
     int mpi_errno_ret = MPI_SUCCESS;
     MPI_Status status;
     MPIR_Comm *newcomm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_BCAST_INTER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_BCAST_INTER);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_BCAST_INTER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_BCAST_INTER);
 
 
     if (root == MPI_PROC_NULL)
@@ -1391,7 +1391,7 @@ int MPIR_Bcast_inter (
         
         /* Get the local intracommunicator */
         if (!comm_ptr->local_comm)
-            MPIR_Setup_intercomm_localcomm( comm_ptr );
+            MPII_Setup_intercomm_localcomm( comm_ptr );
 
         newcomm_ptr = comm_ptr->local_comm;
 
@@ -1407,7 +1407,7 @@ int MPIR_Bcast_inter (
     }
 
 fn_fail:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_BCAST_INTER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_BCAST_INTER);
     /* --BEGIN ERROR HANDLING-- */
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -1518,12 +1518,12 @@ int MPI_Bcast( void *buffer, int count, MPI_Datatype datatype, int root,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_BCAST);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_BCAST);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_BCAST);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_BCAST);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -1580,7 +1580,7 @@ int MPI_Bcast( void *buffer, int count, MPI_Datatype datatype, int root,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_BCAST);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_BCAST);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/exscan.c b/src/mpi/coll/exscan.c
index c6e5eda..5191157 100644
--- a/src/mpi/coll/exscan.c
+++ b/src/mpi/coll/exscan.c
@@ -101,7 +101,7 @@ int MPIR_Exscan (
     MPI_Aint true_extent, true_lb, extent;
     void *partial_scan, *tmp_buf;
     MPIR_Op *op_ptr;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
     
     if (count == 0) return MPI_SUCCESS;
 
@@ -115,7 +115,7 @@ int MPIR_Exscan (
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         per_thread->op_errno = 0;
     }
 
@@ -135,12 +135,12 @@ int MPIR_Exscan (
 
     MPID_Datatype_get_extent_macro( datatype, extent );
 
-    MPIU_CHKLMEM_MALLOC(partial_scan, void *, (count*(MPL_MAX(true_extent,extent))), mpi_errno, "partial_scan");
+    MPIR_CHKLMEM_MALLOC(partial_scan, void *, (count*(MPL_MAX(true_extent,extent))), mpi_errno, "partial_scan");
     /* adjust for potential negative lower bound in datatype */
     partial_scan = (void *)((char*)partial_scan - true_lb);
 
     /* need to allocate temporary buffer to store incoming data*/
-    MPIU_CHKLMEM_MALLOC(tmp_buf, void *, (count*(MPL_MAX(true_extent,extent))), mpi_errno, "tmp_buf");
+    MPIR_CHKLMEM_MALLOC(tmp_buf, void *, (count*(MPL_MAX(true_extent,extent))), mpi_errno, "tmp_buf");
     /* adjust for potential negative lower bound in datatype */
     tmp_buf = (void *)((char*)tmp_buf - true_lb);
 
@@ -220,14 +220,14 @@ int MPIR_Exscan (
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
 
         if (per_thread->op_errno)
             mpi_errno = per_thread->op_errno;
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -314,12 +314,12 @@ int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_EXSCAN);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_EXSCAN);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_EXSCAN);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_EXSCAN);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -396,7 +396,7 @@ int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat
     /* ... end of body of routine ... */
     
   fn_exit:    
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_EXSCAN);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_EXSCAN);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/gather.c b/src/mpi/coll/gather.c
index f2e6351..1a1afac 100644
--- a/src/mpi/coll/gather.c
+++ b/src/mpi/coll/gather.c
@@ -106,7 +106,7 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     MPI_Aint struct_displs[2];
     MPI_Datatype types[2], tmp_type;
     int copy_offset = 0, copy_blks = 0;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
 #ifdef MPID_HAS_HETERO
     int position, recv_size;
@@ -132,7 +132,7 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     if (rank == root) 
     {
         MPID_Datatype_get_extent_macro(recvtype, extent);
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf+
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf+
 					 (extent*recvcount*comm_size));
     }
 
@@ -172,7 +172,7 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 	    tmp_buf_size = 0;
 
 	if (tmp_buf_size) {
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
 	}
 
         if (rank == root)
@@ -230,7 +230,7 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 			}
 			else if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) {
 			    /* small transfer size case. cast ok */
-			    MPIU_Assert(recvblks*nbytes == (int)(recvblks*nbytes));
+			    MPIR_Assert(recvblks*nbytes == (int)(recvblks*nbytes));
 			    mpi_errno = MPIC_Recv(tmp_buf, (int)(recvblks * nbytes),
 					    MPI_BYTE, src, MPIR_GATHER_TAG,
 					    comm_ptr, &status, errflag);
@@ -324,7 +324,7 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 		}
 		else {
 		    blocks[0] = sendcount;
-		    struct_displs[0] = MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf;
+		    struct_displs[0] = MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf;
 		    types[0] = sendtype;
 		    /* check for overflow.  work around int limits if needed*/
 		    if (curr_cnt - nbytes != (int)(curr_cnt - nbytes)) {
@@ -332,10 +332,10 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 			    MPIR_Type_contiguous_x_impl(curr_cnt - nbytes,
 					    MPI_BYTE, &(types[1]));
 		    } else {
-			    MPIU_Assign_trunc(blocks[1],  curr_cnt - nbytes, int);
+			    MPIR_Assign_trunc(blocks[1],  curr_cnt - nbytes, int);
 			    types[1] = MPI_BYTE;
 		    }
-		    struct_displs[1] = MPIU_VOID_PTR_CAST_TO_MPI_AINT tmp_buf;
+		    struct_displs[1] = MPIR_VOID_PTR_CAST_TO_MPI_AINT tmp_buf;
 		    mpi_errno = MPIR_Type_create_struct_impl(2, blocks, struct_displs, types, &tmp_type);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                     
@@ -385,7 +385,7 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
         else
             MPIR_Pack_size_impl(sendcount*(comm_size/2), sendtype, &tmp_buf_size);
 
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
 
         position = 0;
         if (sendbuf != MPI_IN_PLACE)
@@ -479,7 +479,7 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 #endif /* MPID_HAS_HETERO */
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -519,7 +519,7 @@ int MPIR_Gather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     MPI_Aint extent, true_extent, true_lb = 0;
     void *tmp_buf=NULL;
     MPIR_Comm *newcomm_ptr = NULL;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     if (root == MPI_PROC_NULL)
     {
@@ -570,16 +570,16 @@ int MPIR_Gather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                 MPIR_Type_get_true_extent_impl(sendtype, &true_lb, &true_extent);
                 MPID_Datatype_get_extent_macro(sendtype, extent);
  
-		MPIU_Ensure_Aint_fits_in_pointer(sendcount*local_size*
+		MPIR_Ensure_Aint_fits_in_pointer(sendcount*local_size*
 						 (MPL_MAX(extent, true_extent)));
-                MPIU_CHKLMEM_MALLOC(tmp_buf, void *, sendcount*local_size*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
+                MPIR_CHKLMEM_MALLOC(tmp_buf, void *, sendcount*local_size*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
                 /* adjust for potential negative lower bound in datatype */
                 tmp_buf = (void *)((char*)tmp_buf - true_lb);
             }
             
             /* all processes in remote group form new intracommunicator */
             if (!comm_ptr->local_comm) {
-                mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
+                mpi_errno = MPII_Setup_intercomm_localcomm( comm_ptr );
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
 
@@ -616,7 +616,7 @@ int MPIR_Gather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
         if (root == MPI_ROOT)
 	{
             MPID_Datatype_get_extent_macro(recvtype, extent);
-            MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+            MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
 					     (recvcount*remote_size*extent));
 
             for (i=0; i<remote_size; i++)
@@ -646,7 +646,7 @@ int MPIR_Gather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     }
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -767,12 +767,12 @@ int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GATHER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GATHER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_GATHER);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_GATHER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -884,7 +884,7 @@ int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_GATHER);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_GATHER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/gatherv.c b/src/mpi/coll/gatherv.c
index c0f496e..1d5d184 100644
--- a/src/mpi/coll/gatherv.c
+++ b/src/mpi/coll/gatherv.c
@@ -93,7 +93,7 @@ int MPIR_Gatherv (
     int min_procs;
     MPIR_Request **reqarray;
     MPI_Status *starray;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     rank = comm_ptr->rank;
     
@@ -107,11 +107,11 @@ int MPIR_Gatherv (
 
         MPID_Datatype_get_extent_macro(recvtype, extent);
 	/* each node can make sure it is not going to overflow aint */
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
 					 displs[rank] * extent);
 
-        MPIU_CHKLMEM_MALLOC(reqarray, MPIR_Request **, comm_size * sizeof(MPIR_Request *), mpi_errno, "reqarray");
-        MPIU_CHKLMEM_MALLOC(starray, MPI_Status *, comm_size * sizeof(MPI_Status), mpi_errno, "starray");
+        MPIR_CHKLMEM_MALLOC(reqarray, MPIR_Request **, comm_size * sizeof(MPIR_Request *), mpi_errno, "reqarray");
+        MPIR_CHKLMEM_MALLOC(starray, MPI_Status *, comm_size * sizeof(MPI_Status), mpi_errno, "starray");
 
         reqs = 0;
         for (i = 0; i < comm_size; i++) {
@@ -192,7 +192,7 @@ int MPIR_Gatherv (
     
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -283,12 +283,12 @@ int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GATHERV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GATHERV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_GATHERV);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_GATHERV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -418,7 +418,7 @@ int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_GATHERV);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_GATHERV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/helper_fns.c b/src/mpi/coll/helper_fns.c
index d2f015f..2c38cdc 100644
--- a/src/mpi/coll/helper_fns.c
+++ b/src/mpi/coll/helper_fns.c
@@ -54,10 +54,10 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp
     int sendtype_iscontig, recvtype_iscontig;
     MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz;
     MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_LOCALCOPY);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_LOCALCOPY);
 
     MPID_Datatype_get_size_macro(sendtype, sendsize);
     MPID_Datatype_get_size_macro(recvtype, recvsize);
@@ -81,7 +81,7 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp
     /* Builtin types is the common case; optimize for it */
     if ((HANDLE_GET_KIND(sendtype) == HANDLE_KIND_BUILTIN) &&
         HANDLE_GET_KIND(recvtype) == HANDLE_KIND_BUILTIN) {
-        MPIU_Memcpy(recvbuf, sendbuf, copy_sz);
+        MPIR_Memcpy(recvbuf, sendbuf, copy_sz);
         goto fn_exit;
     }
 
@@ -99,7 +99,7 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp
                                   ((char *)sendbuf + sendtype_true_lb),
                                   copy_sz);
 #endif
-        MPIU_Memcpy(((char *) recvbuf + recvtype_true_lb),
+        MPIR_Memcpy(((char *) recvbuf + recvtype_true_lb),
                ((char *) sendbuf + sendtype_true_lb),
                copy_sz);
     }
@@ -132,7 +132,7 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp
 	MPID_Segment rseg;
 	intptr_t rfirst;
 
-        MPIU_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf");
+        MPIR_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf");
 
 	MPID_Segment_init(sendbuf, sendcount, sendtype, &sseg, 0);
 	MPID_Segment_init(recvbuf, recvcount, recvtype, &rseg, 0);
@@ -156,13 +156,13 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp
 	    }
 	    
 	    MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
-	    MPIU_Assert(last > sfirst);
+	    MPIR_Assert(last > sfirst);
 	    
 	    buf_end = buf + buf_off + (last - sfirst);
 	    sfirst = last;
 	    
 	    MPID_Segment_unpack(&rseg, rfirst, &last, buf);
-	    MPIU_Assert(last > rfirst);
+	    MPIR_Assert(last > rfirst);
 
 	    rfirst = last;
 
@@ -186,8 +186,8 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp
     
     
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_LOCALCOPY);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_LOCALCOPY);
     return mpi_errno;
 
   fn_fail:
@@ -206,9 +206,9 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp
 int MPIC_Wait(MPIR_Request * request_ptr, MPIR_Errflag_t *errflag)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_WAIT);
 
-    MPIDI_PT2PT_FUNC_ENTER(MPID_STATE_MPIC_WAIT);
+    MPIR_FUNC_VERBOSE_PT2PT_ENTER(MPID_STATE_MPIC_WAIT);
 
     MPL_DBG_MSG_S(MPIR_DBG_PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
 
@@ -235,7 +235,7 @@ int MPIC_Wait(MPIR_Request * request_ptr, MPIR_Errflag_t *errflag)
 
  fn_exit:
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "OUT: errflag = %d", *errflag);
-    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_WAIT);
+    MPIR_FUNC_VERBOSE_PT2PT_EXIT(MPID_STATE_MPIC_WAIT);
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
@@ -274,9 +274,9 @@ int MPIC_Send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
     int mpi_errno = MPI_SUCCESS;
     int context_id;
     MPIR_Request *request_ptr = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIC_SEND);
 
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "IN: errflag = %d", *errflag);
 
@@ -306,7 +306,7 @@ int MPIC_Send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
 
  fn_exit:
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "OUT: errflag = %d", *errflag);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIC_SEND);
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
@@ -333,9 +333,9 @@ int MPIC_Recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int source, int
     int context_id;
     MPI_Status mystatus;
     MPIR_Request *request_ptr = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIC_RECV);
 
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "IN: errflag = %d", *errflag);
 
@@ -366,12 +366,12 @@ int MPIC_Recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int source, int
     }
 
     if (MPI_SUCCESS == MPIR_ERR_GET_CLASS(status->MPI_ERROR)) {
-        MPIU_Assert(status->MPI_TAG == tag);
+        MPIR_Assert(status->MPI_TAG == tag);
     }
 
  fn_exit:
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "OUT: errflag = %d", *errflag);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIC_RECV);
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
@@ -390,9 +390,9 @@ int MPIC_Ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
     int mpi_errno = MPI_SUCCESS;
     int context_id;
     MPIR_Request *request_ptr = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_SSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_SSEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIC_SSEND);
 
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "IN: errflag = %d", *errflag);
 
@@ -422,7 +422,7 @@ int MPIC_Ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
 
  fn_exit:
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "OUT: errflag = %d", *errflag);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIC_SSEND);
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
@@ -451,9 +451,9 @@ int MPIC_Sendrecv(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype
     int context_id;
     MPI_Status mystatus;
     MPIR_Request *recv_req_ptr = NULL, *send_req_ptr = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_SENDRECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIC_SENDRECV);
 
     MPL_DBG_MSG_S(MPIR_DBG_PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
 
@@ -503,7 +503,7 @@ int MPIC_Sendrecv(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype
  fn_exit:
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "OUT: errflag = %d", *errflag);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIC_SENDRECV);
     return mpi_errno;
  fn_fail:
     if (send_req_ptr)
@@ -527,16 +527,16 @@ int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
 {
     int mpi_errno = MPI_SUCCESS;
     MPI_Status mystatus;
-    MPIU_Context_id_t context_id_offset;
+    MPIR_Context_id_t context_id_offset;
     MPIR_Request *sreq = NULL;
     MPIR_Request *rreq = NULL;
     void *tmpbuf = NULL;
     MPI_Aint tmpbuf_size = 0;
     MPI_Aint tmpbuf_count = 0;
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_REPLACE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIC_SENDRECV_REPLACE);
 
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "IN: errflag = %d", *errflag);
 
@@ -558,7 +558,7 @@ int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
 
     if (count > 0 && dest != MPI_PROC_NULL) {
         MPIR_Pack_size_impl(count, datatype, &tmpbuf_size);
-        MPIU_CHKLMEM_MALLOC(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");
+        MPIR_CHKLMEM_MALLOC(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");
 
         mpi_errno = MPIR_Pack_impl(buf, count, datatype, tmpbuf, tmpbuf_size, &tmpbuf_count);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -598,9 +598,9 @@ int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
     MPIR_Request_free(rreq);
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "OUT: errflag = %d", *errflag);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_REPLACE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIC_SENDRECV_REPLACE);
     return mpi_errno;
  fn_fail:
      if (sreq)
@@ -619,9 +619,9 @@ int MPIC_Isend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
 {
     int mpi_errno = MPI_SUCCESS;
     int context_id;
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_ISEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_ISEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIC_ISEND);
 
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "IN: errflag = %d", *errflag);
 
@@ -645,7 +645,7 @@ int MPIC_Isend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_ISEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIC_ISEND);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -660,9 +660,9 @@ int MPIC_Issend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest
 {
     int mpi_errno = MPI_SUCCESS;
     int context_id;
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_ISSEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_ISSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIC_ISSEND);
 
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "IN: errflag = %d", *errflag);
 
@@ -686,7 +686,7 @@ int MPIC_Issend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_ISSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIC_ISSEND);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -701,9 +701,9 @@ int MPIC_Irecv(void *buf, MPI_Aint count, MPI_Datatype datatype, int source,
 {
     int mpi_errno = MPI_SUCCESS;
     int context_id;
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_IRECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_IRECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_IRECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIC_IRECV);
 
     MPIR_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                          "**countneg", "**countneg %d", count);
@@ -716,7 +716,7 @@ int MPIC_Irecv(void *buf, MPI_Aint count, MPI_Datatype datatype, int source,
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_IRECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIC_IRECV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -735,10 +735,10 @@ int MPIC_Waitall(int numreq, MPIR_Request *requests[], MPI_Status statuses[], MP
     MPI_Request *request_ptrs = request_ptr_array;
     MPI_Status status_static_array[MPIC_REQUEST_PTR_ARRAY_SIZE];
     MPI_Status *status_array = statuses;
-    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAITALL);
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIC_WAITALL);
+    MPIR_CHKLMEM_DECL(2);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_WAITALL);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIC_WAITALL);
 
     MPL_DBG_MSG_S(MPIR_DBG_PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
 
@@ -747,8 +747,8 @@ int MPIC_Waitall(int numreq, MPIR_Request *requests[], MPI_Status statuses[], MP
     }
 
     if (numreq > MPIC_REQUEST_PTR_ARRAY_SIZE) {
-        MPIU_CHKLMEM_MALLOC(request_ptrs, MPI_Request *, numreq * sizeof(MPI_Request), mpi_errno, "request pointers");
-        MPIU_CHKLMEM_MALLOC(status_array, MPI_Status *, numreq * sizeof(MPI_Status), mpi_errno, "status objects");
+        MPIR_CHKLMEM_MALLOC(request_ptrs, MPI_Request *, numreq * sizeof(MPI_Request), mpi_errno, "request pointers");
+        MPIR_CHKLMEM_MALLOC(status_array, MPI_Status *, numreq * sizeof(MPI_Status), mpi_errno, "status objects");
     }
 
     for (i = 0; i < numreq; ++i) {
@@ -775,10 +775,10 @@ int MPIC_Waitall(int numreq, MPIR_Request *requests[], MPI_Status statuses[], MP
 
  fn_exit:
     if (numreq > MPIC_REQUEST_PTR_ARRAY_SIZE)
-        MPIU_CHKLMEM_FREEALL();
+        MPIR_CHKLMEM_FREEALL();
 
     MPL_DBG_MSG_D(MPIR_DBG_PT2PT, TYPICAL, "OUT: errflag = %d", *errflag);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_WAITALL);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIC_WAITALL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpi/coll/iallgather.c b/src/mpi/coll/iallgather.c
index b352a6f..ef0f2ae 100644
--- a/src/mpi/coll/iallgather.c
+++ b/src/mpi/coll/iallgather.c
@@ -57,7 +57,7 @@ static int dtp_release_ref(MPIR_Comm *comm, int tag, void *state)
 #define FUNCNAME MPIR_Iallgather_rec_dbl
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct shared_state *ss = NULL;
@@ -81,16 +81,16 @@ int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sen
     MPID_Datatype_get_extent_macro( recvtype, recvtype_extent );
 
     /* This is the largest offset we add to recvbuf */
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                      (comm_size * recvcount * recvtype_extent));
 
     /*  copy local data into recvbuf */
     if (sendbuf != MPI_IN_PLACE) {
-        mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                     ((char *)recvbuf + rank*recvcount*recvtype_extent),
                                     recvcount, recvtype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     MPIR_SCHED_CHKPMEM_MALLOC(ss, struct shared_state *, sizeof(struct shared_state), mpi_errno, "ss");
@@ -121,19 +121,19 @@ int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sen
         recv_offset = dst_tree_root * recvcount * recvtype_extent;
 
         if (dst < comm_size) {
-            mpi_errno = MPID_Sched_send_defer(((char *)recvbuf + send_offset),
+            mpi_errno = MPIR_Sched_send_defer(((char *)recvbuf + send_offset),
                                               &ss->curr_count, recvtype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* send-recv, no sched barrier here */
-            mpi_errno = MPID_Sched_recv_status(((char *)recvbuf + recv_offset),
+            mpi_errno = MPIR_Sched_recv_status(((char *)recvbuf + recv_offset),
                                                ((comm_size-dst_tree_root)*recvcount),
                                                recvtype, dst, comm_ptr, &ss->status, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
-            mpi_errno = MPID_Sched_cb(&get_count, ss, s);
+            mpi_errno = MPIR_Sched_cb(&get_count, ss, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
 
         /* if some processes in this process's subtree in this step
@@ -186,11 +186,11 @@ int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sen
                     /* last_recv_count was set in the previous
                        receive. that's the amount of data to be
                        sent now. */
-                    mpi_errno = MPID_Sched_send_defer(((char *)recvbuf + offset),
+                    mpi_errno = MPIR_Sched_send_defer(((char *)recvbuf + offset),
                                                       &ss->last_recv_count,
                                                       recvtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 /* recv only if this proc. doesn't have data and sender
                    has data */
@@ -200,13 +200,13 @@ int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sen
                 {
                     /* nprocs_completed is also equal to the
                        no. of processes whose data we don't have */
-                    mpi_errno = MPID_Sched_recv_status(((char *)recvbuf + offset),
+                    mpi_errno = MPIR_Sched_recv_status(((char *)recvbuf + offset),
                                                        ((comm_size - (my_tree_root + mask))*recvcount),
                                                        recvtype, dst, comm_ptr, &ss->status, s);
-                    MPID_SCHED_BARRIER(s);
-                    mpi_errno = MPID_Sched_cb(&get_count, ss, s);
+                    MPIR_SCHED_BARRIER(s);
+                    mpi_errno = MPIR_Sched_cb(&get_count, ss, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
 
                 tmp_mask >>= 1;
@@ -220,7 +220,7 @@ int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sen
     }
 
     if (recv_dtp) {
-        mpi_errno = MPID_Sched_cb(dtp_release_ref, recv_dtp, s);
+        mpi_errno = MPIR_Sched_cb(dtp_release_ref, recv_dtp, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
@@ -236,7 +236,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iallgather_bruck
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int pof2, curr_cnt, rem, src, dst;
@@ -252,7 +252,7 @@ int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendt
     MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &recvtype_true_extent);
 
     /* This is the largest offset we add to recvbuf */
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                      (comm_size * recvcount * recvtype_extent));
 
     /* allocate a temporary buffer of the same size as recvbuf. */
@@ -267,17 +267,17 @@ int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendt
 
     /* copy local data to the top of tmp_buf */
     if (sendbuf != MPI_IN_PLACE) {
-        mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                     tmp_buf, recvcount, recvtype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     else {
-        mpi_errno = MPID_Sched_copy(((char *)recvbuf + rank * recvcount * recvtype_extent),
+        mpi_errno = MPIR_Sched_copy(((char *)recvbuf + rank * recvcount * recvtype_extent),
                                     recvcount, recvtype, tmp_buf,
                                     recvcount, recvtype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* do the first \floor(\lg p) steps */
@@ -288,13 +288,13 @@ int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendt
         src = (rank + pof2) % comm_size;
         dst = (rank - pof2 + comm_size) % comm_size;
 
-        mpi_errno = MPID_Sched_send(tmp_buf, curr_cnt, recvtype, dst, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(tmp_buf, curr_cnt, recvtype, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         /* logically sendrecv, so no barrier here */
-        mpi_errno = MPID_Sched_recv(((char *)tmp_buf + curr_cnt*recvtype_extent),
+        mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + curr_cnt*recvtype_extent),
                                     curr_cnt, recvtype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         curr_cnt *= 2;
         pof2 *= 2;
@@ -307,26 +307,26 @@ int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendt
         src = (rank + pof2) % comm_size;
         dst = (rank - pof2 + comm_size) % comm_size;
 
-        mpi_errno = MPID_Sched_send(tmp_buf, rem * recvcount, recvtype, dst, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(tmp_buf, rem * recvcount, recvtype, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         /* logically sendrecv, so no barrier here */
-        mpi_errno = MPID_Sched_recv(((char *)tmp_buf + curr_cnt*recvtype_extent),
+        mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + curr_cnt*recvtype_extent),
                                     (rem * recvcount), recvtype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* Rotate blocks in tmp_buf down by (rank) blocks and store
      * result in recvbuf. */
 
-    mpi_errno = MPID_Sched_copy(tmp_buf, ((comm_size-rank)*recvcount), recvtype,
+    mpi_errno = MPIR_Sched_copy(tmp_buf, ((comm_size-rank)*recvcount), recvtype,
                                 ((char *) recvbuf + rank*recvcount*recvtype_extent),
                                 ((comm_size-rank)*recvcount), recvtype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     if (rank) {
-        mpi_errno = MPID_Sched_copy(((char *) tmp_buf + (comm_size-rank)*recvcount*recvtype_extent),
+        mpi_errno = MPIR_Sched_copy(((char *) tmp_buf + (comm_size-rank)*recvcount*recvtype_extent),
                                    rank*recvcount, recvtype,
                                    recvbuf, rank*recvcount, recvtype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -344,7 +344,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iallgather_ring
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallgather_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallgather_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, comm_size;
@@ -357,16 +357,16 @@ int MPIR_Iallgather_ring(const void *sendbuf, int sendcount, MPI_Datatype sendty
     MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
 
     /* This is the largest offset we add to recvbuf */
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                      (comm_size * recvcount * recvtype_extent));
 
     /* First, load the "local" version in the recvbuf. */
     if (sendbuf != MPI_IN_PLACE) {
-        mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                     ((char *)recvbuf + rank*recvcount*recvtype_extent),
                                     recvcount, recvtype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* Now, send left to right.  This fills in the receive area in
@@ -377,14 +377,14 @@ int MPIR_Iallgather_ring(const void *sendbuf, int sendcount, MPI_Datatype sendty
     j     = rank;
     jnext = left;
     for (i=1; i<comm_size; i++) {
-        mpi_errno = MPID_Sched_send(((char *)recvbuf + j*recvcount*recvtype_extent),
+        mpi_errno = MPIR_Sched_send(((char *)recvbuf + j*recvcount*recvtype_extent),
                                      recvcount, recvtype, right, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         /* concurrent, no barrier here */
-        mpi_errno = MPID_Sched_recv(((char *)recvbuf + jnext*recvcount*recvtype_extent),
+        mpi_errno = MPIR_Sched_recv(((char *)recvbuf + jnext*recvcount*recvtype_extent),
                                      recvcount, recvtype, left, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         j = jnext;
         jnext = (comm_size + jnext - 1) % comm_size;
@@ -445,7 +445,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iallgather_intra
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, recvtype_size;
@@ -488,7 +488,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iallgather_inter
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     /* Intercommunicator Allgather.
        Each group does a gather to local root with the local
@@ -513,7 +513,7 @@ int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendt
         MPID_Datatype_get_extent_macro( sendtype, send_extent );
         extent = MPL_MAX(send_extent, true_extent);
 
-        MPIU_Ensure_Aint_fits_in_pointer(extent * sendcount * local_size);
+        MPIR_Ensure_Aint_fits_in_pointer(extent * sendcount * local_size);
         MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void*, extent*sendcount*local_size, mpi_errno, "tmp_buf");
 
         /* adjust for potential negative lower bound in datatype */
@@ -522,22 +522,22 @@ int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendt
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm)
-        MPIR_Setup_intercomm_localcomm( comm_ptr );
+        MPII_Setup_intercomm_localcomm( comm_ptr );
 
     newcomm_ptr = comm_ptr->local_comm;
 
     if (sendcount != 0) {
-        MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Igather_sched);
+        MPIR_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Igather_sched);
         mpi_errno = newcomm_ptr->coll_fns->Igather_sched(sendbuf, sendcount, sendtype,
                                                    tmp_buf, sendcount, sendtype,
                                                    0, newcomm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* first broadcast from left to right group, then from right to
        left group */
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ibcast_sched);
+    MPIR_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ibcast_sched);
     if (comm_ptr->is_low_group) {
         /* bcast to right*/
         if (sendcount != 0) {
@@ -556,7 +556,7 @@ int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendt
                                                    recvtype, root, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     else {
         /* receive bcast from left */
@@ -576,7 +576,7 @@ int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendt
                                                    sendtype, root, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     MPIR_SCHED_CHKPMEM_COMMIT(s);
@@ -596,21 +596,21 @@ int MPIR_Iallgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendty
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Iallgather_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Iallgather_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Iallgather_sched(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -655,10 +655,10 @@ int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IALLGATHER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IALLGATHER);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IALLGATHER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IALLGATHER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -728,7 +728,7 @@ int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IALLGATHER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IALLGATHER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/iallgatherv.c b/src/mpi/coll/iallgatherv.c
index 1631d33..064fff6 100644
--- a/src/mpi/coll/iallgatherv.c
+++ b/src/mpi/coll/iallgatherv.c
@@ -33,7 +33,7 @@ int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, v
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                              void *recvbuf, const int recvcounts[], const int displs[],
-                             MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                             MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, rank, i, j, k;
@@ -52,7 +52,7 @@ int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype se
     if (comm_ptr->is_hetero)
         is_homogeneous = 0;
 #endif
-    MPIU_Assert(is_homogeneous); /* we only handle the homogeneous for now */
+    MPIR_Assert(is_homogeneous); /* we only handle the homogeneous for now */
 
     /* need to receive contiguously into tmp_buf because
        displs could make the recvbuf noncontiguous */
@@ -66,7 +66,7 @@ int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype se
     if (total_count == 0)
         goto fn_exit;
 
-    MPIU_Ensure_Aint_fits_in_pointer(total_count*(MPL_MAX(recvtype_true_extent, recvtype_extent)));
+    MPIR_Ensure_Aint_fits_in_pointer(total_count*(MPL_MAX(recvtype_true_extent, recvtype_extent)));
     MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, total_count*(MPL_MAX(recvtype_true_extent,recvtype_extent)), mpi_errno, "tmp_buf");
 
     /* adjust for potential negative lower bound in datatype */
@@ -77,7 +77,7 @@ int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype se
     for (i=0; i<rank; i++) position += recvcounts[i];
     if (sendbuf != MPI_IN_PLACE)
     {
-        mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                    ((char *)tmp_buf + position*recvtype_extent),
                                    recvcounts[rank], recvtype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -85,7 +85,7 @@ int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype se
     else
     {
         /* if in_place specified, local data is found in recvbuf */
-        mpi_errno = MPID_Sched_copy(((char *)recvbuf + displs[rank]*recvtype_extent),
+        mpi_errno = MPIR_Sched_copy(((char *)recvbuf + displs[rank]*recvtype_extent),
                                    recvcounts[rank], recvtype,
                                    ((char *)tmp_buf + position*recvtype_extent),
                                    recvcounts[rank], recvtype, s);
@@ -135,14 +135,14 @@ int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype se
             for (j = dst_tree_root; j < (dst_tree_root + mask) && j < comm_size; ++j)
                 incoming_count += recvcounts[j];
 
-            mpi_errno = MPID_Sched_send(((char *)tmp_buf + send_offset * recvtype_extent),
+            mpi_errno = MPIR_Sched_send(((char *)tmp_buf + send_offset * recvtype_extent),
                                         curr_count, recvtype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* sendrecv, no barrier here */
-            mpi_errno = MPID_Sched_recv(((char *)tmp_buf + recv_offset * recvtype_extent),
+            mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + recv_offset * recvtype_extent),
                                         incoming_count, recvtype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             curr_count += incoming_count;
         }
@@ -204,10 +204,10 @@ int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype se
                     /* incoming_count was set in the previous
                        receive. that's the amount of data to be
                        sent now. */
-                    mpi_errno = MPID_Sched_send(((char *)tmp_buf + offset),
+                    mpi_errno = MPIR_Sched_send(((char *)tmp_buf + offset),
                                                 incoming_count, recvtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 /* recv only if this proc. doesn't have data and sender
                    has data */
@@ -226,11 +226,11 @@ int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype se
                     for (j = dst_tree_root; j < (dst_tree_root + mask) && j < comm_size; ++j)
                         incoming_count += recvcounts[j];
 
-                    mpi_errno = MPID_Sched_recv(((char *)tmp_buf + offset * recvtype_extent),
+                    mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + offset * recvtype_extent),
                                                 incoming_count, recvtype,
                                                 dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                     curr_count += incoming_count;
                 }
                 tmp_mask >>= 1;
@@ -244,7 +244,7 @@ int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype se
     }
 
     /* sanity check that we got all of the data blocks */
-    MPIU_Assert(curr_count == total_count);
+    MPIR_Assert(curr_count == total_count);
 
     /* copy data from tmp_buf to recvbuf */
     position = 0;
@@ -252,7 +252,7 @@ int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype se
         if ((sendbuf != MPI_IN_PLACE) || (j != rank)) {
             /* not necessary to copy if in_place and
                j==rank. otherwise copy. */
-            mpi_errno = MPID_Sched_copy(((char *)tmp_buf + position*recvtype_extent),
+            mpi_errno = MPIR_Sched_copy(((char *)tmp_buf + position*recvtype_extent),
                                        recvcounts[j], recvtype,
                                        ((char *)recvbuf + displs[j]*recvtype_extent),
                                        recvcounts[j], recvtype, s);
@@ -275,7 +275,7 @@ fn_fail:
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                            void *recvbuf, const int recvcounts[], const int displs[],
-                           MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                           MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, rank, j, i;
@@ -302,7 +302,7 @@ int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype send
     /* get true extent of recvtype */
     MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &recvtype_true_extent);
 
-    MPIU_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(recvtype_true_extent, recvtype_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(recvtype_true_extent, recvtype_extent));
     recvbuf_extent = total_count * (MPL_MAX(recvtype_true_extent, recvtype_extent));
 
     MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, recvbuf_extent, mpi_errno, "tmp_buf");
@@ -312,17 +312,17 @@ int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype send
 
     /* copy local data to the top of tmp_buf */
     if (sendbuf != MPI_IN_PLACE) {
-        mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                     tmp_buf, recvcounts[rank], recvtype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     else {
-        mpi_errno = MPID_Sched_copy(((char *)recvbuf + displs[rank]*recvtype_extent),
+        mpi_errno = MPIR_Sched_copy(((char *)recvbuf + displs[rank]*recvtype_extent),
                                     recvcounts[rank], recvtype,
                                     tmp_buf, recvcounts[rank], recvtype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* \floor(\lg p) send-recv rounds */
@@ -345,13 +345,13 @@ int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype send
             incoming_count += recvcounts[(src + i) % comm_size];
         }
 
-        mpi_errno = MPID_Sched_send(tmp_buf, curr_count, recvtype, dst, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(tmp_buf, curr_count, recvtype, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         /* sendrecv, no barrier */
-        mpi_errno = MPID_Sched_recv(((char *)tmp_buf + curr_count*recvtype_extent),
+        mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + curr_count*recvtype_extent),
                                     incoming_count, recvtype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* we will send everything we had plus what we just got to next round's dst */
         curr_count += incoming_count;
@@ -368,13 +368,13 @@ int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype send
         for (i=0; i<rem; i++)
             send_cnt += recvcounts[(rank+i)%comm_size];
 
-        mpi_errno = MPID_Sched_send(tmp_buf, send_cnt, recvtype, dst, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(tmp_buf, send_cnt, recvtype, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         /* sendrecv, no barrier */
-        mpi_errno = MPID_Sched_recv(((char *)tmp_buf + curr_count*recvtype_extent),
+        mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + curr_count*recvtype_extent),
                                      (total_count - curr_count), recvtype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* Rotate blocks in tmp_buf down by (rank) blocks and store
@@ -383,7 +383,7 @@ int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype send
     send_cnt = 0;
     for (i=0; i < (comm_size-rank); i++) {
         j = (rank+i)%comm_size;
-        mpi_errno = MPID_Sched_copy(((char *)tmp_buf + send_cnt*recvtype_extent),
+        mpi_errno = MPIR_Sched_copy(((char *)tmp_buf + send_cnt*recvtype_extent),
                                     recvcounts[j], recvtype,
                                     ((char *)recvbuf + displs[j]*recvtype_extent),
                                     recvcounts[j], recvtype, s);
@@ -392,7 +392,7 @@ int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype send
     }
 
     for (i=0; i<rank; i++) {
-        mpi_errno = MPID_Sched_copy(((char *)tmp_buf + send_cnt*recvtype_extent),
+        mpi_errno = MPIR_Sched_copy(((char *)tmp_buf + send_cnt*recvtype_extent),
                                     recvcounts[i], recvtype,
                                     ((char *)recvbuf + displs[i]*recvtype_extent),
                                     recvcounts[i], recvtype, s);
@@ -414,7 +414,7 @@ fn_fail:
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Iallgatherv_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                           void *recvbuf, const int recvcounts[], const int displs[],
-                          MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                          MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int i, total_count;
@@ -441,11 +441,11 @@ int MPIR_Iallgatherv_ring(const void *sendbuf, int sendcount, MPI_Datatype sendt
 
     if (sendbuf != MPI_IN_PLACE) {
         /* First, load the "local" version in the recvbuf. */
-        mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                     ((char *)recvbuf + displs[rank]*recvtype_extent),
                                     recvcounts[rank], recvtype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     left  = (comm_size + rank - 1) % comm_size;
@@ -483,16 +483,16 @@ int MPIR_Iallgatherv_ring(const void *sendbuf, int sendcount, MPI_Datatype sendt
 
         /* Communicate */
         if (recvnow) { /* If there's no data to send, just do a recv call */
-            mpi_errno = MPID_Sched_recv(rbuf, recvnow, recvtype, left, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(rbuf, recvnow, recvtype, left, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             torecv -= recvnow;
         }
         if (sendnow) { /* If there's no data to receive, just do a send call */
-            mpi_errno = MPID_Sched_send(sbuf, sendnow, recvtype, right, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(sbuf, sendnow, recvtype, right, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             tosend -= sendnow;
         }
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         soffset += sendnow;
         roffset += recvnow;
@@ -556,7 +556,7 @@ fn_fail:
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Iallgatherv_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                            void *recvbuf, const int recvcounts[], const int displs[],
-                           MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                           MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int i, comm_size, total_count, recvtype_size;
@@ -604,7 +604,7 @@ fn_fail:
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                            void *recvbuf, const int recvcounts[], const int displs[],
-                           MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                           MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
 /* Intercommunicator Allgatherv.
    This is done differently from the intercommunicator allgather
@@ -622,7 +622,7 @@ int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype send
     remote_size = comm_ptr->remote_size;
     rank = comm_ptr->rank;
 
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Igatherv_sched);
+    MPIR_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Igatherv_sched);
 
     /* first do an intercommunicator gatherv from left to right group,
        then from right to left group */
@@ -655,19 +655,19 @@ int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype send
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     /* now do an intracommunicator broadcast within each group. we use
        a derived datatype to handle the displacements */
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm) {
-        mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
+        mpi_errno = MPII_Setup_intercomm_localcomm( comm_ptr );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     newcomm_ptr = comm_ptr->local_comm;
-    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Ibcast_sched);
+    MPIR_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Ibcast_sched);
 
     mpi_errno = MPIR_Type_indexed_impl(remote_size, recvcounts, displs, recvtype, &newtype);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -697,21 +697,21 @@ int MPIR_Iallgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendt
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Iallgatherv_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Iallgatherv_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Iallgatherv_sched(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -757,10 +757,10 @@ int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, v
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IALLGATHERV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IALLGATHERV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IALLGATHERV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IALLGATHERV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -838,7 +838,7 @@ int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, v
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IALLGATHERV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IALLGATHERV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/iallreduce.c b/src/mpi/coll/iallreduce.c
index 3539d04..24eabc1 100644
--- a/src/mpi/coll/iallreduce.c
+++ b/src/mpi/coll/iallreduce.c
@@ -34,7 +34,7 @@ int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
 #define FUNCNAME MPIR_Iallreduce_naive
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallreduce_naive(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallreduce_naive(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank;
@@ -50,7 +50,7 @@ int MPIR_Iallreduce_naive(const void *sendbuf, void *recvbuf, int count, MPI_Dat
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     mpi_errno = MPIR_Ibcast_intra(recvbuf, count, datatype, 0, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -66,7 +66,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iallreduce_redscat_allgather
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, rank, newrank, pof2, rem;
@@ -75,11 +75,11 @@ int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int co
     void *tmp_buf = NULL;
     int *cnts, *disps;
     MPIR_SCHED_CHKPMEM_DECL(1);
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     /* we only support builtin datatypes for now, breaking up user types to do
      * the reduce-scatter is tricky */
-    MPIU_Assert(HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN);
+    MPIR_Assert(HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -88,7 +88,7 @@ int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int co
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
     MPID_Datatype_get_extent_macro(datatype, extent);
 
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
     MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
 
     /* adjust for potential negative lower bound in datatype */
@@ -96,10 +96,10 @@ int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int co
 
     /* copy local data into recvbuf */
     if (sendbuf != MPI_IN_PLACE) {
-        mpi_errno = MPID_Sched_copy(sendbuf, count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, count, datatype,
                                     recvbuf, count, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* find nearest power-of-two less than or equal to comm_size */
@@ -117,9 +117,9 @@ int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int co
 
     if (rank < 2*rem) {
         if (rank % 2 == 0) { /* even */
-            mpi_errno = MPID_Sched_send(recvbuf, count, datatype, rank+1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(recvbuf, count, datatype, rank+1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* temporarily set the rank to -1 so that this
                process does not pariticipate in recursive
@@ -127,16 +127,16 @@ int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int co
             newrank = -1;
         }
         else { /* odd */
-            mpi_errno = MPID_Sched_recv(tmp_buf, count, datatype, rank-1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_buf, count, datatype, rank-1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* do the reduction on received data. since the
                ordering is right, it doesn't matter whether
                the operation is commutative or not. */
-            mpi_errno = MPID_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
+            mpi_errno = MPIR_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* change the rank */
             newrank = rank / 2;
@@ -153,10 +153,10 @@ int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int co
          * calculated directly during the loop, rather than requiring a less-scalable
          * "2*pof2"-sized memory allocation */
 
-        MPIU_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
-        MPIU_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
+        MPIR_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
+        MPIR_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
 
-        MPIU_Assert(count >= pof2); /* the cnts calculations assume this */
+        MPIR_Assert(count >= pof2); /* the cnts calculations assume this */
         for (i=0; i<(pof2-1); i++)
             cnts[i] = count/pof2;
         cnts[pof2-1] = count - (count/pof2)*(pof2-1);
@@ -190,25 +190,25 @@ int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int co
             }
 
             /* Send data from recvbuf. Recv into tmp_buf */
-            mpi_errno = MPID_Sched_recv(((char *)tmp_buf + disps[recv_idx]*extent),
+            mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + disps[recv_idx]*extent),
                                         recv_cnt, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* sendrecv, no barrier here */
-            mpi_errno = MPID_Sched_send(((char *)recvbuf + disps[send_idx]*extent),
+            mpi_errno = MPIR_Sched_send(((char *)recvbuf + disps[send_idx]*extent),
                                         send_cnt, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* tmp_buf contains data received in this step.
                recvbuf contains data accumulated so far */
 
             /* This algorithm is used only for predefined ops
                and predefined ops are always commutative. */
-            mpi_errno = MPID_Sched_reduce(((char *)tmp_buf + disps[recv_idx]*extent),
+            mpi_errno = MPIR_Sched_reduce(((char *)tmp_buf + disps[recv_idx]*extent),
                                           ((char *)recvbuf + disps[recv_idx]*extent),
                                           recv_cnt, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* update send_idx for next iteration */
             send_idx = recv_idx;
@@ -249,14 +249,14 @@ int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int co
                     recv_cnt += cnts[i];
             }
 
-            mpi_errno = MPID_Sched_recv(((char *)recvbuf + disps[recv_idx]*extent),
+            mpi_errno = MPIR_Sched_recv(((char *)recvbuf + disps[recv_idx]*extent),
                                         recv_cnt, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* sendrecv, no barrier here */
-            mpi_errno = MPID_Sched_send(((char *)recvbuf + disps[send_idx]*extent),
+            mpi_errno = MPIR_Sched_send(((char *)recvbuf + disps[send_idx]*extent),
                                         send_cnt, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             if (newrank > newdst) send_idx = recv_idx;
 
@@ -269,18 +269,18 @@ int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int co
        (rank-1), the ranks who didn't participate above. */
     if (rank < 2*rem) {
         if (rank % 2) { /* odd */
-            mpi_errno = MPID_Sched_send(recvbuf, count, datatype, rank-1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(recvbuf, count, datatype, rank-1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
         else { /* even */
-            mpi_errno = MPID_Sched_recv(recvbuf, count, datatype, rank+1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(recvbuf, count, datatype, rank+1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
     }
 
     MPIR_SCHED_CHKPMEM_COMMIT(s);
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     MPIR_SCHED_CHKPMEM_REAP(s);
@@ -291,7 +291,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iallreduce_rec_dbl
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int pof2, rem, comm_size, is_commutative, rank;
@@ -309,7 +309,7 @@ int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_D
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
     MPID_Datatype_get_extent_macro(datatype, extent);
 
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
     MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
 
     /* adjust for potential negative lower bound in datatype */
@@ -317,10 +317,10 @@ int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_D
 
     /* copy local data into recvbuf */
     if (sendbuf != MPI_IN_PLACE) {
-        mpi_errno = MPID_Sched_copy(sendbuf, count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, count, datatype,
                                     recvbuf, count, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* find nearest power-of-two less than or equal to comm_size */
@@ -338,9 +338,9 @@ int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_D
 
     if (rank < 2*rem) {
         if (rank % 2 == 0) { /* even */
-            mpi_errno = MPID_Sched_send(recvbuf, count, datatype, rank+1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(recvbuf, count, datatype, rank+1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* temporarily set the rank to -1 so that this
                process does not pariticipate in recursive
@@ -348,16 +348,16 @@ int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_D
             newrank = -1;
         }
         else { /* odd */
-            mpi_errno = MPID_Sched_recv(tmp_buf, count, datatype, rank-1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_buf, count, datatype, rank-1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* do the reduction on received data. since the
                ordering is right, it doesn't matter whether
                the operation is commutative or not. */
-            mpi_errno = MPID_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
+            mpi_errno = MPIR_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* change the rank */
             newrank = rank / 2;
@@ -375,34 +375,34 @@ int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_D
 
             /* Send the most current data, which is in recvbuf. Recv
                into tmp_buf */
-            mpi_errno = MPID_Sched_recv(tmp_buf, count, datatype, dst, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_buf, count, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* sendrecv, no barrier here */
-            mpi_errno = MPID_Sched_send(recvbuf, count, datatype,
+            mpi_errno = MPIR_Sched_send(recvbuf, count, datatype,
                                         dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* tmp_buf contains data received in this step.
                recvbuf contains data accumulated so far */
 
             if (is_commutative  || (dst < rank)) {
                 /* op is commutative OR the order is already right */
-                mpi_errno = MPID_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
             else {
                 /* op is noncommutative and the order is not right */
-                mpi_errno = MPID_Sched_reduce(recvbuf, tmp_buf, count, datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(recvbuf, tmp_buf, count, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
 
                 /* copy result back into recvbuf */
-                mpi_errno = MPID_Sched_copy(tmp_buf, count, datatype,
+                mpi_errno = MPIR_Sched_copy(tmp_buf, count, datatype,
                                             recvbuf, count, datatype, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
             mask <<= 1;
         }
@@ -413,11 +413,11 @@ int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_D
        (rank-1), the ranks who didn't participate above. */
     if (rank < 2*rem) {
         if (rank % 2) { /* odd */
-            mpi_errno = MPID_Sched_send(recvbuf, count, datatype, rank-1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(recvbuf, count, datatype, rank-1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
         else { /* even */
-            mpi_errno = MPID_Sched_recv(recvbuf, count, datatype, rank+1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(recvbuf, count, datatype, rank+1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
     }
@@ -434,12 +434,12 @@ fn_fail:
 #define FUNCNAME MPIR_Iallreduce_intra
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallreduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallreduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, is_homogeneous, pof2, type_size;
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     is_homogeneous = TRUE;
 #ifdef MPID_HAS_HETERO
@@ -495,7 +495,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iallreduce_inter
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
 /* Intercommunicator Allreduce.
    We first do an intercommunicator reduce to rank 0 on left group,
@@ -509,7 +509,7 @@ int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Dat
     int rank, root;
     MPIR_Comm *lcomm_ptr = NULL;
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
 
     rank = comm_ptr->rank;
 
@@ -543,17 +543,17 @@ int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Dat
     }
 
     /* don't bcast until the reductions have finished */
-    mpi_errno = MPID_Sched_barrier(s);
+    mpi_errno = MPIR_Sched_barrier(s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm) {
-        MPIR_Setup_intercomm_localcomm( comm_ptr );
+        MPII_Setup_intercomm_localcomm( comm_ptr );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
     lcomm_ptr = comm_ptr->local_comm;
 
-    MPIU_Assert(lcomm_ptr->coll_fns && lcomm_ptr->coll_fns->Ibcast_sched);
+    MPIR_Assert(lcomm_ptr->coll_fns && lcomm_ptr->coll_fns->Ibcast_sched);
     mpi_errno = lcomm_ptr->coll_fns->Ibcast_sched(recvbuf, count, datatype, 0, lcomm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
@@ -568,7 +568,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iallreduce_SMP
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int is_commutative;
@@ -577,7 +577,7 @@ int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datat
 
     if (!MPIR_CVAR_ENABLE_SMP_COLLECTIVES || !MPIR_CVAR_ENABLE_SMP_ALLREDUCE)
         MPID_Abort(comm_ptr, MPI_ERR_OTHER, 1, "SMP collectives are disabled!");
-    MPIU_Assert(MPIR_Comm_is_node_aware(comm_ptr));
+    MPIR_Assert(MPIR_Comm_is_node_aware(comm_ptr));
 
     nc = comm_ptr->node_comm;
     nrc = comm_ptr->node_roots_comm;
@@ -597,7 +597,7 @@ int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datat
         /* take care of the MPI_IN_PLACE case. For reduce,
            MPI_IN_PLACE is specified only on the root;
            for allreduce it is specified on all processes. */
-        MPIU_Assert(nc->coll_fns && nc->coll_fns->Ireduce_sched);
+        MPIR_Assert(nc->coll_fns && nc->coll_fns->Ireduce_sched);
 
         if ((sendbuf == MPI_IN_PLACE) && (comm_ptr->node_comm->rank != 0)) {
             /* IN_PLACE and not root of reduce. Data supplied to this
@@ -608,31 +608,31 @@ int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datat
             mpi_errno = nc->coll_fns->Ireduce_sched(sendbuf, recvbuf, count, datatype, op, 0, nc, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     } else {
         /* only one process on the node. copy sendbuf to recvbuf */
         if (sendbuf != MPI_IN_PLACE) {
-            mpi_errno = MPID_Sched_copy(sendbuf, count, datatype,
+            mpi_errno = MPIR_Sched_copy(sendbuf, count, datatype,
                                         recvbuf, count, datatype, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* now do an IN_PLACE allreduce among the local roots of all nodes */
     if (nrc != NULL) {
-        MPIU_Assert(nrc->coll_fns && nrc->coll_fns->Iallreduce_sched);
+        MPIR_Assert(nrc->coll_fns && nrc->coll_fns->Iallreduce_sched);
         mpi_errno = nrc->coll_fns->Iallreduce_sched(MPI_IN_PLACE, recvbuf, count, datatype, op, nrc, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* now broadcast the result among local processes */
     if (comm_ptr->node_comm != NULL) {
-        MPIU_Assert(nc->coll_fns && nc->coll_fns->Ibcast_sched);
+        MPIR_Assert(nc->coll_fns && nc->coll_fns->Ibcast_sched);
         mpi_errno = nc->coll_fns->Ibcast_sched(recvbuf, count, datatype, 0, nc, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
 fn_exit:
@@ -651,21 +651,21 @@ int MPIR_Iallreduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Data
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Iallreduce_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Iallreduce_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Iallreduce_sched(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -709,10 +709,10 @@ int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IALLREDUCE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IALLREDUCE);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IALLREDUCE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IALLREDUCE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -783,7 +783,7 @@ int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IALLREDUCE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IALLREDUCE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/ialltoall.c b/src/mpi/coll/ialltoall.c
index 21b9868..8555aff 100644
--- a/src/mpi/coll/ialltoall.c
+++ b/src/mpi/coll/ialltoall.c
@@ -44,7 +44,7 @@ int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, voi
 #define FUNCNAME MPIR_Ialltoall_inplace
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ialltoall_inplace(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ialltoall_inplace(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     void *tmp_buf = NULL;
@@ -55,7 +55,7 @@ int MPIR_Ialltoall_inplace(const void *sendbuf, int sendcount, MPI_Datatype send
     int peer;
     MPIR_SCHED_CHKPMEM_DECL(1);
 
-    MPIU_Assert(sendbuf == MPI_IN_PLACE);
+    MPIR_Assert(sendbuf == MPI_IN_PLACE);
 
     if (recvcount == 0)
         goto fn_exit;
@@ -81,19 +81,19 @@ int MPIR_Ialltoall_inplace(const void *sendbuf, int sendcount, MPI_Datatype send
                     peer = i;
 
                 /* pack to tmp_buf */
-                mpi_errno = MPID_Sched_copy(((char *)recvbuf + peer*recvcount*recvtype_extent),
+                mpi_errno = MPIR_Sched_copy(((char *)recvbuf + peer*recvcount*recvtype_extent),
                                             recvcount, recvtype,
                                             tmp_buf, nbytes, MPI_BYTE, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
 
                 /* now simultaneously send from tmp_buf and recv to recvbuf */
-                mpi_errno = MPID_Sched_send(tmp_buf, nbytes, MPI_BYTE, peer, comm_ptr, s);
+                mpi_errno = MPIR_Sched_send(tmp_buf, nbytes, MPI_BYTE, peer, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                mpi_errno = MPID_Sched_recv(((char *)recvbuf + peer*recvcount*recvtype_extent),
+                mpi_errno = MPIR_Sched_recv(((char *)recvbuf + peer*recvcount*recvtype_extent),
                                             recvcount, recvtype, peer, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
         }
     }
@@ -110,7 +110,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ialltoall_bruck
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
@@ -122,10 +122,10 @@ int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendty
     int count, block;
     MPI_Datatype newtype;
     int *displs;
-    MPIU_CHKLMEM_DECL(1); /* displs */
+    MPIR_CHKLMEM_DECL(1); /* displs */
     MPIR_SCHED_CHKPMEM_DECL(2); /* tmp_buf (2x) */
 
-    MPIU_Assert(sendbuf != MPI_IN_PLACE); /* we do not handle in-place */
+    MPIR_Assert(sendbuf != MPI_IN_PLACE); /* we do not handle in-place */
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -141,15 +141,15 @@ int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendty
 
     /* Do Phase 1 of the algorithim. Shift the data blocks on process i
      * upwards by a distance of i blocks. Store the result in recvbuf. */
-    mpi_errno = MPID_Sched_copy(((char *) sendbuf + rank*sendcount*sendtype_extent),
+    mpi_errno = MPIR_Sched_copy(((char *) sendbuf + rank*sendcount*sendtype_extent),
                                 (comm_size - rank)*sendcount, sendtype,
                                 recvbuf, (comm_size - rank)*recvcount, recvtype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_copy(sendbuf, rank*sendcount, sendtype,
+    mpi_errno = MPIR_Sched_copy(sendbuf, rank*sendcount, sendtype,
                                 ((char *) recvbuf + (comm_size-rank)*recvcount*recvtype_extent),
                                 rank*recvcount, recvtype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
     /* Input data is now stored in recvbuf with datatype recvtype */
 
     /* Now do Phase 2, the communication phase. It takes
@@ -160,7 +160,7 @@ int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendty
     /* allocate displacements array for indexed datatype used in
        communication */
 
-    MPIU_CHKLMEM_MALLOC(displs, int *, comm_size * sizeof(int), mpi_errno, "displs");
+    MPIR_CHKLMEM_MALLOC(displs, int *, comm_size * sizeof(int), mpi_errno, "displs");
 
     pof2 = 1;
     while (pof2 < comm_size) {
@@ -187,16 +187,16 @@ int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendty
         MPID_Datatype_get_size_macro(newtype, newtype_size);
 
         /* we will usually copy much less than nbytes */
-        mpi_errno = MPID_Sched_copy(recvbuf, 1, newtype, tmp_buf, newtype_size, MPI_BYTE, s);
+        mpi_errno = MPIR_Sched_copy(recvbuf, 1, newtype, tmp_buf, newtype_size, MPI_BYTE, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* now send and recv in parallel */
-        mpi_errno = MPID_Sched_send(tmp_buf, newtype_size, MPI_BYTE, dst, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(tmp_buf, newtype_size, MPI_BYTE, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_recv(recvbuf, 1, newtype, src, comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(recvbuf, 1, newtype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         MPIR_Type_free_impl(&newtype);
 
@@ -215,21 +215,21 @@ int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendty
     /* adjust for potential negative lower bound in datatype */
     tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);
 
-    mpi_errno = MPID_Sched_copy(((char *) recvbuf + (rank+1)*recvcount*recvtype_extent),
+    mpi_errno = MPIR_Sched_copy(((char *) recvbuf + (rank+1)*recvcount*recvtype_extent),
                                 (comm_size - rank - 1)*recvcount, recvtype,
                                 tmp_buf, (comm_size - rank - 1)*recvcount, recvtype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_copy(recvbuf, (rank+1)*recvcount, recvtype,
+    mpi_errno = MPIR_Sched_copy(recvbuf, (rank+1)*recvcount, recvtype,
                                 ((char *) tmp_buf + (comm_size-rank-1)*recvcount*recvtype_extent),
                                 (rank+1)*recvcount, recvtype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     /* Blocks are in the reverse order now (comm_size-1 to 0).
      * Reorder them to (0 to comm_size-1) and store them in recvbuf. */
 
     for (i = 0; i < comm_size; i++){
-        mpi_errno = MPID_Sched_copy(((char *) tmp_buf + i*recvcount*recvtype_extent),
+        mpi_errno = MPIR_Sched_copy(((char *) tmp_buf + i*recvcount*recvtype_extent),
                                     recvcount, recvtype,
                                     ((char *) recvbuf + (comm_size-i-1)*recvcount*recvtype_extent),
                                     recvcount, recvtype, s);
@@ -238,7 +238,7 @@ int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendty
 
     MPIR_SCHED_CHKPMEM_COMMIT(s);
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     MPIR_SCHED_CHKPMEM_REAP(s);
@@ -254,7 +254,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ialltoall_perm_sr
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
@@ -262,7 +262,7 @@ int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype send
     int ii, ss, bblock, dst;
     MPI_Aint sendtype_extent, recvtype_extent;
 
-    MPIU_Assert(sendbuf != MPI_IN_PLACE); /* we do not handle in-place */
+    MPIR_Assert(sendbuf != MPI_IN_PLACE); /* we do not handle in-place */
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -278,20 +278,20 @@ int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype send
         /* do the communication -- post ss sends and receives: */
         for (i = 0; i < ss; i++) {
             dst = (rank+i+ii) % comm_size;
-            mpi_errno = MPID_Sched_recv(((char *)recvbuf + dst*recvcount*recvtype_extent),
+            mpi_errno = MPIR_Sched_recv(((char *)recvbuf + dst*recvcount*recvtype_extent),
                                         recvcount, recvtype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
 
         for (i = 0; i < ss; i++) {
             dst = (rank-i-ii+comm_size) % comm_size;
-            mpi_errno = MPID_Sched_send(((char *)sendbuf + dst*sendcount*sendtype_extent),
+            mpi_errno = MPIR_Sched_send(((char *)sendbuf + dst*sendcount*sendtype_extent),
                                         sendcount, sendtype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
 
         /* force the (2*ss) sends/recvs above to complete before posting additional ops */
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
 fn_exit:
@@ -307,7 +307,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ialltoall_pairwise
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
@@ -315,7 +315,7 @@ int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sen
     int rank, comm_size;
     MPI_Aint sendtype_extent, recvtype_extent;
 
-    MPIU_Assert(sendbuf != MPI_IN_PLACE); /* we do not handle in-place */
+    MPIR_Assert(sendbuf != MPI_IN_PLACE); /* we do not handle in-place */
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -324,7 +324,7 @@ int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sen
     MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
 
     /* Make local copy first */
-    mpi_errno = MPID_Sched_copy(((char *)sendbuf + rank*sendcount*sendtype_extent),
+    mpi_errno = MPIR_Sched_copy(((char *)sendbuf + rank*sendcount*sendtype_extent),
                                 sendcount, sendtype,
                                 ((char *)recvbuf + rank*recvcount*recvtype_extent),
                                 recvcount, recvtype, s);
@@ -343,13 +343,13 @@ int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sen
             dst = (rank + i) % comm_size;
         }
 
-        mpi_errno = MPID_Sched_send(((char *)sendbuf + dst*sendcount*sendtype_extent),
+        mpi_errno = MPIR_Sched_send(((char *)sendbuf + dst*sendcount*sendtype_extent),
                                     sendcount, sendtype, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_recv(((char *)recvbuf + src*recvcount*recvtype_extent),
+        mpi_errno = MPIR_Sched_recv(((char *)recvbuf + src*recvcount*recvtype_extent),
                                     recvcount, recvtype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
 fn_exit:
@@ -407,7 +407,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ialltoall_intra
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ialltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ialltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int nbytes, comm_size, sendtype_size;
@@ -453,7 +453,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ialltoall_inter
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ialltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ialltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
 /* Intercommunicator alltoall. We use a pairwise exchange algorithm
    similar to the one used in intracommunicator alltoall for long
@@ -480,9 +480,9 @@ int MPIR_Ialltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendty
 
     /* Do the pairwise exchanges */
     max_size = MPL_MAX(local_size, remote_size);
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                      max_size*recvcount*recvtype_extent);
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                      max_size*sendcount*sendtype_extent);
     for (i = 0; i < max_size; i++) {
         src = (rank - i + max_size) % max_size;
@@ -502,11 +502,11 @@ int MPIR_Ialltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendty
             sendaddr = (char *)sendbuf + dst*sendcount*sendtype_extent;
         }
 
-        mpi_errno = MPID_Sched_send(sendaddr, sendcount, sendtype, dst, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(sendaddr, sendcount, sendtype, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_recv(recvaddr, recvcount, recvtype, src, comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(recvaddr, recvcount, recvtype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
 fn_exit:
@@ -524,21 +524,21 @@ int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ialltoall_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ialltoall_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Ialltoall_sched(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -582,10 +582,10 @@ int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IALLTOALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IALLTOALL);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IALLTOALL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IALLTOALL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -657,7 +657,7 @@ int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IALLTOALL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IALLTOALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/ialltoallv.c b/src/mpi/coll/ialltoallv.c
index 5ee8033..c0da8cc 100644
--- a/src/mpi/coll/ialltoallv.c
+++ b/src/mpi/coll/ialltoallv.c
@@ -37,7 +37,7 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl
 int MPIR_Ialltoallv_intra(const void *sendbuf, const int sendcounts[], const int sdispls[],
                           MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
                           const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                          MPID_Sched_t s)
+                          MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size;
@@ -47,7 +47,7 @@ int MPIR_Ialltoallv_intra(const void *sendbuf, const int sendcounts[], const int
     int dst, rank;
     MPIR_SCHED_CHKPMEM_DECL(1);
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -91,23 +91,23 @@ int MPIR_Ialltoallv_intra(const void *sendbuf, const int sendcounts[], const int
                     else
                         dst = i;
 
-                    mpi_errno = MPID_Sched_send(((char *)recvbuf + rdispls[dst]*recv_extent),
+                    mpi_errno = MPIR_Sched_send(((char *)recvbuf + rdispls[dst]*recv_extent),
                                                 recvcounts[dst], recvtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    mpi_errno = MPID_Sched_recv(tmp_buf, recvcounts[dst], recvtype, dst, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_recv(tmp_buf, recvcounts[dst], recvtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
 
-                    mpi_errno = MPID_Sched_copy(tmp_buf, recvcounts[dst], recvtype,
+                    mpi_errno = MPIR_Sched_copy(tmp_buf, recvcounts[dst], recvtype,
                                                 ((char *)recvbuf + rdispls[dst]*recv_extent),
                                                 recvcounts[dst], recvtype, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
             }
         }
 
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     else {
         bblock = MPIR_CVAR_ALLTOALL_THROTTLE;
@@ -126,9 +126,9 @@ int MPIR_Ialltoallv_intra(const void *sendbuf, const int sendcounts[], const int
             for (i=0; i < ss; i++) {
                 dst = (rank+i+ii) % comm_size;
                 if (recvcounts[dst] && recvtype_size) {
-                    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+                    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                                      rdispls[dst]*recv_extent);
-                    mpi_errno = MPID_Sched_recv((char *)recvbuf+rdispls[dst]*recv_extent,
+                    mpi_errno = MPIR_Sched_recv((char *)recvbuf+rdispls[dst]*recv_extent,
                                                 recvcounts[dst], recvtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 }
@@ -137,16 +137,16 @@ int MPIR_Ialltoallv_intra(const void *sendbuf, const int sendcounts[], const int
             for (i=0; i < ss; i++) {
                 dst = (rank-i-ii+comm_size) % comm_size;
                 if (sendcounts[dst] && sendtype_size) {
-                    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+                    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                                      sdispls[dst]*send_extent);
-                    mpi_errno = MPID_Sched_send((char *)sendbuf+sdispls[dst]*send_extent,
+                    mpi_errno = MPIR_Sched_send((char *)sendbuf+sdispls[dst]*send_extent,
                                                 sendcounts[dst], sendtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 }
             }
 
             /* force our block of sends/recvs to complete before starting the next block */
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
@@ -165,7 +165,7 @@ fn_fail:
 int MPIR_Ialltoallv_inter(const void *sendbuf, const int sendcounts[], const int sdispls[],
                           MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
                           const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                          MPID_Sched_t s)
+                          MPIR_Sched_t s)
 {
 /* Intercommunicator alltoallv. We use a pairwise exchange algorithm
    similar to the one used in intracommunicator alltoallv. Since the
@@ -182,7 +182,7 @@ int MPIR_Ialltoallv_inter(const void *sendbuf, const int sendcounts[], const int
     int src, dst, rank, sendcount, recvcount;
     char *sendaddr, *recvaddr;
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
 
     local_size = comm_ptr->local_size;
     remote_size = comm_ptr->remote_size;
@@ -205,7 +205,7 @@ int MPIR_Ialltoallv_inter(const void *sendbuf, const int sendcounts[], const int
             recvcount = 0;
         }
         else {
-            MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+            MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                              rdispls[src]*recv_extent);
             recvaddr = (char *)recvbuf + rdispls[src]*recv_extent;
             recvcount = recvcounts[src];
@@ -216,7 +216,7 @@ int MPIR_Ialltoallv_inter(const void *sendbuf, const int sendcounts[], const int
             sendcount = 0;
         }
         else {
-            MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+            MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                              sdispls[dst]*send_extent);
             sendaddr = (char *)sendbuf + sdispls[dst]*send_extent;
             sendcount = sendcounts[dst];
@@ -227,11 +227,11 @@ int MPIR_Ialltoallv_inter(const void *sendbuf, const int sendcounts[], const int
         if (recvcount * recvtype_size == 0)
             src = MPI_PROC_NULL;
 
-        mpi_errno = MPID_Sched_send(sendaddr, sendcount, sendtype, dst, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(sendaddr, sendcount, sendtype, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_recv(recvaddr, recvcount, recvtype, src, comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(recvaddr, recvcount, recvtype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_barrier(s);
+        mpi_errno = MPIR_Sched_barrier(s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
@@ -253,21 +253,21 @@ int MPIR_Ialltoallv_impl(const void *sendbuf, const int sendcounts[], const int
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ialltoallv_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ialltoallv_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Ialltoallv_sched(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -315,10 +315,10 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IALLTOALLV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IALLTOALLV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IALLTOALLV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IALLTOALLV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -392,7 +392,7 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IALLTOALLV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IALLTOALLV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/ialltoallw.c b/src/mpi/coll/ialltoallw.c
index 20a0321..eaa6548 100644
--- a/src/mpi/coll/ialltoallw.c
+++ b/src/mpi/coll/ialltoallw.c
@@ -58,7 +58,7 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl
 int MPIR_Ialltoallw_intra(const void *sendbuf, const int sendcounts[], const int sdispls[],
                           const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
                           const int rdispls[], const MPI_Datatype recvtypes[],
-                          MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                          MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, i, j;
@@ -112,18 +112,18 @@ int MPIR_Ialltoallw_intra(const void *sendbuf, const int sendcounts[], const int
                     MPIR_Type_get_true_extent_impl(recvtypes[i], &true_lb, &true_extent);
                     adj_tmp_buf = (void *)((char*)tmp_buf - true_lb);
 
-                    mpi_errno = MPID_Sched_send(((char *)recvbuf + rdispls[dst]),
+                    mpi_errno = MPIR_Sched_send(((char *)recvbuf + rdispls[dst]),
                                                 recvcounts[dst], recvtypes[dst], dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    mpi_errno = MPID_Sched_recv(adj_tmp_buf, recvcounts[dst], recvtypes[dst], dst, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_recv(adj_tmp_buf, recvcounts[dst], recvtypes[dst], dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
 
-                    mpi_errno = MPID_Sched_copy(adj_tmp_buf, recvcounts[dst], recvtypes[dst],
+                    mpi_errno = MPIR_Sched_copy(adj_tmp_buf, recvcounts[dst], recvtypes[dst],
                                                 ((char *)recvbuf + rdispls[dst]),
                                                 recvcounts[dst], recvtypes[dst], s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
             }
         }
@@ -142,7 +142,7 @@ int MPIR_Ialltoallw_intra(const void *sendbuf, const int sendcounts[], const int
                 if (recvcounts[dst]) {
                     MPID_Datatype_get_size_macro(recvtypes[dst], type_size);
                     if (type_size) {
-                        mpi_errno = MPID_Sched_recv((char *)recvbuf+rdispls[dst],
+                        mpi_errno = MPIR_Sched_recv((char *)recvbuf+rdispls[dst],
                                                     recvcounts[dst], recvtypes[dst],
                                                     dst, comm_ptr, s);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -155,7 +155,7 @@ int MPIR_Ialltoallw_intra(const void *sendbuf, const int sendcounts[], const int
                 if (sendcounts[dst]) {
                     MPID_Datatype_get_size_macro(sendtypes[dst], type_size);
                     if (type_size) {
-                        mpi_errno = MPID_Sched_send((char *)sendbuf+sdispls[dst],
+                        mpi_errno = MPIR_Sched_send((char *)sendbuf+sdispls[dst],
                                                     sendcounts[dst], sendtypes[dst],
                                                     dst, comm_ptr, s);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -164,7 +164,7 @@ int MPIR_Ialltoallw_intra(const void *sendbuf, const int sendcounts[], const int
             }
 
             /* force our block of sends/recvs to complete before starting the next block */
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
@@ -183,7 +183,7 @@ fn_fail:
 int MPIR_Ialltoallw_inter(const void *sendbuf, const int sendcounts[], const int sdispls[],
                           const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
                           const int rdispls[], const MPI_Datatype recvtypes[],
-                          MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                          MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
 /* Intercommunicator alltoallw. We use a pairwise exchange algorithm
    similar to the one used in intracommunicator alltoallw. Since the local and
@@ -232,12 +232,12 @@ int MPIR_Ialltoallw_inter(const void *sendbuf, const int sendcounts[], const int
             sendtype = sendtypes[dst];
         }
 
-        mpi_errno = MPID_Sched_send(sendaddr, sendcount, sendtype, dst, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(sendaddr, sendcount, sendtype, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         /* sendrecv, no barrier here */
-        mpi_errno = MPID_Sched_recv(recvaddr, recvcount, recvtype, src, comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(recvaddr, recvcount, recvtype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
 fn_exit:
@@ -258,21 +258,21 @@ int MPIR_Ialltoallw_impl(const void *sendbuf, const int sendcounts[], const int
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ialltoallw_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ialltoallw_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Ialltoallw_sched(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -320,10 +320,10 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IALLTOALLW);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IALLTOALLW);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IALLTOALLW);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IALLTOALLW);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -380,7 +380,7 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IALLTOALLW);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IALLTOALLW);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/ibarrier.c b/src/mpi/coll/ibarrier.c
index 9ee44e9..9af99a4 100644
--- a/src/mpi/coll/ibarrier.c
+++ b/src/mpi/coll/ibarrier.c
@@ -53,12 +53,12 @@ int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request) __attribute__((weak,alias(
 #define FUNCNAME MPIR_Ibarrier_intra
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ibarrier_intra(MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ibarrier_intra(MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int size, rank, src, dst, mask;
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -71,13 +71,13 @@ int MPIR_Ibarrier_intra(MPIR_Comm *comm_ptr, MPID_Sched_t s)
         dst = (rank + mask) % size;
         src = (rank - mask + size) % size;
 
-        mpi_errno = MPID_Sched_send(NULL, 0, MPI_BYTE, dst, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(NULL, 0, MPI_BYTE, dst, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-        mpi_errno = MPID_Sched_recv(NULL, 0, MPI_BYTE, src, comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(NULL, 0, MPI_BYTE, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-        mpi_errno = MPID_Sched_barrier(s);
+        mpi_errno = MPIR_Sched_barrier(s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
         mask <<= 1;
@@ -96,29 +96,29 @@ fn_fail:
 #define FUNCNAME MPIR_Ibarrier_inter
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ibarrier_inter(MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ibarrier_inter(MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, root;
     MPIR_SCHED_CHKPMEM_DECL(1);
     char *buf = NULL;
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
 
     rank = comm_ptr->rank;
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm) {
-        mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
+        mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     /* do a barrier on the local intracommunicator */
-    MPIU_Assert(comm_ptr->local_comm->coll_fns && comm_ptr->local_comm->coll_fns->Ibarrier_sched);
+    MPIR_Assert(comm_ptr->local_comm->coll_fns && comm_ptr->local_comm->coll_fns->Ibarrier_sched);
     if(comm_ptr->local_size != 1) {
         mpi_errno = comm_ptr->local_comm->coll_fns->Ibarrier_sched(comm_ptr->local_comm, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     /* rank 0 on each group does an intercommunicator broadcast to the
        remote group to indicate that all processes in the local group
@@ -130,13 +130,13 @@ int MPIR_Ibarrier_inter(MPIR_Comm *comm_ptr, MPID_Sched_t s)
 
     /* first broadcast from left to right group, then from right to
        left group */
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ibcast_sched);
+    MPIR_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ibcast_sched);
     if (comm_ptr->is_low_group) {
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
         mpi_errno = comm_ptr->coll_fns->Ibcast_sched(buf, 1, MPI_BYTE, root, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* receive bcast from right */
         root = 0;
@@ -149,7 +149,7 @@ int MPIR_Ibarrier_inter(MPIR_Comm *comm_ptr, MPID_Sched_t s)
         mpi_errno = comm_ptr->coll_fns->Ibcast_sched(buf, 1, MPI_BYTE, root, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* bcast to left */
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
@@ -174,21 +174,21 @@ int MPIR_Ibarrier_impl(MPIR_Comm *comm_ptr, MPI_Request *request)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
     if (comm_ptr->local_size != 1 || comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
-        mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+        mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_create(&s);
+        mpi_errno = MPIR_Sched_create(&s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-        MPIU_Assert(comm_ptr->coll_fns->Ibarrier_sched != NULL);
+        MPIR_Assert(comm_ptr->coll_fns->Ibarrier_sched != NULL);
         mpi_errno = comm_ptr->coll_fns->Ibarrier_sched(comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-        mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+        mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
         if (reqp)
             *request = reqp->handle;
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -236,10 +236,10 @@ int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IBARRIER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IBARRIER);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IBARRIER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IBARRIER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -279,7 +279,7 @@ int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IBARRIER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IBARRIER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/ibcast.c b/src/mpi/coll/ibcast.c
index 6373a05..0079708 100644
--- a/src/mpi/coll/ibcast.c
+++ b/src/mpi/coll/ibcast.c
@@ -98,7 +98,7 @@ static int sched_add_length(MPIR_Comm * comm, int tag, void *state)
 #define FUNCNAME MPIR_Ibcast_binomial
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int mask;
@@ -152,9 +152,9 @@ int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int roo
 
         /* TODO: Pipeline the packing and communication */
         if (rank == root) {
-            mpi_errno = MPID_Sched_copy(buffer, count, datatype, tmp_buf, nbytes, MPI_PACKED, s);
+            mpi_errno = MPIR_Sched_copy(buffer, count, datatype, tmp_buf, nbytes, MPI_PACKED, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
@@ -191,18 +191,18 @@ int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int roo
             src = rank - mask; 
             if (src < 0) src += comm_size;
             if (!is_contig || !is_homogeneous)
-                mpi_errno = MPID_Sched_recv_status(tmp_buf, nbytes, MPI_BYTE, src,
+                mpi_errno = MPIR_Sched_recv_status(tmp_buf, nbytes, MPI_BYTE, src,
                                                     comm_ptr, &status->status, s);
             else
-                mpi_errno = MPID_Sched_recv_status(buffer, count, datatype, src,
+                mpi_errno = MPIR_Sched_recv_status(buffer, count, datatype, src,
                                                    comm_ptr, &status->status, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
             if(is_homogeneous){
-                mpi_errno = MPID_Sched_cb(&sched_test_length, status, s);
+                mpi_errno = MPIR_Sched_cb(&sched_test_length, status, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
             break;
         }
@@ -226,23 +226,23 @@ int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int roo
             dst = rank + mask;
             if (dst >= comm_size) dst -= comm_size;
             if (!is_contig || !is_homogeneous)
-                mpi_errno = MPID_Sched_send(tmp_buf, nbytes, MPI_BYTE, dst, comm_ptr, s);
+                mpi_errno = MPIR_Sched_send(tmp_buf, nbytes, MPI_BYTE, dst, comm_ptr, s);
             else
-                mpi_errno = MPID_Sched_send(buffer, count, datatype, dst, comm_ptr, s);
+                mpi_errno = MPIR_Sched_send(buffer, count, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
             /* NOTE: This is departure from MPIR_Bcast_binomial.  A true analog
-             * would put an MPID_Sched_barrier here after every send. */
+             * would put an MPIR_Sched_barrier here after every send. */
         }
         mask >>= 1;
     }
 
     if (!is_contig || !is_homogeneous) {
         if (rank != root) {
-            MPID_SCHED_BARRIER(s);
-            mpi_errno = MPID_Sched_copy(tmp_buf, nbytes, MPI_PACKED, buffer, count, datatype, s);
+            MPIR_SCHED_BARRIER(s);
+            mpi_errno = MPIR_Sched_copy(tmp_buf, nbytes, MPI_PACKED, buffer, count, datatype, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
@@ -264,7 +264,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iscatter_for_bcast
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iscatter_for_bcast(void *tmp_buf, int root, MPIR_Comm *comm_ptr, int nbytes, MPID_Sched_t s)
+int MPIR_Iscatter_for_bcast(void *tmp_buf, int root, MPIR_Comm *comm_ptr, int nbytes, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, comm_size, src, dst;
@@ -303,10 +303,10 @@ int MPIR_Iscatter_for_bcast(void *tmp_buf, int root, MPIR_Comm *comm_ptr, int nb
             curr_size = recv_size;
 
             if (recv_size > 0) {
-                mpi_errno = MPID_Sched_recv(((char *)tmp_buf + relative_rank*scatter_size),
+                mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + relative_rank*scatter_size),
                                             recv_size, MPI_BYTE, src, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
             break;
         }
@@ -328,7 +328,7 @@ int MPIR_Iscatter_for_bcast(void *tmp_buf, int root, MPIR_Comm *comm_ptr, int nb
                 dst = rank + mask;
                 if (dst >= comm_size)
                     dst -= comm_size;
-                mpi_errno = MPID_Sched_send(((char *)tmp_buf + scatter_size*(relative_rank+mask)),
+                mpi_errno = MPIR_Sched_send(((char *)tmp_buf + scatter_size*(relative_rank+mask)),
                                             send_size, MPI_BYTE, dst, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
@@ -386,7 +386,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ibcast_scatter_rec_dbl_allgather
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, comm_size, dst;
@@ -424,7 +424,7 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
     if (comm_ptr->is_hetero)
         is_homogeneous = 0;
 #endif
-    MPIU_Assert(is_homogeneous); /* we don't handle the hetero case right now */
+    MPIR_Assert(is_homogeneous); /* we don't handle the hetero case right now */
 
     MPID_Datatype_get_size_macro(datatype, type_size);
 
@@ -442,10 +442,10 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
 
         /* TODO: Pipeline the packing and communication */
         if (rank == root) {
-            mpi_errno = MPID_Sched_copy(buffer, count, datatype,
+            mpi_errno = MPIR_Sched_copy(buffer, count, datatype,
                                         tmp_buf, nbytes, MPI_BYTE, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
@@ -495,18 +495,18 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
             else
                 incoming_count = 0;
 
-            mpi_errno = MPID_Sched_send(((char *)tmp_buf + send_offset),
+            mpi_errno = MPIR_Sched_send(((char *)tmp_buf + send_offset),
                                         curr_size, MPI_BYTE, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* sendrecv, no barrier */
-            mpi_errno = MPID_Sched_recv_status(((char *)tmp_buf + recv_offset),
+            mpi_errno = MPIR_Sched_recv_status(((char *)tmp_buf + recv_offset),
                                         incoming_count,
                                         MPI_BYTE, dst, comm_ptr,&status->status, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
-            mpi_errno = MPID_Sched_cb(&sched_add_length, status, s);
+            MPIR_SCHED_BARRIER(s);
+            mpi_errno = MPIR_Sched_cb(&sched_add_length, status, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             curr_size += incoming_count;
         }
@@ -561,10 +561,10 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
                     /* incoming_count was set in the previous
                        receive. that's the amount of data to be
                        sent now. */
-                    mpi_errno = MPID_Sched_send(((char *)tmp_buf + offset),
+                    mpi_errno = MPIR_Sched_send(((char *)tmp_buf + offset),
                                                 incoming_count, MPI_BYTE, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 /* recv only if this proc. doesn't have data and sender
                    has data */
@@ -581,14 +581,14 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
 
                     /* nprocs_completed is also equal to the no. of processes
                        whose data we don't have */
-                    mpi_errno = MPID_Sched_recv_status(((char *)tmp_buf + offset),
+                    mpi_errno = MPIR_Sched_recv_status(((char *)tmp_buf + offset),
                                                 incoming_count, MPI_BYTE, dst, comm_ptr,
                                                 &status->status, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
-                    mpi_errno = MPID_Sched_cb(&sched_add_length, status, s);
+                    MPIR_SCHED_BARRIER(s);
+                    mpi_errno = MPIR_Sched_cb(&sched_add_length, status, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
 
                     curr_size += incoming_count;
                 }
@@ -602,12 +602,12 @@ int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype
         i++;
     }
     if(is_homogeneous){
-        mpi_errno = MPID_Sched_cb(&sched_test_curr_length, status, s);
+        mpi_errno = MPIR_Sched_cb(&sched_test_curr_length, status, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
     if (!is_contig) {
         if (rank != root) {
-            mpi_errno = MPID_Sched_copy(tmp_buf, nbytes, MPI_BYTE,
+            mpi_errno = MPIR_Sched_copy(tmp_buf, nbytes, MPI_BYTE,
                                         buffer, count, datatype, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
@@ -641,7 +641,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ibcast_scatter_ring_allgather
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, rank;
@@ -674,7 +674,7 @@ int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype dat
     if (comm_ptr->is_hetero)
         is_homogeneous = 0;
 #endif
-    MPIU_Assert(is_homogeneous); /* we don't handle the hetero case yet */
+    MPIR_Assert(is_homogeneous); /* we don't handle the hetero case yet */
     MPIR_SCHED_CHKPMEM_MALLOC(status, struct MPIR_Ibcast_status*,
                               sizeof(struct MPIR_Ibcast_status), mpi_errno, "MPI_Status");
     MPID_Datatype_get_size_macro(datatype, type_size);
@@ -692,9 +692,9 @@ int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype dat
 
         /* TODO: Pipeline the packing and communication */
         if (rank == root) {
-            mpi_errno = MPID_Sched_copy(buffer, count, datatype, tmp_buf, nbytes, MPI_BYTE, s);
+            mpi_errno = MPIR_Sched_copy(buffer, count, datatype, tmp_buf, nbytes, MPI_BYTE, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
@@ -725,26 +725,26 @@ int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype dat
             right_count = 0;
         right_disp = rel_j * scatter_size;
 
-        mpi_errno = MPID_Sched_send(((char *)tmp_buf + right_disp),
+        mpi_errno = MPIR_Sched_send(((char *)tmp_buf + right_disp),
                                     right_count, MPI_BYTE, right, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         /* sendrecv, no barrier here */
-        mpi_errno = MPID_Sched_recv_status(((char *)tmp_buf + left_disp),
+        mpi_errno = MPIR_Sched_recv_status(((char *)tmp_buf + left_disp),
                                     left_count, MPI_BYTE, left, comm_ptr, &status->status, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
-        mpi_errno = MPID_Sched_cb(&sched_add_length, status, s);
+        MPIR_SCHED_BARRIER(s);
+        mpi_errno = MPIR_Sched_cb(&sched_add_length, status, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         j     = jnext;
         jnext = (comm_size + jnext - 1) % comm_size;
     }
-    mpi_errno = MPID_Sched_cb(&sched_test_curr_length, status, s);
+    mpi_errno = MPIR_Sched_cb(&sched_test_curr_length, status, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     if (!is_contig && rank != root) {
-        mpi_errno = MPID_Sched_copy(tmp_buf, nbytes, MPI_BYTE, buffer, count, datatype, s);
+        mpi_errno = MPIR_Sched_copy(tmp_buf, nbytes, MPI_BYTE, buffer, count, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
@@ -764,7 +764,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ibcast_SMP
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int is_homogeneous;
@@ -774,7 +774,7 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
 
     if (!MPIR_CVAR_ENABLE_SMP_COLLECTIVES || !MPIR_CVAR_ENABLE_SMP_BCAST)
         MPID_Abort(comm_ptr, MPI_ERR_OTHER, 1, "SMP collectives are disabled!");
-    MPIU_Assert(MPIR_Comm_is_node_aware(comm_ptr));
+    MPIR_Assert(MPIR_Comm_is_node_aware(comm_ptr));
     MPIR_SCHED_CHKPMEM_MALLOC(status, struct MPIR_Ibcast_status*,
                               sizeof(struct MPIR_Ibcast_status), mpi_errno, "MPI_Status");
 
@@ -784,14 +784,14 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
         is_homogeneous = 0;
 #endif
 
-    MPIU_Assert(is_homogeneous); /* we don't handle the hetero case yet */
+    MPIR_Assert(is_homogeneous); /* we don't handle the hetero case yet */
     if (comm_ptr->node_comm) {
-        MPIU_Assert(comm_ptr->node_comm->coll_fns);
-        MPIU_Assert(comm_ptr->node_comm->coll_fns->Ibcast_sched);
+        MPIR_Assert(comm_ptr->node_comm->coll_fns);
+        MPIR_Assert(comm_ptr->node_comm->coll_fns->Ibcast_sched);
     }
     if (comm_ptr->node_roots_comm) {
-        MPIU_Assert(comm_ptr->node_roots_comm->coll_fns);
-        MPIU_Assert(comm_ptr->node_roots_comm->coll_fns->Ibcast_sched);
+        MPIR_Assert(comm_ptr->node_roots_comm->coll_fns);
+        MPIR_Assert(comm_ptr->node_roots_comm->coll_fns->Ibcast_sched);
     }
 
     /* MPI_Type_size() might not give the accurate size of the packed
@@ -811,33 +811,33 @@ int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MP
 
     /* send to intranode-rank 0 on the root's node */
     if (comm_ptr->node_comm != NULL &&
-        MPIU_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */
+        MPIR_Get_intranode_rank(comm_ptr, root) > 0) /* is not the node root (0) */
     {                                                /* and is on our node (!-1) */
         if (root == comm_ptr->rank) {
-            mpi_errno = MPID_Sched_send(buffer, count, datatype, 0, comm_ptr->node_comm, s);
+            mpi_errno = MPIR_Sched_send(buffer, count, datatype, 0, comm_ptr->node_comm, s);
         }
         else if (0 == comm_ptr->node_comm->rank) {
-            mpi_errno = MPID_Sched_recv_status(buffer, count, datatype, MPIU_Get_intranode_rank(comm_ptr, root),
+            mpi_errno = MPIR_Sched_recv_status(buffer, count, datatype, MPIR_Get_intranode_rank(comm_ptr, root),
                                         comm_ptr->node_comm, &status->status, s);
         }
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
-        MPID_SCHED_BARRIER(s);
-        mpi_errno = MPID_Sched_cb(&sched_test_length, status, s);
+        MPIR_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
+        mpi_errno = MPIR_Sched_cb(&sched_test_length, status, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* perform the internode broadcast */
     if (comm_ptr->node_roots_comm != NULL)
     {
         mpi_errno = comm_ptr->node_roots_comm->coll_fns->Ibcast_sched(buffer, count, datatype,
-                                                                MPIU_Get_internode_rank(comm_ptr, root),
+                                                                MPIR_Get_internode_rank(comm_ptr, root),
                                                                 comm_ptr->node_roots_comm, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
         /* don't allow the local ops for the intranode phase to start until this has completed */
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     /* perform the intranode broadcast on all except for the root's node */
     if (comm_ptr->node_comm != NULL)
@@ -861,20 +861,20 @@ fn_fail:
 #define FUNCNAME MPIR_Ibcast_intra
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ibcast_intra(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ibcast_intra(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, is_homogeneous ATTRIBUTE((unused));
     MPI_Aint type_size, nbytes;
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     is_homogeneous = 1;
 #ifdef MPID_HAS_HETERO
     if (comm_ptr->is_hetero)
         is_homogeneous = 0;
 #endif
-    MPIU_Assert(is_homogeneous); /* we don't handle the hetero case right now */
+    MPIR_Assert(is_homogeneous); /* we don't handle the hetero case right now */
 
     comm_size = comm_ptr->local_size;
     MPID_Datatype_get_size_macro(datatype, type_size);
@@ -912,11 +912,11 @@ fn_fail:
 #define FUNCNAME MPIR_Ibcast_inter
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
 
     /* Intercommunicator broadcast.
      * Root sends to rank 0 in remote group. Remote group does local
@@ -929,26 +929,26 @@ int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root,
     else if (root == MPI_ROOT)
     {
         /* root sends to rank 0 on remote group and returns */
-        mpi_errno = MPID_Sched_send(buffer, count, datatype, 0, comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(buffer, count, datatype, 0, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
     else
     {
         /* remote group. rank 0 on remote group receives from root */
         if (comm_ptr->rank == 0) {
-            mpi_errno = MPID_Sched_recv(buffer, count, datatype, root, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(buffer, count, datatype, root, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
 
         if (comm_ptr->local_comm == NULL) {
-            mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
+            mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
 
         /* now do the usual broadcast on this intracommunicator
            with rank 0 as root. */
-        MPIU_Assert(comm_ptr->local_comm->coll_fns && comm_ptr->local_comm->coll_fns->Ibcast_sched);
+        MPIR_Assert(comm_ptr->local_comm->coll_fns && comm_ptr->local_comm->coll_fns->Ibcast_sched);
         mpi_errno = comm_ptr->local_comm->coll_fns->Ibcast_sched(buffer, count, datatype, root, comm_ptr->local_comm, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
@@ -968,20 +968,20 @@ int MPIR_Ibcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, M
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns->Ibcast_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ibcast_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Ibcast_sched(buffer, count, datatype, root, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -1024,10 +1024,10 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Com
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IBCAST);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IBCAST);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IBCAST);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IBCAST);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -1079,7 +1079,7 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Com
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IBCAST);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IBCAST);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/iexscan.c b/src/mpi/coll/iexscan.c
index c1a9e20..b33c39b 100644
--- a/src/mpi/coll/iexscan.c
+++ b/src/mpi/coll/iexscan.c
@@ -76,7 +76,7 @@ int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype data
 #define FUNCNAME MPIR_Iexscan
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, comm_size;
@@ -106,7 +106,7 @@ int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
     /* adjust for potential negative lower bound in datatype */
     tmp_buf = (void *)((char*)tmp_buf - true_lb);
 
-    mpi_errno = MPID_Sched_copy((sendbuf == MPI_IN_PLACE ? (const void *)recvbuf : sendbuf), count, datatype,
+    mpi_errno = MPIR_Sched_copy((sendbuf == MPI_IN_PLACE ? (const void *)recvbuf : sendbuf), count, datatype,
                                partial_scan, count, datatype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
@@ -116,17 +116,17 @@ int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
         dst = rank ^ mask;
         if (dst < comm_size) {
             /* Send partial_scan to dst. Recv into tmp_buf */
-            mpi_errno = MPID_Sched_send(partial_scan, count, datatype, dst, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(partial_scan, count, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* sendrecv, no barrier here */
-            mpi_errno = MPID_Sched_recv(tmp_buf, count, datatype, dst, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_buf, count, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             if (rank > dst) {
-                mpi_errno = MPID_Sched_reduce(tmp_buf, partial_scan, count, datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_buf, partial_scan, count, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
 
                 /* On rank 0, recvbuf is not defined.  For sendbuf==MPI_IN_PLACE
                    recvbuf must not change (per MPI-2.2).
@@ -137,35 +137,35 @@ int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
                 if (rank != 0) {
                     if (flag == 0) {
                         /* simply copy data recd from rank 0 into recvbuf */
-                        mpi_errno = MPID_Sched_copy(tmp_buf, count, datatype,
+                        mpi_errno = MPIR_Sched_copy(tmp_buf, count, datatype,
                                                     recvbuf, count, datatype, s);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                        MPID_SCHED_BARRIER(s);
+                        MPIR_SCHED_BARRIER(s);
 
                         flag = 1;
                     }
                     else {
-                        mpi_errno = MPID_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
+                        mpi_errno = MPIR_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                        MPID_SCHED_BARRIER(s);
+                        MPIR_SCHED_BARRIER(s);
                     }
                 }
             }
             else {
                 if (is_commutative) {
-                    mpi_errno = MPID_Sched_reduce(tmp_buf, partial_scan, count, datatype, op, s);
+                    mpi_errno = MPIR_Sched_reduce(tmp_buf, partial_scan, count, datatype, op, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 else {
-                    mpi_errno = MPID_Sched_reduce(partial_scan, tmp_buf, count, datatype, op, s);
+                    mpi_errno = MPIR_Sched_reduce(partial_scan, tmp_buf, count, datatype, op, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
 
-                    mpi_errno = MPID_Sched_copy(tmp_buf, count, datatype,
+                    mpi_errno = MPIR_Sched_copy(tmp_buf, count, datatype,
                                                 partial_scan, count, datatype, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
             }
         }
@@ -189,21 +189,21 @@ int MPIR_Iexscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Iexscan_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Iexscan_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Iexscan_sched(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -247,10 +247,10 @@ int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype data
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IEXSCAN);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IEXSCAN);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IEXSCAN);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IEXSCAN);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -314,7 +314,7 @@ int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype data
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IEXSCAN);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IEXSCAN);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/igather.c b/src/mpi/coll/igather.c
index 1110c1a..26f9d15 100644
--- a/src/mpi/coll/igather.c
+++ b/src/mpi/coll/igather.c
@@ -54,7 +54,7 @@ int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void
 #define FUNCNAME MPIR_Igather_binomial
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, rank;
@@ -83,7 +83,7 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
     is_homogeneous = !comm_ptr->is_hetero;
 #endif
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     /* Use binomial tree algorithm. */
 
@@ -92,7 +92,7 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
     if (rank == root)
     {
         MPID_Datatype_get_extent_macro(recvtype, extent);
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf+
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf+
                                          (extent*recvcount*comm_size));
     }
 
@@ -168,15 +168,15 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
                              * should cover the case where the root is
                              * rank 0. */
                             char *rp = (char *)recvbuf + (((rank + mask) % comm_size)*recvcount*extent);
-                            mpi_errno = MPID_Sched_recv(rp, (recvblks * recvcount), recvtype, src, comm_ptr, s);
+                            mpi_errno = MPIR_Sched_recv(rp, (recvblks * recvcount), recvtype, src, comm_ptr, s);
                             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                            mpi_errno = MPID_Sched_barrier(s);
+                            mpi_errno = MPIR_Sched_barrier(s);
                             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                         }
                         else if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) {
-                            mpi_errno = MPID_Sched_recv(tmp_buf, (recvblks * nbytes), MPI_BYTE, src, comm_ptr, s);
+                            mpi_errno = MPIR_Sched_recv(tmp_buf, (recvblks * nbytes), MPI_BYTE, src, comm_ptr, s);
                             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                            mpi_errno = MPID_Sched_barrier(s);
+                            mpi_errno = MPIR_Sched_barrier(s);
                             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                             copy_offset = rank + mask;
                             copy_blks = recvblks;
@@ -192,9 +192,9 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
                             mpi_errno = MPIR_Type_commit_impl(&tmp_type);
                             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-                            mpi_errno = MPID_Sched_recv(recvbuf, 1, tmp_type, src, comm_ptr, s);
+                            mpi_errno = MPIR_Sched_recv(recvbuf, 1, tmp_type, src, comm_ptr, s);
                             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                            mpi_errno = MPID_Sched_barrier(s);
+                            mpi_errno = MPIR_Sched_barrier(s);
                             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
                             /* this "premature" free is safe b/c the sched holds an actual ref to keep it alive */
@@ -214,10 +214,10 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
                             offset = mask * nbytes;
                         else
                             offset = (mask - 1) * nbytes;
-                        mpi_errno = MPID_Sched_recv(((char *)tmp_buf + offset), (recvblks * nbytes),
+                        mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + offset), (recvblks * nbytes),
                                                     MPI_BYTE, src, comm_ptr, s);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                        mpi_errno = MPID_Sched_barrier(s);
+                        mpi_errno = MPIR_Sched_barrier(s);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                         curr_cnt += (recvblks * nbytes);
                     }
@@ -229,20 +229,20 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
 
                 if (!tmp_buf_size) {
                     /* leaf nodes send directly from sendbuf */
-                    mpi_errno = MPID_Sched_send(sendbuf, sendcount, sendtype, dst, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_send(sendbuf, sendcount, sendtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    mpi_errno = MPID_Sched_barrier(s);
+                    mpi_errno = MPIR_Sched_barrier(s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 }
                 else if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) {
-                    mpi_errno = MPID_Sched_send(tmp_buf, curr_cnt, MPI_BYTE, dst, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_send(tmp_buf, curr_cnt, MPI_BYTE, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    mpi_errno = MPID_Sched_barrier(s);
+                    mpi_errno = MPIR_Sched_barrier(s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 }
                 else {
                     blocks[0] = sendcount;
-                    struct_displs[0] = MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf;
+                    struct_displs[0] = MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf;
                     types[0] = sendtype;
 		    /* check for overflow.  work around int limits if needed*/
 		    if (curr_cnt - nbytes != (int)(curr_cnt-nbytes)) {
@@ -250,19 +250,19 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
 			MPIR_Type_contiguous_x_impl(curr_cnt - nbytes,
 				MPI_BYTE, &(types[1]));
 		    } else {
-			MPIU_Assign_trunc(blocks[1], curr_cnt - nbytes, int);
+			MPIR_Assign_trunc(blocks[1], curr_cnt - nbytes, int);
 			types[1] = MPI_BYTE;
 		    }
-                    struct_displs[1] = MPIU_VOID_PTR_CAST_TO_MPI_AINT tmp_buf;
+                    struct_displs[1] = MPIR_VOID_PTR_CAST_TO_MPI_AINT tmp_buf;
 
                     mpi_errno = MPIR_Type_create_struct_impl(2, blocks, struct_displs, types, &tmp_type);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                     mpi_errno = MPIR_Type_commit_impl(&tmp_type);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-                    mpi_errno = MPID_Sched_send(MPI_BOTTOM, 1, tmp_type, dst, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_send(MPI_BOTTOM, 1, tmp_type, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
 
                     /* this "premature" free is safe b/c the sched holds an actual ref to keep it alive */
                     MPIR_Type_free_impl(&tmp_type);
@@ -276,11 +276,11 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
         if ((rank == root) && root && (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) && copy_blks) {
             /* reorder and copy from tmp_buf into recvbuf */
             /* FIXME why are there two copies here? */
-            mpi_errno = MPID_Sched_copy(tmp_buf, nbytes * (comm_size - copy_offset), MPI_BYTE,
+            mpi_errno = MPIR_Sched_copy(tmp_buf, nbytes * (comm_size - copy_offset), MPI_BYTE,
                                        ((char *)recvbuf + extent * recvcount * copy_offset),
                                        recvcount * (comm_size - copy_offset), recvtype, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            mpi_errno = MPID_Sched_copy((char *)tmp_buf + nbytes * (comm_size - copy_offset),
+            mpi_errno = MPIR_Sched_copy((char *)tmp_buf + nbytes * (comm_size - copy_offset),
                                         nbytes * (copy_blks - comm_size + copy_offset), MPI_BYTE,
                                         recvbuf, recvcount * (copy_blks - comm_size + copy_offset),
                                         recvtype, s);
@@ -292,7 +292,7 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
         /* communicator is heterogeneous. pack data into tmp_buf. */
 
         /* currently unimplemented & untested */
-        MPIU_Assertp(FALSE);
+        MPIR_Assertp(FALSE);
 
 #if 0
         if (rank == root)
@@ -300,7 +300,7 @@ int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendt
         else
             MPIR_Pack_size_impl(sendcount*(comm_size/2), sendtype, &tmp_buf_size);
 
-        MPIU_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+        MPIR_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
 
         position = 0;
         if (sendbuf != MPI_IN_PLACE) {
@@ -388,7 +388,7 @@ fn_fail:
 #define FUNCNAME MPIR_Igather_intra
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Igather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Igather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
 
@@ -405,7 +405,7 @@ fn_fail:
 #define FUNCNAME MPIR_Igather_inter
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank;
@@ -449,7 +449,7 @@ int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     if (nbytes < MPIR_CVAR_GATHER_INTER_SHORT_MSG_SIZE) {
         if (root == MPI_ROOT) {
             /* root receives data from rank 0 on remote group */
-            mpi_errno = MPID_Sched_recv(recvbuf, recvcount*remote_size, recvtype, 0, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(recvbuf, recvcount*remote_size, recvtype, 0, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
         else {
@@ -463,7 +463,7 @@ int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
                 MPIR_Type_get_true_extent_impl(sendtype, &true_lb, &true_extent);
                 MPID_Datatype_get_extent_macro(sendtype, extent);
 
-                MPIU_Ensure_Aint_fits_in_pointer(sendcount*local_size*(MPL_MAX(extent, true_extent)));
+                MPIR_Ensure_Aint_fits_in_pointer(sendcount*local_size*(MPL_MAX(extent, true_extent)));
                 MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, sendcount*local_size*(MPL_MAX(extent,true_extent)),
                                           mpi_errno, "tmp_buf");
                 /* adjust for potential negative lower bound in datatype */
@@ -472,21 +472,21 @@ int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
 
             /* all processes in remote group form new intracommunicator */
             if (!comm_ptr->local_comm) {
-                mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
+                mpi_errno = MPII_Setup_intercomm_localcomm( comm_ptr );
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
 
             newcomm_ptr = comm_ptr->local_comm;
 
             /* now do the a local gather on this intracommunicator */
-            MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Igather_sched);
+            MPIR_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Igather_sched);
             mpi_errno = newcomm_ptr->coll_fns->Igather_sched(sendbuf, sendcount, sendtype,
                                                        tmp_buf, sendcount, sendtype, 0,
                                                        newcomm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
             if (rank == 0) {
-                mpi_errno = MPID_Sched_send(tmp_buf, sendcount*local_size, sendtype, root, comm_ptr, s);
+                mpi_errno = MPIR_Sched_send(tmp_buf, sendcount*local_size, sendtype, root, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
         }
@@ -495,17 +495,17 @@ int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
         /* long message. use linear algorithm. */
         if (root == MPI_ROOT) {
             MPID_Datatype_get_extent_macro(recvtype, extent);
-            MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+            MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                              (recvcount*remote_size*extent));
 
             for (i=0; i<remote_size; i++) {
-                mpi_errno = MPID_Sched_recv(((char *)recvbuf+recvcount*i*extent),
+                mpi_errno = MPIR_Sched_recv(((char *)recvbuf+recvcount*i*extent),
                                             recvcount, recvtype, i, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
         }
         else {
-            mpi_errno = MPID_Sched_send(sendbuf, sendcount, sendtype, root, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(sendbuf, sendcount, sendtype, root, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
     }
@@ -527,21 +527,21 @@ int MPIR_Igather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Igather_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Igather_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Igather_sched(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -587,10 +587,10 @@ int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IGATHER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IGATHER);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IGATHER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IGATHER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -710,7 +710,7 @@ int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IGATHER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IGATHER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/igatherv.c b/src/mpi/coll/igatherv.c
index c31ad92..136a0f6 100644
--- a/src/mpi/coll/igatherv.c
+++ b/src/mpi/coll/igatherv.c
@@ -35,7 +35,7 @@ int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
                   const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root,
-                  MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                  MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
@@ -56,20 +56,20 @@ int MPIR_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, voi
 
         MPID_Datatype_get_extent_macro(recvtype, extent);
         /* each node can make sure it is not going to overflow aint */
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf + displs[rank] * extent);
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf + displs[rank] * extent);
 
         for (i = 0; i < comm_size; i++) {
             if (recvcounts[i]) {
                 if ((comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) && (i == rank)) {
                     if (sendbuf != MPI_IN_PLACE) {
-                        mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
+                        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                                     ((char *)recvbuf+displs[rank]*extent),
                                                     recvcounts[rank], recvtype, s);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                     }
                 }
                 else {
-                    mpi_errno = MPID_Sched_recv(((char *)recvbuf+displs[i]*extent),
+                    mpi_errno = MPIR_Sched_recv(((char *)recvbuf+displs[i]*extent),
                                                 recvcounts[i], recvtype, i, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 }
@@ -92,9 +92,9 @@ int MPIR_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, voi
                 MPIR_CVAR_GET_DEFAULT_INT(GATHERV_INTER_SSEND_MIN_PROCS,&min_procs);
 
             if (comm_size >= min_procs)
-                mpi_errno = MPID_Sched_ssend(sendbuf, sendcount, sendtype, root, comm_ptr, s);
+                mpi_errno = MPIR_Sched_ssend(sendbuf, sendcount, sendtype, root, comm_ptr, s);
             else
-                mpi_errno = MPID_Sched_send(sendbuf, sendcount, sendtype, root, comm_ptr, s);
+                mpi_errno = MPIR_Sched_send(sendbuf, sendcount, sendtype, root, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
     }
@@ -116,21 +116,21 @@ int MPIR_Igatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Igatherv_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Igatherv_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Igatherv_sched(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -177,10 +177,10 @@ int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IGATHERV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IGATHERV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IGATHERV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IGATHERV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -310,7 +310,7 @@ int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IGATHERV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IGATHERV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/ired_scat.c b/src/mpi/coll/ired_scat.c
index d7b3c98..9a451c2 100644
--- a/src/mpi/coll/ired_scat.c
+++ b/src/mpi/coll/ired_scat.c
@@ -37,7 +37,7 @@ int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int recvcounts[],
                                  MPI_Datatype datatype, MPI_Op op,
-                                 MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                                 MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, comm_size, i;
@@ -57,7 +57,7 @@ int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int r
     MPID_Datatype_get_extent_macro(datatype, extent);
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
 
-    MPIU_Assert(MPIR_Op_is_commutative(op));
+    MPIR_Assert(MPIR_Op_is_commutative(op));
 
     MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");
 
@@ -86,13 +86,13 @@ int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int r
 
     /* copy sendbuf into tmp_results */
     if (sendbuf != MPI_IN_PLACE)
-        mpi_errno = MPID_Sched_copy(sendbuf, total_count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, total_count, datatype,
                                     tmp_results, total_count, datatype, s);
     else
-        mpi_errno = MPID_Sched_copy(recvbuf, total_count, datatype,
+        mpi_errno = MPIR_Sched_copy(recvbuf, total_count, datatype,
                                     tmp_results, total_count, datatype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     pof2 = 1;
     while (pof2 <= comm_size) pof2 <<= 1;
@@ -108,9 +108,9 @@ int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int r
 
     if (rank < 2*rem) {
         if (rank % 2 == 0) { /* even */
-            mpi_errno = MPID_Sched_send(tmp_results, total_count, datatype, rank+1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(tmp_results, total_count, datatype, rank+1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* temporarily set the rank to -1 so that this
                process does not pariticipate in recursive
@@ -118,16 +118,16 @@ int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int r
             newrank = -1;
         }
         else { /* odd */
-            mpi_errno = MPID_Sched_recv(tmp_recvbuf, total_count, datatype, rank-1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_recvbuf, total_count, datatype, rank-1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* do the reduction on received data. since the
                ordering is right, it doesn't matter whether
                the operation is commutative or not. */
-            mpi_errno = MPID_Sched_reduce(tmp_recvbuf, tmp_results, total_count, datatype, op, s);
+            mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, tmp_results, total_count, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* change the rank */
             newrank = rank / 2;
@@ -191,22 +191,22 @@ int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int r
                 int send_dst = (send_cnt ? dst : MPI_PROC_NULL);
                 int recv_dst = (recv_cnt ? dst : MPI_PROC_NULL);
 
-                mpi_errno = MPID_Sched_send(((char *)tmp_results + newdisps[send_idx]*extent),
+                mpi_errno = MPIR_Sched_send(((char *)tmp_results + newdisps[send_idx]*extent),
                                             send_cnt, datatype, send_dst, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                mpi_errno = MPID_Sched_recv(((char *) tmp_recvbuf + newdisps[recv_idx]*extent),
+                mpi_errno = MPIR_Sched_recv(((char *) tmp_recvbuf + newdisps[recv_idx]*extent),
                                             recv_cnt, datatype, recv_dst, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
 
             /* tmp_recvbuf contains data received in this step.
                tmp_results contains data accumulated so far */
             if (recv_cnt) {
-                mpi_errno = MPID_Sched_reduce(((char *)tmp_recvbuf + newdisps[recv_idx]*extent),
+                mpi_errno = MPIR_Sched_reduce(((char *)tmp_recvbuf + newdisps[recv_idx]*extent),
                                               ((char *)tmp_results + newdisps[recv_idx]*extent),
                                               recv_cnt, datatype, op, s);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
 
             /* update send_idx for next iteration */
@@ -217,11 +217,11 @@ int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int r
 
         /* copy this process's result from tmp_results to recvbuf */
         if (recvcounts[rank]) {
-            mpi_errno = MPID_Sched_copy(((char *)tmp_results + disps[rank]*extent),
+            mpi_errno = MPIR_Sched_copy(((char *)tmp_results + disps[rank]*extent),
                                         recvcounts[rank], datatype,
                                         recvbuf, recvcounts[rank], datatype, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
 
     }
@@ -232,17 +232,17 @@ int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int r
     if (rank < 2*rem) {
         if (rank % 2) { /* odd */
             if (recvcounts[rank-1]) {
-                mpi_errno = MPID_Sched_send(((char *)tmp_results + disps[rank-1]*extent),
+                mpi_errno = MPIR_Sched_send(((char *)tmp_results + disps[rank-1]*extent),
                                             recvcounts[rank-1], datatype, rank-1, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
         }
         else  {   /* even */
             if (recvcounts[rank]) {
-                mpi_errno = MPID_Sched_recv(recvbuf, recvcounts[rank], datatype, rank+1, comm_ptr, s);
+                mpi_errno = MPIR_Sched_recv(recvbuf, recvcounts[rank], datatype, rank+1, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
         }
     }
@@ -264,7 +264,7 @@ fn_fail:
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int recvcounts[],
                                   MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
-                                  MPID_Sched_t s)
+                                  MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int   rank, comm_size, i;
@@ -283,7 +283,7 @@ int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
 
     is_commutative = MPIR_Op_is_commutative(op);
-    MPIU_Assert(is_commutative);
+    MPIR_Assert(is_commutative);
 
     MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");
 
@@ -298,15 +298,15 @@ int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int
     }
     /* total_count*extent eventually gets malloced. it isn't added to
      * a user-passed in buffer */
-    MPIU_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
+    MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
 
     if (sendbuf != MPI_IN_PLACE) {
         /* copy local data into recvbuf */
-        mpi_errno = MPID_Sched_copy(((char *)sendbuf+disps[rank]*extent),
+        mpi_errno = MPIR_Sched_copy(((char *)sendbuf+disps[rank]*extent),
                                     recvcounts[rank], datatype,
                                     recvbuf, recvcounts[rank], datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* allocate temporary buffer to store incoming data */
@@ -321,28 +321,28 @@ int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int
         /* send the data that dst needs. recv data that this process
            needs from src into tmp_recvbuf */
         if (sendbuf != MPI_IN_PLACE) {
-            mpi_errno = MPID_Sched_send(((char *)sendbuf+disps[dst]*extent),
+            mpi_errno = MPIR_Sched_send(((char *)sendbuf+disps[dst]*extent),
                                         recvcounts[dst], datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
         else {
-            mpi_errno = MPID_Sched_send(((char *)recvbuf+disps[dst]*extent),
+            mpi_errno = MPIR_Sched_send(((char *)recvbuf+disps[dst]*extent),
                                         recvcounts[dst], datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
-        mpi_errno = MPID_Sched_recv(tmp_recvbuf, recvcounts[rank], datatype, src, comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(tmp_recvbuf, recvcounts[rank], datatype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* FIXME does this algorithm actually work correctly for noncommutative ops?
          * If so, relax restriction in assert and comments... */
         if (is_commutative || (src < rank)) {
             if (sendbuf != MPI_IN_PLACE) {
-                mpi_errno = MPID_Sched_reduce(tmp_recvbuf, recvbuf, recvcounts[rank], datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, recvbuf, recvcounts[rank], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
             else {
-                mpi_errno = MPID_Sched_reduce(tmp_recvbuf, ((char *)recvbuf+disps[rank]*extent),
+                mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, ((char *)recvbuf+disps[rank]*extent),
                                               recvcounts[rank], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 /* We can't store the result at the beginning of
@@ -350,41 +350,41 @@ int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int
                    other process/processes need.  At the end we will copy back
                    the result to the beginning of recvbuf. */
             }
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
         else {
             if (sendbuf != MPI_IN_PLACE) {
-                mpi_errno = MPID_Sched_reduce(recvbuf, tmp_recvbuf, recvcounts[rank], datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(recvbuf, tmp_recvbuf, recvcounts[rank], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
                 /* copy result back into recvbuf */
-                mpi_errno = MPID_Sched_copy(tmp_recvbuf, recvcounts[rank], datatype,
+                mpi_errno = MPIR_Sched_copy(tmp_recvbuf, recvcounts[rank], datatype,
                                             recvbuf, recvcounts[rank], datatype, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
             else {
-                mpi_errno = MPID_Sched_reduce(((char *)recvbuf+disps[rank]*extent),
+                mpi_errno = MPIR_Sched_reduce(((char *)recvbuf+disps[rank]*extent),
                                               tmp_recvbuf, recvcounts[rank], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
                 /* copy result back into recvbuf */
-                mpi_errno = MPID_Sched_copy(tmp_recvbuf, recvcounts[rank], datatype,
+                mpi_errno = MPIR_Sched_copy(tmp_recvbuf, recvcounts[rank], datatype,
                                             ((char *)recvbuf + disps[rank]*extent),
                                             recvcounts[rank], datatype, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
     /* if MPI_IN_PLACE, move output data to the beginning of
        recvbuf. already done for rank 0. */
     if ((sendbuf == MPI_IN_PLACE) && (rank != 0)) {
-        mpi_errno = MPID_Sched_copy(((char *)recvbuf + disps[rank]*extent),
+        mpi_errno = MPIR_Sched_copy(((char *)recvbuf + disps[rank]*extent),
                                     recvcounts[rank], datatype,
                                     recvbuf, recvcounts[rank], datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     MPIR_SCHED_CHKPMEM_COMMIT(s);
@@ -403,7 +403,7 @@ fn_fail:
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int recvcounts[],
                                  MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
-                                 MPID_Sched_t s)
+                                 MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, comm_size, i;
@@ -440,7 +440,7 @@ int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int r
 
     /* total_count*extent eventually gets malloced. it isn't added to
      * a user-passed in buffer */
-    MPIU_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
+    MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
 
 
     /* need to allocate temporary buffer to receive incoming data*/
@@ -456,14 +456,14 @@ int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int r
 
     /* copy sendbuf into tmp_results */
     if (sendbuf != MPI_IN_PLACE)
-        mpi_errno = MPID_Sched_copy(sendbuf, total_count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, total_count, datatype,
                                     tmp_results, total_count, datatype, s);
     else
-        mpi_errno = MPID_Sched_copy(recvbuf, total_count, datatype,
+        mpi_errno = MPIR_Sched_copy(recvbuf, total_count, datatype,
                                     tmp_results, total_count, datatype, s);
 
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     mask = 0x1;
     i = 0;
@@ -527,11 +527,11 @@ int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int r
                received in tmp_recvbuf and then accumulated into
                tmp_results. accumulation is done later below.   */
 
-            mpi_errno = MPID_Sched_send(tmp_results, 1, sendtype, dst, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(tmp_results, 1, sendtype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            mpi_errno = MPID_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
             received = 1;
         }
 
@@ -572,9 +572,9 @@ int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int r
                     && (dst >= tree_root + nprocs_completed))
                 {
                     /* send the current result */
-                    mpi_errno = MPID_Sched_send(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_send(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 /* recv only if this proc. doesn't have data and sender
                    has data */
@@ -582,9 +582,9 @@ int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int r
                          (dst < tree_root + nprocs_completed) &&
                          (rank >= tree_root + nprocs_completed))
                 {
-                    mpi_errno = MPID_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                     received = 1;
                 }
                 tmp_mask >>= 1;
@@ -606,28 +606,28 @@ int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int r
            we do the reduce here. */
         if (received) {
             if (is_commutative || (dst_tree_root < my_tree_root)) {
-                mpi_errno = MPID_Sched_reduce(tmp_recvbuf, tmp_results, blklens[0], datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, tmp_results, blklens[0], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                mpi_errno = MPID_Sched_reduce(((char *)tmp_recvbuf + dis[1]*extent),
+                mpi_errno = MPIR_Sched_reduce(((char *)tmp_recvbuf + dis[1]*extent),
                                               ((char *)tmp_results + dis[1]*extent),
                                               blklens[1], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
             else {
-                mpi_errno = MPID_Sched_reduce(tmp_results, tmp_recvbuf, blklens[0], datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_results, tmp_recvbuf, blklens[0], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                mpi_errno = MPID_Sched_reduce(((char *)tmp_results + dis[1]*extent),
+                mpi_errno = MPIR_Sched_reduce(((char *)tmp_results + dis[1]*extent),
                                               ((char *)tmp_recvbuf + dis[1]*extent),
                                               blklens[1], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
 
                 /* copy result back into tmp_results */
-                mpi_errno = MPID_Sched_copy(tmp_recvbuf, 1, recvtype,
+                mpi_errno = MPIR_Sched_copy(tmp_recvbuf, 1, recvtype,
                                             tmp_results, 1, recvtype, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
         }
 
@@ -639,11 +639,11 @@ int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int r
     }
 
     /* now copy final results from tmp_results to recvbuf */
-    mpi_errno = MPID_Sched_copy(((char *)tmp_results+disps[rank]*extent),
+    mpi_errno = MPIR_Sched_copy(((char *)tmp_results+disps[rank]*extent),
                                 recvcounts[rank], datatype,
                                 recvbuf, recvcounts[rank], datatype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     MPIR_SCHED_CHKPMEM_COMMIT(s);
 fn_exit:
@@ -664,7 +664,7 @@ fn_fail:
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static int MPIR_Ireduce_scatter_noncomm(const void *sendbuf, void *recvbuf,
                                         const int recvcounts[], MPI_Datatype datatype, MPI_Op op,
-                                        MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                                        MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size = comm_ptr->local_size;
@@ -691,10 +691,10 @@ static int MPIR_Ireduce_scatter_noncomm(const void *sendbuf, void *recvbuf,
     }
 
     /* begin error checking */
-    MPIU_Assert(pof2 == comm_size); /* FIXME this version only works for power of 2 procs */
+    MPIR_Assert(pof2 == comm_size); /* FIXME this version only works for power of 2 procs */
 
     for (i = 0; i < (comm_size - 1); ++i) {
-        MPIU_Assert(recvcounts[i] == recvcounts[i+1]);
+        MPIR_Assert(recvcounts[i] == recvcounts[i+1]);
     }
     /* end error checking */
 
@@ -711,12 +711,12 @@ static int MPIR_Ireduce_scatter_noncomm(const void *sendbuf, void *recvbuf,
     /* Copy our send data to tmp_buf0.  We do this one block at a time and
        permute the blocks as we go according to the mirror permutation. */
     for (i = 0; i < comm_size; ++i) {
-        mpi_errno = MPID_Sched_copy(((char *)(sendbuf == MPI_IN_PLACE ? (const void *)recvbuf : sendbuf) + (i * true_extent * block_size)),
+        mpi_errno = MPIR_Sched_copy(((char *)(sendbuf == MPI_IN_PLACE ? (const void *)recvbuf : sendbuf) + (i * true_extent * block_size)),
                                     block_size, datatype,
                                     ((char *)tmp_buf0 + (MPIU_Mirror_permutation(i, log2_comm_size) * true_extent * block_size)),
                                     block_size, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     buf0_was_inout = 1;
 
@@ -739,43 +739,43 @@ static int MPIR_Ireduce_scatter_noncomm(const void *sendbuf, void *recvbuf,
             send_offset += size;
         }
 
-        mpi_errno = MPID_Sched_send((outgoing_data + send_offset*true_extent),
+        mpi_errno = MPIR_Sched_send((outgoing_data + send_offset*true_extent),
                                     size, datatype, peer, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_recv((incoming_data + recv_offset*true_extent),
+        mpi_errno = MPIR_Sched_recv((incoming_data + recv_offset*true_extent),
                                     size, datatype, peer, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* always perform the reduction at recv_offset, the data at send_offset
            is now our peer's responsibility */
         if (rank > peer) {
             /* higher ranked value so need to call op(received_data, my_data) */
-            mpi_errno = MPID_Sched_reduce((incoming_data + recv_offset*true_extent),
+            mpi_errno = MPIR_Sched_reduce((incoming_data + recv_offset*true_extent),
                                           (outgoing_data + recv_offset*true_extent),
                                           size, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
         else {
             /* lower ranked value so need to call op(my_data, received_data) */
-            mpi_errno = MPID_Sched_reduce((outgoing_data + recv_offset*true_extent),
+            mpi_errno = MPIR_Sched_reduce((outgoing_data + recv_offset*true_extent),
                                           (incoming_data + recv_offset*true_extent),
                                           size, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             buf0_was_inout = !buf0_was_inout;
         }
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* the next round of send/recv needs to happen within the block (of size
            "size") that we just received and reduced */
         send_offset = recv_offset;
     }
 
-    MPIU_Assert(size == recvcounts[rank]);
+    MPIR_Assert(size == recvcounts[rank]);
 
     /* copy the reduced data to the recvbuf */
     result_ptr = (char *)(buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent;
-    mpi_errno = MPID_Sched_copy(result_ptr, size, datatype,
+    mpi_errno = MPIR_Sched_copy(result_ptr, size, datatype,
                                 recvbuf, size, datatype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     MPIR_SCHED_CHKPMEM_COMMIT(s);
@@ -838,7 +838,7 @@ fn_fail:
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Ireduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recvcounts[],
                                MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
-                               MPID_Sched_t s)
+                               MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
@@ -901,7 +901,7 @@ fn_fail:
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int recvcounts[],
                                MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
-                               MPID_Sched_t s)
+                               MPIR_Sched_t s)
 {
     /* Intercommunicator Reduce_scatter.
        We first do an intercommunicator reduce to rank 0 on left group,
@@ -946,7 +946,7 @@ int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int rec
 
     /* first do a reduce from right group to rank 0 in left group,
        then from left group to rank 0 in right group*/
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ireduce_sched);
+    MPIR_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ireduce_sched);
     if (comm_ptr->is_low_group) {
         /* reduce from right group to rank 0*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
@@ -979,17 +979,17 @@ int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int rec
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm) {
-        mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
+        mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     newcomm_ptr = comm_ptr->local_comm;
 
-    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Iscatterv_sched);
+    MPIR_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Iscatterv_sched);
     mpi_errno = newcomm_ptr->coll_fns->Iscatterv_sched(tmp_buf, recvcounts, disps, datatype,
                                                  recvbuf, recvcounts[rank], datatype, 0,
                                                  newcomm_ptr, s);
@@ -1014,21 +1014,21 @@ int MPIR_Ireduce_scatter_impl(const void *sendbuf, void *recvbuf, const int recv
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ireduce_scatter_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ireduce_scatter_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_sched(sendbuf, recvbuf, recvcounts, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -1072,11 +1072,11 @@ int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts
     int mpi_errno = MPI_SUCCESS;
     int i;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IREDUCE_SCATTER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IREDUCE_SCATTER);
     i = 0;
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IREDUCE_SCATTER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IREDUCE_SCATTER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -1144,7 +1144,7 @@ int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IREDUCE_SCATTER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IREDUCE_SCATTER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/ired_scat_block.c b/src/mpi/coll/ired_scat_block.c
index f0eeb52..9d69e20 100644
--- a/src/mpi/coll/ired_scat_block.c
+++ b/src/mpi/coll/ired_scat_block.c
@@ -36,7 +36,7 @@ int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount,
 #define FUNCNAME MPIR_Ireduce_scatter_block_rec_hlv
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, comm_size, i;
@@ -56,7 +56,7 @@ int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int r
     MPID_Datatype_get_extent_macro(datatype, extent);
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
 
-    MPIU_Assert(MPIR_Op_is_commutative(op));
+    MPIR_Assert(MPIR_Op_is_commutative(op));
 
     MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");
 
@@ -85,13 +85,13 @@ int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int r
 
     /* copy sendbuf into tmp_results */
     if (sendbuf != MPI_IN_PLACE)
-        mpi_errno = MPID_Sched_copy(sendbuf, total_count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, total_count, datatype,
                                     tmp_results, total_count, datatype, s);
     else
-        mpi_errno = MPID_Sched_copy(recvbuf, total_count, datatype,
+        mpi_errno = MPIR_Sched_copy(recvbuf, total_count, datatype,
                                     tmp_results, total_count, datatype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     pof2 = 1;
     while (pof2 <= comm_size) pof2 <<= 1;
@@ -107,9 +107,9 @@ int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int r
 
     if (rank < 2*rem) {
         if (rank % 2 == 0) { /* even */
-            mpi_errno = MPID_Sched_send(tmp_results, total_count, datatype, rank+1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(tmp_results, total_count, datatype, rank+1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* temporarily set the rank to -1 so that this
                process does not pariticipate in recursive
@@ -117,16 +117,16 @@ int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int r
             newrank = -1;
         }
         else { /* odd */
-            mpi_errno = MPID_Sched_recv(tmp_recvbuf, total_count, datatype, rank-1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_recvbuf, total_count, datatype, rank-1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* do the reduction on received data. since the
                ordering is right, it doesn't matter whether
                the operation is commutative or not. */
-            mpi_errno = MPID_Sched_reduce(tmp_recvbuf, tmp_results, total_count, datatype, op, s);
+            mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, tmp_results, total_count, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* change the rank */
             newrank = rank / 2;
@@ -190,23 +190,23 @@ int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int r
                 int send_dst = (send_cnt ? dst : MPI_PROC_NULL);
                 int recv_dst = (recv_cnt ? dst : MPI_PROC_NULL);
 
-                mpi_errno = MPID_Sched_send(((char *)tmp_results + newdisps[send_idx]*extent),
+                mpi_errno = MPIR_Sched_send(((char *)tmp_results + newdisps[send_idx]*extent),
                                             send_cnt, datatype, send_dst, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                mpi_errno = MPID_Sched_recv(((char *) tmp_recvbuf + newdisps[recv_idx]*extent),
+                mpi_errno = MPIR_Sched_recv(((char *) tmp_recvbuf + newdisps[recv_idx]*extent),
                                             recv_cnt, datatype, recv_dst, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
 
             /* tmp_recvbuf contains data received in this step.
                tmp_results contains data accumulated so far */
             if (recv_cnt) {
-                mpi_errno = MPID_Sched_reduce(((char *)tmp_recvbuf + newdisps[recv_idx]*extent),
+                mpi_errno = MPIR_Sched_reduce(((char *)tmp_recvbuf + newdisps[recv_idx]*extent),
                                               ((char *)tmp_results + newdisps[recv_idx]*extent),
                                               recv_cnt, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
 
             /* update send_idx for next iteration */
@@ -216,11 +216,11 @@ int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int r
         }
 
         /* copy this process's result from tmp_results to recvbuf */
-        mpi_errno = MPID_Sched_copy(((char *)tmp_results + disps[rank]*extent),
+        mpi_errno = MPIR_Sched_copy(((char *)tmp_results + disps[rank]*extent),
                                     recvcount, datatype,
                                     recvbuf, recvcount, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
     }
 
@@ -229,15 +229,15 @@ int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int r
        calculated for that process */
     if (rank < 2*rem) {
         if (rank % 2) { /* odd */
-            mpi_errno = MPID_Sched_send(((char *)tmp_results + disps[rank-1]*extent),
+            mpi_errno = MPIR_Sched_send(((char *)tmp_results + disps[rank-1]*extent),
                                         recvcount, datatype, rank-1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
         else  {   /* even */
-            mpi_errno = MPID_Sched_recv(recvbuf, recvcount, datatype, rank+1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(recvbuf, recvcount, datatype, rank+1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
@@ -256,7 +256,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ireduce_scatter_block_rec_pairwise
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int   rank, comm_size, i;
@@ -275,7 +275,7 @@ int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
 
     is_commutative = MPIR_Op_is_commutative(op);
-    MPIU_Assert(is_commutative);
+    MPIR_Assert(is_commutative);
 
     MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");
 
@@ -290,15 +290,15 @@ int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int
     }
     /* total_count*extent eventually gets malloced. it isn't added to
      * a user-passed in buffer */
-    MPIU_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
+    MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
 
     if (sendbuf != MPI_IN_PLACE) {
         /* copy local data into recvbuf */
-        mpi_errno = MPID_Sched_copy(((char *)sendbuf+disps[rank]*extent),
+        mpi_errno = MPIR_Sched_copy(((char *)sendbuf+disps[rank]*extent),
                                     recvcount, datatype,
                                     recvbuf, recvcount, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* allocate temporary buffer to store incoming data */
@@ -313,28 +313,28 @@ int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int
         /* send the data that dst needs. recv data that this process
            needs from src into tmp_recvbuf */
         if (sendbuf != MPI_IN_PLACE) {
-            mpi_errno = MPID_Sched_send(((char *)sendbuf+disps[dst]*extent),
+            mpi_errno = MPIR_Sched_send(((char *)sendbuf+disps[dst]*extent),
                                         recvcount, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
         else {
-            mpi_errno = MPID_Sched_send(((char *)recvbuf+disps[dst]*extent),
+            mpi_errno = MPIR_Sched_send(((char *)recvbuf+disps[dst]*extent),
                                         recvcount, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
-        mpi_errno = MPID_Sched_recv(tmp_recvbuf, recvcount, datatype, src, comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(tmp_recvbuf, recvcount, datatype, src, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* FIXME does this algorithm actually work correctly for noncommutative ops?
          * If so, relax restriction in assert and comments... */
         if (is_commutative || (src < rank)) {
             if (sendbuf != MPI_IN_PLACE) {
-                mpi_errno = MPID_Sched_reduce(tmp_recvbuf, recvbuf, recvcount, datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, recvbuf, recvcount, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
             else {
-                mpi_errno = MPID_Sched_reduce(tmp_recvbuf, ((char *)recvbuf+disps[rank]*extent),
+                mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, ((char *)recvbuf+disps[rank]*extent),
                                               recvcount, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 /* We can't store the result at the beginning of
@@ -342,41 +342,41 @@ int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int
                    other process/processes need.  At the end we will copy back
                    the result to the beginning of recvbuf. */
             }
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
         else {
             if (sendbuf != MPI_IN_PLACE) {
-                mpi_errno = MPID_Sched_reduce(recvbuf, tmp_recvbuf, recvcount, datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(recvbuf, tmp_recvbuf, recvcount, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
                 /* copy result back into recvbuf */
-                mpi_errno = MPID_Sched_copy(tmp_recvbuf, recvcount, datatype,
+                mpi_errno = MPIR_Sched_copy(tmp_recvbuf, recvcount, datatype,
                                             recvbuf, recvcount, datatype, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
             else {
-                mpi_errno = MPID_Sched_reduce(((char *)recvbuf+disps[rank]*extent),
+                mpi_errno = MPIR_Sched_reduce(((char *)recvbuf+disps[rank]*extent),
                                               tmp_recvbuf, recvcount, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
                 /* copy result back into recvbuf */
-                mpi_errno = MPID_Sched_copy(tmp_recvbuf, recvcount, datatype,
+                mpi_errno = MPIR_Sched_copy(tmp_recvbuf, recvcount, datatype,
                                             ((char *)recvbuf + disps[rank]*extent),
                                             recvcount, datatype, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
     /* if MPI_IN_PLACE, move output data to the beginning of
        recvbuf. already done for rank 0. */
     if ((sendbuf == MPI_IN_PLACE) && (rank != 0)) {
-        mpi_errno = MPID_Sched_copy(((char *)recvbuf + disps[rank]*extent),
+        mpi_errno = MPIR_Sched_copy(((char *)recvbuf + disps[rank]*extent),
                                     recvcount, datatype,
                                     recvbuf, recvcount, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     MPIR_SCHED_CHKPMEM_COMMIT(s);
@@ -393,7 +393,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ireduce_scatter_block_rec_dbl
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, comm_size, i;
@@ -430,7 +430,7 @@ int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int r
 
     /* total_count*extent eventually gets malloced. it isn't added to
      * a user-passed in buffer */
-    MPIU_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
+    MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
 
 
     /* need to allocate temporary buffer to receive incoming data*/
@@ -446,14 +446,14 @@ int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int r
 
     /* copy sendbuf into tmp_results */
     if (sendbuf != MPI_IN_PLACE)
-        mpi_errno = MPID_Sched_copy(sendbuf, total_count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, total_count, datatype,
                                     tmp_results, total_count, datatype, s);
     else
-        mpi_errno = MPID_Sched_copy(recvbuf, total_count, datatype,
+        mpi_errno = MPIR_Sched_copy(recvbuf, total_count, datatype,
                                     tmp_results, total_count, datatype, s);
 
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     mask = 0x1;
     i = 0;
@@ -510,11 +510,11 @@ int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int r
             /* tmp_results contains data to be sent in each step. Data is
                received in tmp_recvbuf and then accumulated into
                tmp_results. accumulation is done later below.   */
-            mpi_errno = MPID_Sched_send(tmp_results, 1, sendtype, dst, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(tmp_results, 1, sendtype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            mpi_errno = MPID_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
             received = 1;
         }
 
@@ -555,9 +555,9 @@ int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int r
                     && (dst >= tree_root + nprocs_completed))
                 {
                     /* send the current result */
-                    mpi_errno = MPID_Sched_send(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_send(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 /* recv only if this proc. doesn't have data and sender
                    has data */
@@ -565,9 +565,9 @@ int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int r
                          (dst < tree_root + nprocs_completed) &&
                          (rank >= tree_root + nprocs_completed))
                 {
-                    mpi_errno = MPID_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                     received = 1;
                 }
                 tmp_mask >>= 1;
@@ -589,28 +589,28 @@ int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int r
            we do the reduce here. */
         if (received) {
             if (is_commutative || (dst_tree_root < my_tree_root)) {
-                mpi_errno = MPID_Sched_reduce(tmp_recvbuf, tmp_results, blklens[0], datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, tmp_results, blklens[0], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                mpi_errno = MPID_Sched_reduce(((char *)tmp_recvbuf + dis[1]*extent),
+                mpi_errno = MPIR_Sched_reduce(((char *)tmp_recvbuf + dis[1]*extent),
                                               ((char *)tmp_results + dis[1]*extent),
                                               blklens[1], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
             else {
-                mpi_errno = MPID_Sched_reduce(tmp_results, tmp_recvbuf, blklens[0], datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_results, tmp_recvbuf, blklens[0], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                mpi_errno = MPID_Sched_reduce(((char *)tmp_results + dis[1]*extent),
+                mpi_errno = MPIR_Sched_reduce(((char *)tmp_results + dis[1]*extent),
                                               ((char *)tmp_recvbuf + dis[1]*extent),
                                               blklens[1], datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
 
                 /* copy result back into tmp_results */
-                mpi_errno = MPID_Sched_copy(tmp_recvbuf, 1, recvtype,
+                mpi_errno = MPIR_Sched_copy(tmp_recvbuf, 1, recvtype,
                                             tmp_results, 1, recvtype, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
         }
 
@@ -622,11 +622,11 @@ int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int r
     }
 
     /* now copy final results from tmp_results to recvbuf */
-    mpi_errno = MPID_Sched_copy(((char *)tmp_results+disps[rank]*extent),
+    mpi_errno = MPIR_Sched_copy(((char *)tmp_results+disps[rank]*extent),
                                 recvcount, datatype,
                                 recvbuf, recvcount, datatype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     MPIR_SCHED_CHKPMEM_COMMIT(s);
 fn_exit:
@@ -644,7 +644,7 @@ fn_fail:
 #define FUNCNAME MPIR_Reduce_scatter_block_noncomm
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size = comm_ptr->local_size;
@@ -671,7 +671,7 @@ int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int r
     }
 
     /* begin error checking */
-    MPIU_Assert(pof2 == comm_size); /* FIXME this version only works for power of 2 procs */
+    MPIR_Assert(pof2 == comm_size); /* FIXME this version only works for power of 2 procs */
     /* end error checking */
 
     /* size of a block (count of datatype per block, NOT bytes per block) */
@@ -687,13 +687,13 @@ int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int r
     /* Copy our send data to tmp_buf0.  We do this one block at a time and
        permute the blocks as we go according to the mirror permutation. */
     for (i = 0; i < comm_size; ++i) {
-        mpi_errno = MPID_Sched_copy(((char *)(sendbuf == MPI_IN_PLACE ? (const void *)recvbuf : sendbuf) + (i * true_extent * block_size)),
+        mpi_errno = MPIR_Sched_copy(((char *)(sendbuf == MPI_IN_PLACE ? (const void *)recvbuf : sendbuf) + (i * true_extent * block_size)),
                                     block_size, datatype,
                                     ((char *)tmp_buf0 + (MPIU_Mirror_permutation(i, log2_comm_size) * true_extent * block_size)),
                                      block_size, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
     buf0_was_inout = 1;
 
     send_offset = 0;
@@ -715,43 +715,43 @@ int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int r
             send_offset += size;
         }
 
-        mpi_errno = MPID_Sched_send((outgoing_data + send_offset*true_extent),
+        mpi_errno = MPIR_Sched_send((outgoing_data + send_offset*true_extent),
                                     size, datatype, peer, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_recv((incoming_data + recv_offset*true_extent),
+        mpi_errno = MPIR_Sched_recv((incoming_data + recv_offset*true_extent),
                                     size, datatype, peer, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* always perform the reduction at recv_offset, the data at send_offset
            is now our peer's responsibility */
         if (rank > peer) {
             /* higher ranked value so need to call op(received_data, my_data) */
-            mpi_errno = MPID_Sched_reduce((incoming_data + recv_offset*true_extent),
+            mpi_errno = MPIR_Sched_reduce((incoming_data + recv_offset*true_extent),
                                           (outgoing_data + recv_offset*true_extent),
                                           size, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
         else {
             /* lower ranked value so need to call op(my_data, received_data) */
-            mpi_errno = MPID_Sched_reduce((outgoing_data + recv_offset*true_extent),
+            mpi_errno = MPIR_Sched_reduce((outgoing_data + recv_offset*true_extent),
                                           (incoming_data + recv_offset*true_extent),
                                           size, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             buf0_was_inout = !buf0_was_inout;
         }
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         /* the next round of send/recv needs to happen within the block (of size
            "size") that we just received and reduced */
         send_offset = recv_offset;
     }
 
-    MPIU_Assert(size == recvcount);
+    MPIR_Assert(size == recvcount);
 
     /* copy the reduced data to the recvbuf */
     result_ptr = (char *)(buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent;
-    mpi_errno = MPID_Sched_copy(result_ptr, size, datatype,
+    mpi_errno = MPIR_Sched_copy(result_ptr, size, datatype,
                                 recvbuf, size, datatype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
@@ -767,7 +767,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ireduce_scatter_block_intra
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int is_commutative;
@@ -817,7 +817,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ireduce_scatter_block_inter
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
 /* Intercommunicator Ireduce_scatter_block.
    We first do an intercommunicator reduce to rank 0 on left group,
@@ -850,7 +850,7 @@ int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int rec
 
     /* first do a reduce from right group to rank 0 in left group,
        then from left group to rank 0 in right group*/
-    MPIU_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ireduce_sched);
+    MPIR_Assert(comm_ptr->coll_fns && comm_ptr->coll_fns->Ireduce_sched);
     if (comm_ptr->is_low_group) {
         /* reduce from right group to rank 0*/
         root = (rank == 0) ? MPI_ROOT : MPI_PROC_NULL;
@@ -883,17 +883,17 @@ int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int rec
                                                 datatype, op, root, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm) {
-        mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
+        mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     newcomm_ptr = comm_ptr->local_comm;
 
-    MPIU_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Iscatter_sched);
+    MPIR_Assert(newcomm_ptr->coll_fns && newcomm_ptr->coll_fns->Iscatter_sched);
     mpi_errno = newcomm_ptr->coll_fns->Iscatter_sched(tmp_buf, recvcount, datatype,
                                                 recvbuf, recvcount, datatype, 0,
                                                 newcomm_ptr, s);
@@ -916,21 +916,21 @@ int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recv
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ireduce_scatter_block_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ireduce_scatter_block_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Ireduce_scatter_block_sched(sendbuf, recvbuf, recvcount, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -975,10 +975,10 @@ int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -1041,7 +1041,7 @@ int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IREDUCE_SCATTER_BLOCK);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/ireduce.c b/src/mpi/coll/ireduce.c
index 2308189..42f2629 100644
--- a/src/mpi/coll/ireduce.c
+++ b/src/mpi/coll/ireduce.c
@@ -33,7 +33,7 @@ int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype data
 #define FUNCNAME MPIR_Ireduce_binomial
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int comm_size, rank, is_commutative;
@@ -42,7 +42,7 @@ int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Dat
     void *tmp_buf;
     MPIR_SCHED_CHKPMEM_DECL(2);
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     if (count == 0) return MPI_SUCCESS;
 
@@ -56,7 +56,7 @@ int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Dat
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         per_thread->op_errno = 0;
     }
 
@@ -70,7 +70,7 @@ int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Dat
     /* I think this is the worse case, so we can avoid an assert()
      * inside the for loop */
     /* should be buf+{this}? */
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
     MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)),
                         mpi_errno, "temporary buffer");
@@ -89,9 +89,9 @@ int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Dat
     if ((rank != root) || (sendbuf != MPI_IN_PLACE)) {
         /* could do this up front as an MPIR_Localcopy instead, but we'll defer
          * it to the progress engine */
-        mpi_errno = MPID_Sched_copy(sendbuf, count, datatype, recvbuf, count, datatype, s);
+        mpi_errno = MPIR_Sched_copy(sendbuf, count, datatype, recvbuf, count, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_barrier(s);
+        mpi_errno = MPIR_Sched_barrier(s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
@@ -139,26 +139,26 @@ int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Dat
             source = (relrank | mask);
             if (source < comm_size) {
                 source = (source + lroot) % comm_size;
-                mpi_errno = MPID_Sched_recv(tmp_buf, count, datatype, source, comm_ptr, s);
+                mpi_errno = MPIR_Sched_recv(tmp_buf, count, datatype, source, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                mpi_errno = MPID_Sched_barrier(s);
+                mpi_errno = MPIR_Sched_barrier(s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
                 /* The sender is above us, so the received buffer must be
                    the second argument (in the noncommutative case). */
                 if (is_commutative) {
-                    mpi_errno = MPID_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
+                    mpi_errno = MPIR_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 }
                 else {
-                    mpi_errno = MPID_Sched_reduce(recvbuf, tmp_buf, count, datatype, op, s);
+                    mpi_errno = MPIR_Sched_reduce(recvbuf, tmp_buf, count, datatype, op, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    mpi_errno = MPID_Sched_barrier(s);
+                    mpi_errno = MPIR_Sched_barrier(s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    mpi_errno = MPID_Sched_copy(tmp_buf, count, datatype, recvbuf, count, datatype, s);
+                    mpi_errno = MPIR_Sched_copy(tmp_buf, count, datatype, recvbuf, count, datatype, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 }
-                mpi_errno = MPID_Sched_barrier(s);
+                mpi_errno = MPIR_Sched_barrier(s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
         }
@@ -166,9 +166,9 @@ int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Dat
             /* I've received all that I'm going to.  Send my result to
                my parent */
             source = ((relrank & (~ mask)) + lroot) % comm_size;
-            mpi_errno = MPID_Sched_send(recvbuf, count, datatype, source, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(recvbuf, count, datatype, source, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            mpi_errno = MPID_Sched_barrier(s);
+            mpi_errno = MPIR_Sched_barrier(s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
             break;
@@ -180,16 +180,16 @@ int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Dat
     {
         if (rank == 0)
         {
-            mpi_errno = MPID_Sched_send(recvbuf, count, datatype, root, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(recvbuf, count, datatype, root, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            mpi_errno = MPID_Sched_barrier(s);
+            mpi_errno = MPIR_Sched_barrier(s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
         else if (rank == root)
         {
-            mpi_errno = MPID_Sched_recv(recvbuf, count, datatype, 0, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(recvbuf, count, datatype, 0, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            mpi_errno = MPID_Sched_barrier(s);
+            mpi_errno = MPIR_Sched_barrier(s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
     }
@@ -233,7 +233,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ireduce_redscat_gather
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int i, j, comm_size, rank, pof2, is_commutative ATTRIBUTE((unused));
@@ -243,14 +243,14 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
     int *cnts, *disps;
     MPI_Aint true_lb, true_extent, extent;
     MPIR_SCHED_CHKPMEM_DECL(2);
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
 
     /* NOTE: this algorithm is currently only correct for commutative operations */
     is_commutative = MPIR_Op_is_commutative(op);
-    MPIU_Assert(is_commutative);
+    MPIR_Assert(is_commutative);
 
     /* Create a temporary buffer */
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
@@ -259,7 +259,7 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
     /* I think this is the worse case, so we can avoid an assert()
      * inside the for loop */
     /* should be buf+{this}? */
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
     MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)),
                               mpi_errno, "temporary buffer");
@@ -282,7 +282,7 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
     }
 
     if ((rank != root) || (sendbuf != MPI_IN_PLACE)) {
-        mpi_errno = MPID_Sched_copy(sendbuf, count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, count, datatype,
                                     recvbuf, count, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
@@ -305,9 +305,9 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
 
     if (rank < 2*rem) {
         if (rank % 2 != 0) { /* odd */
-            mpi_errno = MPID_Sched_send(recvbuf, count, datatype, rank-1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(recvbuf, count, datatype, rank-1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* temporarily set the rank to -1 so that this
                process does not pariticipate in recursive
@@ -315,16 +315,16 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
             newrank = -1;
         }
         else { /* even */
-            mpi_errno = MPID_Sched_recv(tmp_buf, count, datatype, rank+1, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_buf, count, datatype, rank+1, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* do the reduction on received data. */
             /* This algorithm is used only for predefined ops
                and predefined ops are always commutative. */
-            mpi_errno = MPID_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
+            mpi_errno = MPIR_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* change the rank */
             newrank = rank / 2;
@@ -341,8 +341,8 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
     /* We allocate these arrays on all processes, even if newrank=-1,
        because if root is one of the excluded processes, we will
        need them on the root later on below. */
-    MPIU_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
-    MPIU_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
+    MPIR_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
+    MPIR_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
 
     last_idx = send_idx = 0; /* suppress spurious compiler warnings */
 
@@ -380,26 +380,26 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
             }
 
             /* Send data from recvbuf. Recv into tmp_buf */
-            mpi_errno = MPID_Sched_send(((char *)recvbuf + disps[send_idx]*extent),
+            mpi_errno = MPIR_Sched_send(((char *)recvbuf + disps[send_idx]*extent),
                                         send_cnt, datatype,
                                         dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* sendrecv, no barrier here */
-            mpi_errno = MPID_Sched_recv(((char *)tmp_buf + disps[recv_idx]*extent),
+            mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + disps[recv_idx]*extent),
                                         recv_cnt, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* tmp_buf contains data received in this step.
                recvbuf contains data accumulated so far */
 
             /* This algorithm is used only for predefined ops
                and predefined ops are always commutative. */
-            mpi_errno = MPID_Sched_reduce(((char *)tmp_buf + disps[recv_idx]*extent),
+            mpi_errno = MPIR_Sched_reduce(((char *)tmp_buf + disps[recv_idx]*extent),
                                           ((char *)recvbuf + disps[recv_idx]*extent),
                                           recv_cnt, datatype, op, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             /* update send_idx for next iteration */
             send_idx = recv_idx;
@@ -431,18 +431,18 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
                 for (i=1; i<pof2; i++)
                     disps[i] = disps[i-1] + cnts[i-1];
 
-                mpi_errno = MPID_Sched_recv(recvbuf, cnts[0], datatype, 0, comm_ptr, s);
+                mpi_errno = MPIR_Sched_recv(recvbuf, cnts[0], datatype, 0, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
 
                 newrank = 0;
                 send_idx = 0;
                 last_idx = 2;
             }
             else if (newrank == 0) {  /* send */
-                mpi_errno = MPID_Sched_send(recvbuf, cnts[0], datatype, root, comm_ptr, s);
+                mpi_errno = MPIR_Sched_send(recvbuf, cnts[0], datatype, root, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
                 newrank = -1;
             }
             newroot = 0;
@@ -505,18 +505,18 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
             if (newdst_tree_root == newroot_tree_root) {
                 /* send and exit */
                 /* Send data from recvbuf. Recv into tmp_buf */
-                mpi_errno = MPID_Sched_send(((char *)recvbuf + disps[send_idx]*extent),
+                mpi_errno = MPIR_Sched_send(((char *)recvbuf + disps[send_idx]*extent),
                                             send_cnt, datatype, dst, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
                 break;
             }
             else {
                 /* recv and continue */
-                mpi_errno = MPID_Sched_recv(((char *)recvbuf + disps[recv_idx]*extent),
+                mpi_errno = MPIR_Sched_recv(((char *)recvbuf + disps[recv_idx]*extent),
                                             recv_cnt, datatype, dst, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
 
             if (newrank > newdst) send_idx = recv_idx;
@@ -528,7 +528,7 @@ int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, M
 
     MPIR_SCHED_CHKPMEM_COMMIT(s);
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     MPIR_SCHED_CHKPMEM_REAP(s);
@@ -539,12 +539,12 @@ fn_fail:
 #define FUNCNAME MPIR_Ireduce_intra
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int pof2, type_size, comm_size;
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     comm_size = comm_ptr->local_size;
 
@@ -579,7 +579,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ireduce_SMP
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int is_commutative;
@@ -591,8 +591,8 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
 
     if (!MPIR_CVAR_ENABLE_SMP_COLLECTIVES || !MPIR_CVAR_ENABLE_SMP_REDUCE)
         MPID_Abort(comm_ptr, MPI_ERR_OTHER, 1, "SMP collectives are disabled!");
-    MPIU_Assert(MPIR_Comm_is_node_aware(comm_ptr));
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(MPIR_Comm_is_node_aware(comm_ptr));
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     nc = comm_ptr->node_comm;
     nrc = comm_ptr->node_roots_comm;
@@ -611,7 +611,7 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
         MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
         MPID_Datatype_get_extent_macro(datatype, extent);
 
-        MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+        MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
         MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)),
                                   mpi_errno, "temporary buffer");
@@ -620,25 +620,25 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
     }
 
     /* do the intranode reduce on all nodes other than the root's node */
-    if (nc != NULL && MPIU_Get_intranode_rank(comm_ptr, root) == -1) {
-        MPIU_Assert(nc->coll_fns && nc->coll_fns->Ireduce_sched);
+    if (nc != NULL && MPIR_Get_intranode_rank(comm_ptr, root) == -1) {
+        MPIR_Assert(nc->coll_fns && nc->coll_fns->Ireduce_sched);
         mpi_errno = nc->coll_fns->Ireduce_sched(sendbuf, tmp_buf, count, datatype, op, 0, nc, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* do the internode reduce to the root's node */
     if (nrc != NULL) {
-        MPIU_Assert(nrc->coll_fns && nrc->coll_fns->Ireduce_sched);
-        if (nrc->rank != MPIU_Get_internode_rank(comm_ptr, root)) {
+        MPIR_Assert(nrc->coll_fns && nrc->coll_fns->Ireduce_sched);
+        if (nrc->rank != MPIR_Get_internode_rank(comm_ptr, root)) {
             /* I am not on root's node.  Use tmp_buf if we
                participated in the first reduce, otherwise use sendbuf */
             const void *buf = (nc == NULL ? sendbuf : tmp_buf);
             mpi_errno = nrc->coll_fns->Ireduce_sched(buf, NULL, count, datatype,
-                                               op, MPIU_Get_internode_rank(comm_ptr, root),
+                                               op, MPIR_Get_internode_rank(comm_ptr, root),
                                                nrc, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
         else { /* I am on root's node. I have not participated in the earlier reduce. */
             if (comm_ptr->rank != root) {
@@ -646,10 +646,10 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
                    Use tmp_buf as recvbuf. */
 
                 mpi_errno = nrc->coll_fns->Ireduce_sched(sendbuf, tmp_buf, count, datatype,
-                                                   op, MPIU_Get_internode_rank(comm_ptr, root),
+                                                   op, MPIR_Get_internode_rank(comm_ptr, root),
                                                    nrc, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
 
                 /* point sendbuf at tmp_buf to make final intranode reduce easy */
                 sendbuf = tmp_buf;
@@ -658,10 +658,10 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
                 /* I am the root. in_place is automatically handled. */
 
                 mpi_errno = nrc->coll_fns->Ireduce_sched(sendbuf, recvbuf, count, datatype,
-                                                   op, MPIU_Get_internode_rank(comm_ptr, root),
+                                                   op, MPIR_Get_internode_rank(comm_ptr, root),
                                                    nrc, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
 
                 /* set sendbuf to MPI_IN_PLACE to make final intranode reduce easy. */
                 sendbuf = MPI_IN_PLACE;
@@ -670,12 +670,12 @@ int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
     }
 
     /* do the intranode reduce on the root's node */
-    if (nc != NULL && MPIU_Get_intranode_rank(comm_ptr, root) != -1) {
+    if (nc != NULL && MPIR_Get_intranode_rank(comm_ptr, root) != -1) {
         mpi_errno = nc->coll_fns->Ireduce_sched(sendbuf, recvbuf, count, datatype,
-                                          op, MPIU_Get_intranode_rank(comm_ptr, root),
+                                          op, MPIR_Get_intranode_rank(comm_ptr, root),
                                           nc, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
 
@@ -691,7 +691,7 @@ fn_fail:
 #define FUNCNAME MPIR_Ireduce_inter
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank;
@@ -699,7 +699,7 @@ int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Dataty
     void *tmp_buf = NULL;
     MPIR_SCHED_CHKPMEM_DECL(1);
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
 
 /*  Intercommunicator reduce.
     Remote group does a local intracommunicator
@@ -713,9 +713,9 @@ int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Dataty
 
     if (root == MPI_ROOT) {
         /* root receives data from rank 0 on remote group */
-        mpi_errno = MPID_Sched_recv(recvbuf, count, datatype, 0, comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(recvbuf, count, datatype, 0, comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_barrier(s);
+        mpi_errno = MPIR_Sched_barrier(s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
     else {
@@ -731,26 +731,26 @@ int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Dataty
             /* I think this is the worse case, so we can avoid an assert()
              * inside the for loop */
             /* Should MPIR_SCHED_CHKPMEM_MALLOC do this? */
-            MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+            MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
             MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
             /* adjust for potential negative lower bound in datatype */
             tmp_buf = (void *)((char*)tmp_buf - true_lb);
         }
 
         if (!comm_ptr->local_comm) {
-            mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
+            mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
 
         mpi_errno = MPIR_Ireduce_intra(sendbuf, tmp_buf, count, datatype, op, 0, comm_ptr->local_comm, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        mpi_errno = MPID_Sched_barrier(s);
+        mpi_errno = MPIR_Sched_barrier(s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
         if (rank == 0) {
-            mpi_errno = MPID_Sched_send(tmp_buf, count, datatype, root, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(tmp_buf, count, datatype, root, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            mpi_errno = MPID_Sched_barrier(s);
+            mpi_errno = MPIR_Sched_barrier(s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
     }
@@ -772,21 +772,21 @@ int MPIR_Ireduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ireduce_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ireduce_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Ireduce_sched(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -830,10 +830,10 @@ int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype data
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IREDUCE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IREDUCE);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IREDUCE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IREDUCE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -913,7 +913,7 @@ int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype data
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IREDUCE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IREDUCE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/iscan.c b/src/mpi/coll/iscan.c
index 8727274..e651e4d 100644
--- a/src/mpi/coll/iscan.c
+++ b/src/mpi/coll/iscan.c
@@ -69,7 +69,7 @@ int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dataty
 #define FUNCNAME MPIR_Iscan_rec_dbl
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     MPI_Aint true_extent, true_lb, extent;
@@ -95,7 +95,7 @@ int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Dataty
 
     /* This eventually gets malloc()ed as a temp buffer, not added to
      * any user buffers */
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
     /* adjust for potential negative lower bound in datatype */
     partial_scan = (void *)((char*)partial_scan - true_lb);
@@ -109,16 +109,16 @@ int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Dataty
     /* Since this is an inclusive scan, copy local contribution into
        recvbuf. */
     if (sendbuf != MPI_IN_PLACE) {
-        mpi_errno = MPID_Sched_copy(sendbuf, count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, count, datatype,
                                     recvbuf, count, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     if (sendbuf != MPI_IN_PLACE)
-        mpi_errno = MPID_Sched_copy(sendbuf, count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, count, datatype,
                                     partial_scan, count, datatype, s);
     else
-        mpi_errno = MPID_Sched_copy(recvbuf, count, datatype,
+        mpi_errno = MPIR_Sched_copy(recvbuf, count, datatype,
                                     partial_scan, count, datatype, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
@@ -127,35 +127,35 @@ int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Dataty
         dst = rank ^ mask;
         if (dst < comm_size) {
             /* Send partial_scan to dst. Recv into tmp_buf */
-            mpi_errno = MPID_Sched_send(partial_scan, count, datatype, dst, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(partial_scan, count, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             /* sendrecv, no barrier here */
-            mpi_errno = MPID_Sched_recv(tmp_buf, count, datatype, dst, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(tmp_buf, count, datatype, dst, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
 
             if (rank > dst) {
-                mpi_errno = MPID_Sched_reduce(tmp_buf, partial_scan, count, datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_buf, partial_scan, count, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                mpi_errno = MPID_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
+                mpi_errno = MPIR_Sched_reduce(tmp_buf, recvbuf, count, datatype, op, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
             else {
                 if (is_commutative) {
-                    mpi_errno = MPID_Sched_reduce(tmp_buf, partial_scan, count, datatype, op, s);
+                    mpi_errno = MPIR_Sched_reduce(tmp_buf, partial_scan, count, datatype, op, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 else {
-                    mpi_errno = MPID_Sched_reduce(partial_scan, tmp_buf, count, datatype, op, s);
+                    mpi_errno = MPIR_Sched_reduce(partial_scan, tmp_buf, count, datatype, op, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
 
-                    mpi_errno = MPID_Sched_copy(tmp_buf, count, datatype,
+                    mpi_errno = MPIR_Sched_copy(tmp_buf, count, datatype,
                                                 partial_scan, count, datatype, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
             }
         }
@@ -174,7 +174,7 @@ fn_fail:
 #define FUNCNAME MPIR_Iscan_rec_dbl
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank = comm_ptr->rank;
@@ -191,7 +191,7 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
        communicator in which all the nodes contain processes with
        consecutive ranks. */
 
-    if (!MPIR_Comm_is_node_consecutive(comm_ptr)) {
+    if (!MPII_Comm_is_node_consecutive(comm_ptr)) {
         /* We can't use the SMP-aware algorithm, use the generic one */
         return MPIR_Iscan_rec_dbl(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
     }
@@ -199,16 +199,16 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
     node_comm = comm_ptr->node_comm;
     roots_comm = comm_ptr->node_roots_comm;
     if (node_comm) {
-        MPIU_Assert(node_comm->coll_fns && node_comm->coll_fns->Iscan_sched && node_comm->coll_fns->Ibcast_sched);
+        MPIR_Assert(node_comm->coll_fns && node_comm->coll_fns->Iscan_sched && node_comm->coll_fns->Ibcast_sched);
     }
     if (roots_comm) {
-        MPIU_Assert(roots_comm->coll_fns && roots_comm->coll_fns->Iscan_sched);
+        MPIR_Assert(roots_comm->coll_fns && roots_comm->coll_fns->Iscan_sched);
     }
 
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
     MPID_Datatype_get_extent_macro(datatype, extent);
 
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
     MPIR_SCHED_CHKPMEM_MALLOC(tempbuf, void *, count*(MPL_MAX(extent, true_extent)),
                         mpi_errno, "temporary buffer");
@@ -232,13 +232,13 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
     if (node_comm != NULL) {
         mpi_errno = node_comm->coll_fns->Iscan_sched(sendbuf, recvbuf, count, datatype, op, node_comm, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     else if (sendbuf != MPI_IN_PLACE) {
-        mpi_errno = MPID_Sched_copy(sendbuf, count, datatype,
+        mpi_errno = MPIR_Sched_copy(sendbuf, count, datatype,
                                     recvbuf, count, datatype, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
 
     /* get result from local node's last processor which
@@ -246,15 +246,15 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
        localfulldata. For example, localfulldata from node 1 contains
        reduced data of rank 1,2,3. */
     if (roots_comm != NULL && node_comm != NULL) {
-        mpi_errno = MPID_Sched_recv(localfulldata, count, datatype,
+        mpi_errno = MPIR_Sched_recv(localfulldata, count, datatype,
                                     (node_comm->local_size - 1), node_comm, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     else if (roots_comm == NULL && node_comm != NULL && node_comm->rank == node_comm->local_size - 1) {
-        mpi_errno = MPID_Sched_send(recvbuf, count, datatype, 0, node_comm, s);
+        mpi_errno = MPIR_Sched_send(recvbuf, count, datatype, 0, node_comm, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
     }
     else if (roots_comm != NULL) {
         localfulldata = recvbuf;
@@ -266,22 +266,22 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
        process of node 3. */
     if (roots_comm != NULL) {
         /* FIXME just use roots_comm->rank instead */
-        int roots_rank = MPIU_Get_internode_rank(comm_ptr, rank);
-        MPIU_Assert(roots_rank == roots_comm->rank);
+        int roots_rank = MPIR_Get_internode_rank(comm_ptr, rank);
+        MPIR_Assert(roots_rank == roots_comm->rank);
 
         mpi_errno = roots_comm->coll_fns->Iscan_sched(localfulldata, prefulldata, count, datatype, op, roots_comm, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(s);
+        MPIR_SCHED_BARRIER(s);
 
         if (roots_rank != roots_comm->local_size-1) {
-            mpi_errno = MPID_Sched_send(prefulldata, count, datatype, (roots_rank + 1), roots_comm, s);
+            mpi_errno = MPIR_Sched_send(prefulldata, count, datatype, (roots_rank + 1), roots_comm, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
         if (roots_rank != 0) {
-            mpi_errno = MPID_Sched_recv(tempbuf, count, datatype, (roots_rank - 1), roots_comm, s);
+            mpi_errno = MPIR_Sched_recv(tempbuf, count, datatype, (roots_rank - 1), roots_comm, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
@@ -291,18 +291,18 @@ int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d
        then we should broadcast this result in the local node, and
        reduce it with recvbuf to get final result if nessesary. */
 
-    if (MPIU_Get_internode_rank(comm_ptr, rank) != 0) {
+    if (MPIR_Get_internode_rank(comm_ptr, rank) != 0) {
         /* we aren't on "node 0", so our node leader (possibly us) received
          * "prefulldata" from another leader into "tempbuf" */
 
         if (node_comm != NULL) {
             mpi_errno = node_comm->coll_fns->Ibcast_sched(tempbuf, count, datatype, 0, node_comm, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
 
         /* do reduce on tempbuf and recvbuf, finish scan. */
-        mpi_errno = MPID_Sched_reduce(tempbuf, recvbuf, count, datatype, op, s);
+        mpi_errno = MPIR_Sched_reduce(tempbuf, recvbuf, count, datatype, op, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
@@ -323,20 +323,20 @@ int MPIR_Iscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns->Iscan_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Iscan_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Iscan_sched(sendbuf, recvbuf, count, datatype, op, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -379,10 +379,10 @@ int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dataty
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ISCAN);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ISCAN);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ISCAN);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ISCAN);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -449,7 +449,7 @@ int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dataty
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ISCAN);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ISCAN);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/iscatter.c b/src/mpi/coll/iscatter.c
index 027ec38..76fe9bb 100644
--- a/src/mpi/coll/iscatter.c
+++ b/src/mpi/coll/iscatter.c
@@ -90,7 +90,7 @@ static int calc_curr_count(MPIR_Comm *comm, int tag, void *state)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                        int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                        int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     MPI_Aint extent = 0;
@@ -131,14 +131,14 @@ int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
                in the event of recvbuf=MPI_IN_PLACE on the root,
                recvcount and recvtype are not valid */
             MPID_Datatype_get_size_macro(sendtype, sendtype_size);
-            MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+            MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                              extent*sendcount*comm_size);
 
             ss->nbytes = sendtype_size * sendcount;
         }
         else {
             MPID_Datatype_get_size_macro(recvtype, recvtype_size);
-            MPIU_Ensure_Aint_fits_in_pointer(extent*recvcount*comm_size);
+            MPIR_Ensure_Aint_fits_in_pointer(extent*recvcount*comm_size);
             ss->nbytes = recvtype_size * recvcount;
         }
 
@@ -161,22 +161,22 @@ int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
                 MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
 
                 if (recvbuf != MPI_IN_PLACE)
-                    mpi_errno = MPID_Sched_copy(((char *) sendbuf + extent*sendcount*rank),
+                    mpi_errno = MPIR_Sched_copy(((char *) sendbuf + extent*sendcount*rank),
                                                 sendcount*(comm_size-rank), sendtype,
                                                 tmp_buf, ss->nbytes*(comm_size-rank), MPI_BYTE, s);
                 else
-                    mpi_errno = MPID_Sched_copy(((char *) sendbuf + extent*sendcount*(rank+1)),
+                    mpi_errno = MPIR_Sched_copy(((char *) sendbuf + extent*sendcount*(rank+1)),
                                                 sendcount*(comm_size-rank-1), sendtype,
                                                 ((char *)tmp_buf + ss->nbytes),
                                                 ss->nbytes*(comm_size-rank-1), MPI_BYTE, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-                mpi_errno = MPID_Sched_copy(sendbuf, sendcount*rank, sendtype,
+                mpi_errno = MPIR_Sched_copy(sendbuf, sendcount*rank, sendtype,
                                             ((char *) tmp_buf + ss->nbytes*(comm_size-rank)),
                                             ss->nbytes*rank, MPI_BYTE, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
                 ss->curr_count = ss->nbytes*comm_size;
             }
             else
@@ -195,20 +195,20 @@ int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
                    they don't have to forward data to anyone. Others
                    receive data into a temporary buffer. */
                 if (relative_rank % 2) {
-                    mpi_errno = MPID_Sched_recv(recvbuf, recvcount, recvtype, src, comm_ptr, s);
+                    mpi_errno = MPIR_Sched_recv(recvbuf, recvcount, recvtype, src, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 else {
 
                     /* the recv size is larger than what may be sent in
                        some cases. query amount of data actually received */
-                    mpi_errno = MPID_Sched_recv_status(tmp_buf, tmp_buf_size, MPI_BYTE, src, comm_ptr, &ss->status, s);
+                    mpi_errno = MPIR_Sched_recv_status(tmp_buf, tmp_buf_size, MPI_BYTE, src, comm_ptr, &ss->status, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
-                    mpi_errno = MPID_Sched_cb(&get_count, ss, s);
+                    MPIR_SCHED_BARRIER(s);
+                    mpi_errno = MPIR_Sched_cb(&get_count, ss, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 break;
             }
@@ -234,64 +234,64 @@ int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
                      * is it always true the (curr_cnt/2==sendcount*mask)? */
                     send_subtree_cnt = curr_cnt - sendcount * mask;
 #endif
-                    mpi_errno = MPID_Sched_cb2(&calc_send_count_root, ss, ((void *)(size_t)mask), s);
+                    mpi_errno = MPIR_Sched_cb2(&calc_send_count_root, ss, ((void *)(size_t)mask), s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
 
                     /* mask is also the size of this process's subtree */
-                    mpi_errno = MPID_Sched_send_defer(((char *)sendbuf + extent*sendcount*mask),
+                    mpi_errno = MPIR_Sched_send_defer(((char *)sendbuf + extent*sendcount*mask),
                                                       &ss->send_subtree_count, sendtype, dst,
                                                       comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
                 else
                 {
                     /* non-zero root and others */
-                    mpi_errno = MPID_Sched_cb2(&calc_send_count_non_root, ss, ((void *)(size_t)mask), s);
+                    mpi_errno = MPIR_Sched_cb2(&calc_send_count_non_root, ss, ((void *)(size_t)mask), s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
 
                     /* mask is also the size of this process's subtree */
-                    mpi_errno = MPID_Sched_send_defer(((char *)tmp_buf + ss->nbytes*mask),
+                    mpi_errno = MPIR_Sched_send_defer(((char *)tmp_buf + ss->nbytes*mask),
                                                       &ss->send_subtree_count, MPI_BYTE, dst,
                                                       comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                    MPID_SCHED_BARRIER(s);
+                    MPIR_SCHED_BARRIER(s);
                 }
-                mpi_errno = MPID_Sched_cb(&calc_curr_count, ss, s);
+                mpi_errno = MPIR_Sched_cb(&calc_curr_count, ss, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
             mask >>= 1;
         }
 
         if ((rank == root) && (root == 0) && (recvbuf != MPI_IN_PLACE)) {
             /* for root=0, put root's data in recvbuf if not MPI_IN_PLACE */
-            mpi_errno = MPID_Sched_copy(sendbuf, sendcount, sendtype,
+            mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                         recvbuf, recvcount, recvtype, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
         else if (!(relative_rank % 2) && (recvbuf != MPI_IN_PLACE)) {
             /* for non-zero root and non-leaf nodes, copy from tmp_buf
                into recvbuf */
-            mpi_errno = MPID_Sched_copy(tmp_buf, ss->nbytes, MPI_BYTE,
+            mpi_errno = MPIR_Sched_copy(tmp_buf, ss->nbytes, MPI_BYTE,
                                         recvbuf, recvcount, recvtype, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
 
     }
 #ifdef MPID_HAS_HETERO
     else { /* communicator is heterogeneous */
         int position;
-        MPIU_Assertp(FALSE); /* hetero case not yet implemented */
+        MPIR_Assertp(FALSE); /* hetero case not yet implemented */
 
         if (rank == root) {
             MPIR_Pack_size_impl(sendcount*comm_size, sendtype, &tmp_buf_size);
 
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
 
           /* calculate the value of nbytes, the number of bytes in packed
              representation that each process receives. We can't
@@ -346,7 +346,7 @@ int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
         }
         else {
             MPIR_Pack_size_impl(recvcount*(comm_size/2), recvtype, &tmp_buf_size);
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
 
             /* calculate nbytes */
             position = 0;
@@ -432,7 +432,7 @@ int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                        int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                        int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
 /*  Intercommunicator scatter.
     For short messages, root sends to rank 0 in remote group. rank 0
@@ -471,9 +471,9 @@ int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
     if (nbytes < MPIR_CVAR_SCATTER_INTER_SHORT_MSG_SIZE) {
         if (root == MPI_ROOT) {
             /* root sends all data to rank 0 on remote group and returns */
-            mpi_errno = MPID_Sched_send(sendbuf, sendcount*remote_size, sendtype, 0, comm_ptr, s);
+            mpi_errno = MPIR_Sched_send(sendbuf, sendcount*remote_size, sendtype, 0, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
             goto fn_exit;
         }
         else {
@@ -485,8 +485,8 @@ int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
                 MPIR_Type_get_true_extent_impl(recvtype, &true_lb, &true_extent);
 
                 MPID_Datatype_get_extent_macro(recvtype, extent);
-                MPIU_Ensure_Aint_fits_in_pointer(extent*recvcount*local_size);
-                MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+                MPIR_Ensure_Aint_fits_in_pointer(extent*recvcount*local_size);
+                MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                                  sendcount*remote_size*extent);
 
                 MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, recvcount*local_size*(MPL_MAX(extent,true_extent)),
@@ -495,25 +495,25 @@ int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
                 /* adjust for potential negative lower bound in datatype */
                 tmp_buf = (void *)((char*)tmp_buf - true_lb);
 
-                mpi_errno = MPID_Sched_recv(tmp_buf, recvcount*local_size, recvtype, root, comm_ptr, s);
+                mpi_errno = MPIR_Sched_recv(tmp_buf, recvcount*local_size, recvtype, root, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-                MPID_SCHED_BARRIER(s);
+                MPIR_SCHED_BARRIER(s);
             }
 
             /* Get the local intracommunicator */
             if (!comm_ptr->local_comm)
-                MPIR_Setup_intercomm_localcomm(comm_ptr);
+                MPII_Setup_intercomm_localcomm(comm_ptr);
 
             newcomm_ptr = comm_ptr->local_comm;
 
             /* now do the usual scatter on this intracommunicator */
-            MPIU_Assert(newcomm_ptr->coll_fns != NULL);
-            MPIU_Assert(newcomm_ptr->coll_fns->Iscatter_sched != NULL);
+            MPIR_Assert(newcomm_ptr->coll_fns != NULL);
+            MPIR_Assert(newcomm_ptr->coll_fns->Iscatter_sched != NULL);
             mpi_errno = newcomm_ptr->coll_fns->Iscatter_sched(tmp_buf, recvcount, recvtype,
                                                         recvbuf, recvcount, recvtype,
                                                         0, newcomm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
     else {
@@ -521,16 +521,16 @@ int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtyp
         if (root == MPI_ROOT) {
             MPID_Datatype_get_extent_macro(sendtype, extent);
             for (i = 0; i < remote_size; i++) {
-                mpi_errno = MPID_Sched_send(((char *)sendbuf+sendcount*i*extent),
+                mpi_errno = MPIR_Sched_send(((char *)sendbuf+sendcount*i*extent),
                                             sendcount, sendtype, i, comm_ptr, s);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             }
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
         else {
-            mpi_errno = MPID_Sched_recv(recvbuf, recvcount, recvtype, root, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(recvbuf, recvcount, recvtype, root, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(s);
+            MPIR_SCHED_BARRIER(s);
         }
     }
 
@@ -552,20 +552,20 @@ int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns->Iscatter_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Iscatter_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Iscatter_sched(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -611,10 +611,10 @@ int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ISCATTER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ISCATTER);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ISCATTER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ISCATTER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -722,7 +722,7 @@ int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ISCATTER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ISCATTER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/iscatterv.c b/src/mpi/coll/iscatterv.c
index ad5bb03..5107ed6 100644
--- a/src/mpi/coll/iscatterv.c
+++ b/src/mpi/coll/iscatterv.c
@@ -52,7 +52,7 @@ int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[],
                    MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                   int root, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+                   int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int rank, comm_size;
@@ -77,20 +77,20 @@ int MPIR_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs
          * a minimal sanity check. Maybe add a global var since we do
          * loop over sendcount[] in MPI_Scatterv before calling
          * this? */
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf + extent);
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf + extent);
 
         for (i = 0; i < comm_size; i++) {
             if (sendcounts[i]) {
                 if ((comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) && (i == rank)) {
                     if (recvbuf != MPI_IN_PLACE) {
-                        mpi_errno = MPID_Sched_copy(((char *)sendbuf+displs[rank]*extent),
+                        mpi_errno = MPIR_Sched_copy(((char *)sendbuf+displs[rank]*extent),
                                                     sendcounts[rank], sendtype,
                                                     recvbuf, recvcount, recvtype, s);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                     }
                 }
                 else {
-                    mpi_errno = MPID_Sched_send(((char *)sendbuf+displs[i]*extent),
+                    mpi_errno = MPIR_Sched_send(((char *)sendbuf+displs[i]*extent),
                                                 sendcounts[i], sendtype, i, comm_ptr, s);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 }
@@ -101,7 +101,7 @@ int MPIR_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs
     else if (root != MPI_PROC_NULL) {
         /* non-root nodes, and in the intercomm. case, non-root nodes on remote side */
         if (recvcount) {
-            mpi_errno = MPID_Sched_recv(recvbuf, recvcount, recvtype, root, comm_ptr, s);
+            mpi_errno = MPIR_Sched_recv(recvbuf, recvcount, recvtype, root, comm_ptr, s);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
     }
@@ -123,20 +123,20 @@ int MPIR_Iscatterv_impl(const void *sendbuf, const int sendcounts[], const int d
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *reqp = NULL;
     int tag = -1;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(comm_ptr->coll_fns->Iscatterv_sched != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Iscatterv_sched != NULL);
     mpi_errno = comm_ptr->coll_fns->Iscatterv_sched(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -183,10 +183,10 @@ int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ISCATTERV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ISCATTERV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ISCATTERV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ISCATTERV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -324,7 +324,7 @@ int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ISCATTERV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ISCATTERV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/op_commutative.c b/src/mpi/coll/op_commutative.c
index 773e416..72df322 100644
--- a/src/mpi/coll/op_commutative.c
+++ b/src/mpi/coll/op_commutative.c
@@ -56,12 +56,12 @@ int MPI_Op_commutative(MPI_Op op, int *commute)
 {
     MPIR_Op *op_ptr = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_OP_COMMUTATIVE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_OP_COMMUTATIVE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_OP_COMMUTATIVE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_OP_COMMUTATIVE);
 
     MPIR_Op_get_ptr( op, op_ptr );
 
@@ -91,7 +91,7 @@ int MPI_Op_commutative(MPI_Op op, int *commute)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_OP_COMMUTATIVE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_OP_COMMUTATIVE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 fn_fail:
diff --git a/src/mpi/coll/op_create.c b/src/mpi/coll/op_create.c
index e163311..e90d9a1 100644
--- a/src/mpi/coll/op_create.c
+++ b/src/mpi/coll/op_create.c
@@ -32,13 +32,13 @@ int MPI_Op_create(MPI_User_function *user_fn, int commute, MPI_Op *op) __attribu
 /* Preallocated op objects */
 MPIR_Op MPIR_Op_builtin[MPIR_OP_N_BUILTIN] = { {0} };
 MPIR_Op MPIR_Op_direct[MPIR_OP_PREALLOC] = { {0} };
-MPIU_Object_alloc_t MPIR_Op_mem = { 0, 0, 0, 0, MPIR_OP,
+MPIR_Object_alloc_t MPIR_Op_mem = { 0, 0, 0, 0, MPIR_OP,
 					    sizeof(MPIR_Op),
 					    MPIR_Op_direct,
 					    MPIR_OP_PREALLOC, };
 
 #ifdef HAVE_CXX_BINDING
-void MPIR_Op_set_cxx( MPI_Op op, void (*opcall)(void) )
+void MPII_Op_set_cxx( MPI_Op op, void (*opcall)(void) )
 {
     MPIR_Op *op_ptr;
     
@@ -53,7 +53,7 @@ void MPIR_Op_set_cxx( MPI_Op op, void (*opcall)(void) )
    MPI Standard.  However, if MPI_Fint and int are not the same size (e.g.,
    MPI_Fint was made 8 bytes but int is 4 bytes), then the C and Fortran
    versions must be distinquished. */
-void MPIR_Op_set_fc( MPI_Op op )
+void MPII_Op_set_fc( MPI_Op op )
 {
     MPIR_Op *op_ptr;
     
@@ -104,16 +104,16 @@ int MPI_Op_create(MPI_User_function *user_fn, int commute, MPI_Op *op)
     static const char FCNAME[] = "MPI_Op_create";
     MPIR_Op *op_ptr;
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_OP_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_OP_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_OP_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_OP_CREATE);
 
     /* ... body of routine ...  */
     
-    op_ptr = (MPIR_Op *)MPIU_Handle_obj_alloc( &MPIR_Op_mem );
+    op_ptr = (MPIR_Op *)MPIR_Handle_obj_alloc( &MPIR_Op_mem );
     /* --BEGIN ERROR HANDLING-- */
     if (!op_ptr)
     {
@@ -127,13 +127,13 @@ int MPI_Op_create(MPI_User_function *user_fn, int commute, MPI_Op *op)
     op_ptr->kind     = commute ? MPIR_OP_KIND__USER : MPIR_OP_KIND__USER_NONCOMMUTE;
     op_ptr->function.c_function = (void (*)(const void *, void *, 
 				   const int *, const MPI_Datatype *))user_fn;
-    MPIU_Object_set_ref(op_ptr,1);
+    MPIR_Object_set_ref(op_ptr,1);
 
     MPIR_OBJ_PUBLISH_HANDLE(*op, op_ptr->handle);
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_OP_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_OP_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/coll/op_free.c b/src/mpi/coll/op_free.c
index cfc5445..c0c16af 100644
--- a/src/mpi/coll/op_free.c
+++ b/src/mpi/coll/op_free.c
@@ -60,12 +60,12 @@ int MPI_Op_free(MPI_Op *op)
     MPIR_Op *op_ptr = NULL;
     int     in_use;
     int     mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_OP_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_OP_FREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_OP_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_OP_FREE);
     
     MPIR_Op_get_ptr( *op, op_ptr );
 #   ifdef HAVE_ERROR_CHECKING
@@ -90,7 +90,7 @@ int MPI_Op_free(MPI_Op *op)
     
     MPIR_Op_release_ref( op_ptr, &in_use);
     if (!in_use) {
-	MPIU_Handle_obj_free( &MPIR_Op_mem, op_ptr );
+	MPIR_Handle_obj_free( &MPIR_Op_mem, op_ptr );
     }
     *op = MPI_OP_NULL;
     
@@ -99,7 +99,7 @@ int MPI_Op_free(MPI_Op *op)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_OP_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_OP_FREE);
         MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 	return mpi_errno;
 	
diff --git a/src/mpi/coll/opband.c b/src/mpi/coll/opband.c
index 04c0ad6..d349e17 100644
--- a/src/mpi/coll/opband.c
+++ b/src/mpi/coll/opband.c
@@ -48,7 +48,7 @@ void MPIR_BAND (
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_BAND" );
             }
             break;
diff --git a/src/mpi/coll/opbor.c b/src/mpi/coll/opbor.c
index 5a2fe5c..85743bc 100644
--- a/src/mpi/coll/opbor.c
+++ b/src/mpi/coll/opbor.c
@@ -48,7 +48,7 @@ void MPIR_BOR (
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_BOR" );
             }
             break;
diff --git a/src/mpi/coll/opbxor.c b/src/mpi/coll/opbxor.c
index 42cf011..e7df1b5 100644
--- a/src/mpi/coll/opbxor.c
+++ b/src/mpi/coll/opbxor.c
@@ -48,7 +48,7 @@ void MPIR_BXOR (
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_BXOR" );
             }
             break;
diff --git a/src/mpi/coll/opland.c b/src/mpi/coll/opland.c
index a6bf56f..890d739 100644
--- a/src/mpi/coll/opland.c
+++ b/src/mpi/coll/opland.c
@@ -8,7 +8,7 @@
 #include "mpiimpl.h"
 #include "mpir_op_util.h"
 #ifdef HAVE_FORTRAN_BINDING
-#include "mpir_fortlogical.h"
+#include "mpii_fortlogical.h"
 #endif
 
 /*
@@ -45,8 +45,8 @@ void MPIR_LAND (
             c_type_ * restrict a = (c_type_ *)inoutvec;                \
             c_type_ * restrict b = (c_type_ *)invec;                   \
             for (i=0; i<len; i++)                                      \
-                a[i] = MPIR_TO_FLOG(MPIR_LLAND(MPIR_FROM_FLOG(a[i]),   \
-                                               MPIR_FROM_FLOG(b[i]))); \
+                a[i] = MPII_TO_FLOG(MPIR_LLAND(MPII_FROM_FLOG(a[i]),   \
+                                               MPII_FROM_FLOG(b[i]))); \
             break;                                                     \
     }
         /* expand logicals (which may include MPI_C_BOOL, a non-Fortran type) */
@@ -79,7 +79,7 @@ void MPIR_LAND (
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_LAND" );
             }
             break;
diff --git a/src/mpi/coll/oplor.c b/src/mpi/coll/oplor.c
index d71316f..f334c91 100644
--- a/src/mpi/coll/oplor.c
+++ b/src/mpi/coll/oplor.c
@@ -8,7 +8,7 @@
 #include "mpiimpl.h"
 #include "mpir_op_util.h"
 #ifdef HAVE_FORTRAN_BINDING
-#include "mpir_fortlogical.h"
+#include "mpii_fortlogical.h"
 #endif
 
 /*
@@ -45,8 +45,8 @@ void MPIR_LOR (
                 c_type_ * restrict a = (c_type_ *)inoutvec;                \
                 c_type_ * restrict b = (c_type_ *)invec;                   \
                 for (i=0; i<len; i++)                                      \
-                    a[i] = MPIR_TO_FLOG(MPIR_LLOR(MPIR_FROM_FLOG(a[i]),    \
-                                                   MPIR_FROM_FLOG(b[i]))); \
+                    a[i] = MPII_TO_FLOG(MPIR_LLOR(MPII_FROM_FLOG(a[i]),    \
+                                                   MPII_FROM_FLOG(b[i]))); \
                 break;                                                     \
         }
         /* expand logicals (which may include MPI_C_BOOL, a non-Fortran type) */
@@ -79,7 +79,7 @@ void MPIR_LOR (
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_LOR" );
             }
             break;
diff --git a/src/mpi/coll/oplxor.c b/src/mpi/coll/oplxor.c
index e66c22c..7bcc44d 100644
--- a/src/mpi/coll/oplxor.c
+++ b/src/mpi/coll/oplxor.c
@@ -8,7 +8,7 @@
 #include "mpiimpl.h"
 #include "mpir_op_util.h"
 #ifdef HAVE_FORTRAN_BINDING
-#include "mpir_fortlogical.h"
+#include "mpii_fortlogical.h"
 #endif
 
 /*
@@ -45,8 +45,8 @@ void MPIR_LXOR (
                 c_type_ * restrict a = (c_type_ *)inoutvec;                \
                 c_type_ * restrict b = (c_type_ *)invec;                   \
                 for (i=0; i<len; i++)                                      \
-                    a[i] = MPIR_TO_FLOG(MPIR_LLXOR(MPIR_FROM_FLOG(a[i]),   \
-                                                   MPIR_FROM_FLOG(b[i]))); \
+                    a[i] = MPII_TO_FLOG(MPIR_LLXOR(MPII_FROM_FLOG(a[i]),   \
+                                                   MPII_FROM_FLOG(b[i]))); \
                 break;                                                     \
         }
         /* expand logicals (which may include MPI_C_BOOL, a non-Fortran type) */
@@ -79,7 +79,7 @@ void MPIR_LXOR (
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_LXOR" );
             }
             break;
diff --git a/src/mpi/coll/opmax.c b/src/mpi/coll/opmax.c
index 93fad00..7aeba8a 100644
--- a/src/mpi/coll/opmax.c
+++ b/src/mpi/coll/opmax.c
@@ -45,7 +45,7 @@ void MPIR_MAXF(
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_MAX" );
             }
             break;
diff --git a/src/mpi/coll/opmaxloc.c b/src/mpi/coll/opmaxloc.c
index 3732364..123a2d6 100644
--- a/src/mpi/coll/opmaxloc.c
+++ b/src/mpi/coll/opmaxloc.c
@@ -119,7 +119,7 @@ void MPIR_MAXLOC(
 
             MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                          MPIR_Per_thread, per_thread, &err);
-            MPIU_Assert(err == 0);
+            MPIR_Assert(err == 0);
             per_thread->op_errno = mpi_errno;
         }
         break;
diff --git a/src/mpi/coll/opmin.c b/src/mpi/coll/opmin.c
index a79b5f5..be1af55 100644
--- a/src/mpi/coll/opmin.c
+++ b/src/mpi/coll/opmin.c
@@ -44,7 +44,7 @@ void MPIR_MINF (
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_MIN" );
             }
             break;
diff --git a/src/mpi/coll/opminloc.c b/src/mpi/coll/opminloc.c
index ff343ef..94d0537 100644
--- a/src/mpi/coll/opminloc.c
+++ b/src/mpi/coll/opminloc.c
@@ -118,7 +118,7 @@ void MPIR_MINLOC(
 
             MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                          MPIR_Per_thread, per_thread, &err);
-            MPIU_Assert(err == 0);
+            MPIR_Assert(err == 0);
             per_thread->op_errno = mpi_errno;
         }
         break;
diff --git a/src/mpi/coll/opprod.c b/src/mpi/coll/opprod.c
index 34382ad..54d9d57 100644
--- a/src/mpi/coll/opprod.c
+++ b/src/mpi/coll/opprod.c
@@ -68,7 +68,7 @@ void MPIR_PROD (
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_PROD" );
             }
             break;
diff --git a/src/mpi/coll/opsum.c b/src/mpi/coll/opsum.c
index f7e4eee..0f47afd 100644
--- a/src/mpi/coll/opsum.c
+++ b/src/mpi/coll/opsum.c
@@ -67,7 +67,7 @@ void MPIR_SUM (
 
                 MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                              MPIR_Per_thread, per_thread, &err);
-                MPIU_Assert(err == 0);
+                MPIR_Assert(err == 0);
                 per_thread->op_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_SUM" );
             }
             break;
diff --git a/src/mpi/coll/red_scat.c b/src/mpi/coll/red_scat.c
index 7babc13..92f2625 100644
--- a/src/mpi/coll/red_scat.c
+++ b/src/mpi/coll/red_scat.c
@@ -73,7 +73,7 @@ static int MPIR_Reduce_scatter_noncomm(const void *sendbuf, void *recvbuf, const
     void *tmp_buf0;
     void *tmp_buf1;
     void *result_ptr;
-    MPIU_CHKLMEM_DECL(3);
+    MPIR_CHKLMEM_DECL(3);
 
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
 
@@ -85,10 +85,10 @@ static int MPIR_Reduce_scatter_noncomm(const void *sendbuf, void *recvbuf, const
     }
 
     /* begin error checking */
-    MPIU_Assert(pof2 == comm_size); /* FIXME this version only works for power of 2 procs */
+    MPIR_Assert(pof2 == comm_size); /* FIXME this version only works for power of 2 procs */
 
     for (i = 0; i < (comm_size - 1); ++i) {
-        MPIU_Assert(recvcounts[i] == recvcounts[i+1]);
+        MPIR_Assert(recvcounts[i] == recvcounts[i+1]);
     }
     /* end error checking */
 
@@ -96,8 +96,8 @@ static int MPIR_Reduce_scatter_noncomm(const void *sendbuf, void *recvbuf, const
     block_size = recvcounts[0];
     total_count = block_size * comm_size;
 
-    MPIU_CHKLMEM_MALLOC(tmp_buf0, void *, true_extent * total_count, mpi_errno, "tmp_buf0");
-    MPIU_CHKLMEM_MALLOC(tmp_buf1, void *, true_extent * total_count, mpi_errno, "tmp_buf1");
+    MPIR_CHKLMEM_MALLOC(tmp_buf0, void *, true_extent * total_count, mpi_errno, "tmp_buf0");
+    MPIR_CHKLMEM_MALLOC(tmp_buf1, void *, true_extent * total_count, mpi_errno, "tmp_buf1");
     /* adjust for potential negative lower bound in datatype */
     tmp_buf0 = (void *)((char*)tmp_buf0 - true_lb);
     tmp_buf1 = (void *)((char*)tmp_buf1 - true_lb);
@@ -167,7 +167,7 @@ static int MPIR_Reduce_scatter_noncomm(const void *sendbuf, void *recvbuf, const
         send_offset = recv_offset;
     }
 
-    MPIU_Assert(size == recvcounts[rank]);
+    MPIR_Assert(size == recvcounts[rank]);
 
     /* copy the reduced data to the recvbuf */
     result_ptr = (char *)(buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent;
@@ -176,7 +176,7 @@ static int MPIR_Reduce_scatter_noncomm(const void *sendbuf, void *recvbuf, const
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -256,7 +256,7 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
     MPI_Datatype sendtype, recvtype;
     int nprocs_completed, tmp_mask, tree_root, is_commutative;
     MPIR_Op *op_ptr;
-    MPIU_CHKLMEM_DECL(5);
+    MPIR_CHKLMEM_DECL(5);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -268,7 +268,7 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         per_thread->op_errno = 0;
     }
 
@@ -286,7 +286,7 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
             is_commutative = 1;
     }
 
-    MPIU_CHKLMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");
+    MPIR_CHKLMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");
 
     total_count = 0;
     for (i=0; i<comm_size; i++) {
@@ -303,19 +303,19 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
     
     /* total_count*extent eventually gets malloced. it isn't added to
      * a user-passed in buffer */
-    MPIU_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
+    MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
 
     if ((is_commutative) && (nbytes < MPIR_CVAR_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE)) {
         /* commutative and short. use recursive halving algorithm */
 
         /* allocate temp. buffer to receive incoming data */
-        MPIU_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
+        MPIR_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
         /* adjust for potential negative lower bound in datatype */
         tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
             
         /* need to allocate another temporary buffer to accumulate
            results because recvbuf may not be big enough */
-        MPIU_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results");
+        MPIR_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results");
         /* adjust for potential negative lower bound in datatype */
         tmp_results = (void *)((char*)tmp_results - true_lb);
         
@@ -389,8 +389,8 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
                have their result calculated by the process to their
                right (rank+1). */
 
-            MPIU_CHKLMEM_MALLOC(newcnts, int *, pof2*sizeof(int), mpi_errno, "newcnts");
-            MPIU_CHKLMEM_MALLOC(newdisps, int *, pof2*sizeof(int), mpi_errno, "newdisps");
+            MPIR_CHKLMEM_MALLOC(newcnts, int *, pof2*sizeof(int), mpi_errno, "newcnts");
+            MPIR_CHKLMEM_MALLOC(newdisps, int *, pof2*sizeof(int), mpi_errno, "newdisps");
             
             for (i=0; i<pof2; i++) {
                 /* what does i map to in the old ranking? */
@@ -542,7 +542,7 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
         }
         
         /* allocate temporary buffer to store incoming data */
-        MPIU_CHKLMEM_MALLOC(tmp_recvbuf, void *, recvcounts[rank]*(MPL_MAX(true_extent,extent))+1, mpi_errno, "tmp_recvbuf");
+        MPIR_CHKLMEM_MALLOC(tmp_recvbuf, void *, recvcounts[rank]*(MPL_MAX(true_extent,extent))+1, mpi_errno, "tmp_recvbuf");
         /* adjust for potential negative lower bound in datatype */
         tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
         
@@ -655,13 +655,13 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
             /* noncommutative and (non-pof2 or block irregular), use recursive doubling. */
 
             /* need to allocate temporary buffer to receive incoming data*/
-            MPIU_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
+            MPIR_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
             /* adjust for potential negative lower bound in datatype */
             tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
 
             /* need to allocate another temporary buffer to accumulate
                results */
-            MPIU_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results");
+            MPIR_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results");
             /* adjust for potential negative lower bound in datatype */
             tmp_results = (void *)((char*)tmp_results - true_lb);
 
@@ -877,7 +877,7 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
 
     {
         MPIR_Per_thread_t *per_thread = NULL;
@@ -885,7 +885,7 @@ fn_exit:
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         if (per_thread->op_errno)
             mpi_errno = per_thread->op_errno;
     }
@@ -922,7 +922,7 @@ int MPIR_Reduce_scatter_inter(const void *sendbuf, void *recvbuf, const int recv
     void *tmp_buf=NULL;
     int *disps=NULL;
     MPIR_Comm *newcomm_ptr = NULL;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     rank = comm_ptr->rank;
     local_size = comm_ptr->local_size;
@@ -934,7 +934,7 @@ int MPIR_Reduce_scatter_inter(const void *sendbuf, void *recvbuf, const int recv
         /* In each group, rank 0 allocates a temp. buffer for the 
            reduce */
         
-        MPIU_CHKLMEM_MALLOC(disps, int *, local_size*sizeof(int), mpi_errno, "disps");
+        MPIR_CHKLMEM_MALLOC(disps, int *, local_size*sizeof(int), mpi_errno, "disps");
 
         total_count = 0;
         for (i=0; i<local_size; i++) {
@@ -945,7 +945,7 @@ int MPIR_Reduce_scatter_inter(const void *sendbuf, void *recvbuf, const int recv
         MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
         MPID_Datatype_get_extent_macro(datatype, extent);
 
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, total_count*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, total_count*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
 
         /* adjust for potential negative lower bound in datatype */
         tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -1002,7 +1002,7 @@ int MPIR_Reduce_scatter_inter(const void *sendbuf, void *recvbuf, const int recv
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm) {
-	mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
+	mpi_errno = MPII_Setup_intercomm_localcomm( comm_ptr );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
@@ -1018,7 +1018,7 @@ int MPIR_Reduce_scatter_inter(const void *sendbuf, void *recvbuf, const int recv
     }
     
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -1135,12 +1135,12 @@ int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE_SCATTER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_REDUCE_SCATTER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_REDUCE_SCATTER);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_REDUCE_SCATTER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -1219,7 +1219,7 @@ int MPI_Reduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_REDUCE_SCATTER);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_REDUCE_SCATTER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/red_scat_block.c b/src/mpi/coll/red_scat_block.c
index 6a23515..1a05bb5 100644
--- a/src/mpi/coll/red_scat_block.c
+++ b/src/mpi/coll/red_scat_block.c
@@ -69,7 +69,7 @@ static int MPIR_Reduce_scatter_block_noncomm (
     void *tmp_buf0;
     void *tmp_buf1;
     void *result_ptr;
-    MPIU_CHKLMEM_DECL(3);
+    MPIR_CHKLMEM_DECL(3);
 
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
 
@@ -81,15 +81,15 @@ static int MPIR_Reduce_scatter_block_noncomm (
     }
 
     /* begin error checking */
-    MPIU_Assert(pof2 == comm_size); /* FIXME this version only works for power of 2 procs */
+    MPIR_Assert(pof2 == comm_size); /* FIXME this version only works for power of 2 procs */
     /* end error checking */
 
     /* size of a block (count of datatype per block, NOT bytes per block) */
     block_size = recvcount;
     total_count = block_size * comm_size;
 
-    MPIU_CHKLMEM_MALLOC(tmp_buf0, void *, true_extent * total_count, mpi_errno, "tmp_buf0");
-    MPIU_CHKLMEM_MALLOC(tmp_buf1, void *, true_extent * total_count, mpi_errno, "tmp_buf1");
+    MPIR_CHKLMEM_MALLOC(tmp_buf0, void *, true_extent * total_count, mpi_errno, "tmp_buf0");
+    MPIR_CHKLMEM_MALLOC(tmp_buf1, void *, true_extent * total_count, mpi_errno, "tmp_buf1");
     /* adjust for potential negative lower bound in datatype */
     tmp_buf0 = (void *)((char*)tmp_buf0 - true_lb);
     tmp_buf1 = (void *)((char*)tmp_buf1 - true_lb);
@@ -159,7 +159,7 @@ static int MPIR_Reduce_scatter_block_noncomm (
         send_offset = recv_offset;
     }
 
-    MPIU_Assert(size == recvcount);
+    MPIR_Assert(size == recvcount);
 
     /* copy the reduced data to the recvbuf */
     result_ptr = (char *)(buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent;
@@ -168,7 +168,7 @@ static int MPIR_Reduce_scatter_block_noncomm (
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     /* --BEGIN ERROR HANDLING-- */
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -256,7 +256,7 @@ int MPIR_Reduce_scatter_block_intra (
     MPI_Datatype sendtype, recvtype;
     int nprocs_completed, tmp_mask, tree_root, is_commutative;
     MPIR_Op *op_ptr;
-    MPIU_CHKLMEM_DECL(5);
+    MPIR_CHKLMEM_DECL(5);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -268,7 +268,7 @@ int MPIR_Reduce_scatter_block_intra (
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         per_thread->op_errno = 0;
     }
 
@@ -290,7 +290,7 @@ int MPIR_Reduce_scatter_block_intra (
             is_commutative = 1;
     }
 
-    MPIU_CHKLMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");
+    MPIR_CHKLMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps");
 
     total_count = comm_size*recvcount;
     for (i=0; i<comm_size; i++) {
@@ -302,19 +302,19 @@ int MPIR_Reduce_scatter_block_intra (
     
     /* total_count*extent eventually gets malloced. it isn't added to
      * a user-passed in buffer */
-    MPIU_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
+    MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
 
     if ((is_commutative) && (nbytes < MPIR_CVAR_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE)) {
         /* commutative and short. use recursive halving algorithm */
 
         /* allocate temp. buffer to receive incoming data */
-        MPIU_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
+        MPIR_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
         /* adjust for potential negative lower bound in datatype */
         tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
             
         /* need to allocate another temporary buffer to accumulate
            results because recvbuf may not be big enough */
-        MPIU_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results");
+        MPIR_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results");
         /* adjust for potential negative lower bound in datatype */
         tmp_results = (void *)((char*)tmp_results - true_lb);
         
@@ -388,8 +388,8 @@ int MPIR_Reduce_scatter_block_intra (
                have their result calculated by the process to their
                right (rank+1). */
 
-            MPIU_CHKLMEM_MALLOC(newcnts, int *, pof2*sizeof(int), mpi_errno, "newcnts");
-            MPIU_CHKLMEM_MALLOC(newdisps, int *, pof2*sizeof(int), mpi_errno, "newdisps");
+            MPIR_CHKLMEM_MALLOC(newcnts, int *, pof2*sizeof(int), mpi_errno, "newcnts");
+            MPIR_CHKLMEM_MALLOC(newdisps, int *, pof2*sizeof(int), mpi_errno, "newdisps");
             
             for (i=0; i<pof2; i++) {
                 /* what does i map to in the old ranking? */
@@ -528,7 +528,7 @@ int MPIR_Reduce_scatter_block_intra (
         }
         
         /* allocate temporary buffer to store incoming data */
-        MPIU_CHKLMEM_MALLOC(tmp_recvbuf, void *, recvcount*(MPL_MAX(true_extent,extent))+1, mpi_errno, "tmp_recvbuf");
+        MPIR_CHKLMEM_MALLOC(tmp_recvbuf, void *, recvcount*(MPL_MAX(true_extent,extent))+1, mpi_errno, "tmp_recvbuf");
         /* adjust for potential negative lower bound in datatype */
         tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
         
@@ -630,13 +630,13 @@ int MPIR_Reduce_scatter_block_intra (
             /* noncommutative and non-pof2, use recursive doubling. */
 
             /* need to allocate temporary buffer to receive incoming data*/
-            MPIU_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
+            MPIR_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf");
             /* adjust for potential negative lower bound in datatype */
             tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
 
             /* need to allocate another temporary buffer to accumulate
                results */
-            MPIU_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results");
+            MPIR_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results");
             /* adjust for potential negative lower bound in datatype */
             tmp_results = (void *)((char*)tmp_results - true_lb);
 
@@ -849,7 +849,7 @@ int MPIR_Reduce_scatter_block_intra (
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
 
     {
         MPIR_Per_thread_t *per_thread = NULL;
@@ -857,7 +857,7 @@ fn_exit:
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         if (per_thread->op_errno)
             mpi_errno = per_thread->op_errno;
     }
@@ -899,7 +899,7 @@ int MPIR_Reduce_scatter_block_inter (
     MPI_Aint true_extent, true_lb = 0, extent;
     void *tmp_buf=NULL;
     MPIR_Comm *newcomm_ptr = NULL;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     rank = comm_ptr->rank;
     local_size = comm_ptr->local_size;
@@ -913,7 +913,7 @@ int MPIR_Reduce_scatter_block_inter (
         MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
         MPID_Datatype_get_extent_macro(datatype, extent);
 
-        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, total_count*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
+        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, total_count*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
 
         /* adjust for potential negative lower bound in datatype */
         tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -970,7 +970,7 @@ int MPIR_Reduce_scatter_block_inter (
 
     /* Get the local intracommunicator */
     if (!comm_ptr->local_comm)
-	MPIR_Setup_intercomm_localcomm( comm_ptr );
+	MPII_Setup_intercomm_localcomm( comm_ptr );
 
     newcomm_ptr = comm_ptr->local_comm;
 
@@ -984,7 +984,7 @@ int MPIR_Reduce_scatter_block_inter (
     }
     
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     /* --BEGIN ERROR HANDLING-- */
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -1108,12 +1108,12 @@ int MPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE_SCATTER_BLOCK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_REDUCE_SCATTER_BLOCK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_REDUCE_SCATTER_BLOCK);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_REDUCE_SCATTER_BLOCK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -1186,7 +1186,7 @@ int MPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_REDUCE_SCATTER_BLOCK);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_REDUCE_SCATTER_BLOCK);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/reduce.c b/src/mpi/coll/reduce.c
index 136209c..717860c 100644
--- a/src/mpi/coll/reduce.c
+++ b/src/mpi/coll/reduce.c
@@ -93,7 +93,7 @@ static int MPIR_Reduce_binomial (
     int mask, relrank, source, lroot;
     MPI_Aint true_lb, true_extent, extent; 
     void *tmp_buf;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     if (count == 0) return MPI_SUCCESS;
 
@@ -110,9 +110,9 @@ static int MPIR_Reduce_binomial (
     /* I think this is the worse case, so we can avoid an assert() 
      * inside the for loop */
     /* should be buf+{this}? */
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
-    MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)),
+    MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)),
                         mpi_errno, "temporary buffer");
     /* adjust for potential negative lower bound in datatype */
     tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -120,7 +120,7 @@ static int MPIR_Reduce_binomial (
     /* If I'm not the root, then my recvbuf may not be valid, therefore
        I have to allocate a temporary one */
     if (rank != root) {
-        MPIU_CHKLMEM_MALLOC(recvbuf, void *, 
+        MPIR_CHKLMEM_MALLOC(recvbuf, void *,
                             count*(MPL_MAX(extent,true_extent)), 
                             mpi_errno, "receive buffer");
         recvbuf = (void *)((char*)recvbuf - true_lb);
@@ -241,7 +241,7 @@ static int MPIR_Reduce_binomial (
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -301,7 +301,7 @@ static int MPIR_Reduce_redscat_gather (
     MPI_Aint true_lb, true_extent, extent; 
     void *tmp_buf;
 
-    MPIU_CHKLMEM_DECL(4);
+    MPIR_CHKLMEM_DECL(4);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -313,7 +313,7 @@ static int MPIR_Reduce_redscat_gather (
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         per_thread->op_errno = 0;
     }
 
@@ -325,9 +325,9 @@ static int MPIR_Reduce_redscat_gather (
     /* I think this is the worse case, so we can avoid an assert() 
      * inside the for loop */
     /* should be buf+{this}? */
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
-    MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)),
+    MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)),
                         mpi_errno, "temporary buffer");
     /* adjust for potential negative lower bound in datatype */
     tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -335,7 +335,7 @@ static int MPIR_Reduce_redscat_gather (
     /* If I'm not the root, then my recvbuf may not be valid, therefore
        I have to allocate a temporary one */
     if (rank != root) {
-        MPIU_CHKLMEM_MALLOC(recvbuf, void *, 
+        MPIR_CHKLMEM_MALLOC(recvbuf, void *,
                             count*(MPL_MAX(extent,true_extent)), 
                             mpi_errno, "receive buffer");
         recvbuf = (void *)((char*)recvbuf - true_lb);
@@ -420,8 +420,8 @@ static int MPIR_Reduce_redscat_gather (
     /* We allocate these arrays on all processes, even if newrank=-1,
        because if root is one of the excluded processes, we will
        need them on the root later on below. */
-    MPIU_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
-    MPIU_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
+    MPIR_CHKLMEM_MALLOC(cnts, int *, pof2*sizeof(int), mpi_errno, "counts");
+    MPIR_CHKLMEM_MALLOC(disps, int *, pof2*sizeof(int), mpi_errno, "displacements");
     
     if (newrank != -1) {
         for (i=0; i<(pof2-1); i++) 
@@ -646,7 +646,7 @@ static int MPIR_Reduce_redscat_gather (
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         if (per_thread->op_errno) {
             mpi_errno = per_thread->op_errno;
             goto fn_fail;
@@ -655,7 +655,7 @@ static int MPIR_Reduce_redscat_gather (
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -741,7 +741,7 @@ int MPIR_Reduce_intra (
     int comm_size, is_commutative, type_size, pof2;
     int nbytes = 0;
     MPIR_Op *op_ptr;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     if (count == 0) return MPI_SUCCESS;
 
@@ -768,9 +768,9 @@ int MPIR_Reduce_intra (
             MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
             MPID_Datatype_get_extent_macro(datatype, extent);
 
-            MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+            MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)),
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)),
                                 mpi_errno, "temporary buffer");
             /* adjust for potential negative lower bound in datatype */
             tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -778,7 +778,7 @@ int MPIR_Reduce_intra (
 
         /* do the intranode reduce on all nodes other than the root's node */
         if (comm_ptr->node_comm != NULL &&
-            MPIU_Get_intranode_rank(comm_ptr, root) == -1) {
+            MPIR_Get_intranode_rank(comm_ptr, root) == -1) {
             mpi_errno = MPID_Reduce(sendbuf, tmp_buf, count, datatype,
                                          op, 0, comm_ptr->node_comm, errflag);
             if (mpi_errno) {
@@ -791,12 +791,12 @@ int MPIR_Reduce_intra (
 
         /* do the internode reduce to the root's node */
         if (comm_ptr->node_roots_comm != NULL) {
-            if (comm_ptr->node_roots_comm->rank != MPIU_Get_internode_rank(comm_ptr, root)) {
+            if (comm_ptr->node_roots_comm->rank != MPIR_Get_internode_rank(comm_ptr, root)) {
                 /* I am not on root's node.  Use tmp_buf if we
                    participated in the first reduce, otherwise use sendbuf */
                 const void *buf = (comm_ptr->node_comm == NULL ? sendbuf : tmp_buf);
                 mpi_errno = MPID_Reduce(buf, NULL, count, datatype,
-                                             op, MPIU_Get_internode_rank(comm_ptr, root),
+                                             op, MPIR_Get_internode_rank(comm_ptr, root),
                                              comm_ptr->node_roots_comm, errflag);
                 if (mpi_errno) {
                     /* for communication errors, just record the error but continue */
@@ -811,7 +811,7 @@ int MPIR_Reduce_intra (
                        Use tmp_buf as recvbuf. */
 
                     mpi_errno = MPID_Reduce(sendbuf, tmp_buf, count, datatype,
-                                                 op, MPIU_Get_internode_rank(comm_ptr, root),
+                                                 op, MPIR_Get_internode_rank(comm_ptr, root),
                                                  comm_ptr->node_roots_comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
@@ -827,7 +827,7 @@ int MPIR_Reduce_intra (
                     /* I am the root. in_place is automatically handled. */
 
                     mpi_errno = MPID_Reduce(sendbuf, recvbuf, count, datatype,
-                                                 op, MPIU_Get_internode_rank(comm_ptr, root),
+                                                 op, MPIR_Get_internode_rank(comm_ptr, root),
                                                  comm_ptr->node_roots_comm, errflag);
                     if (mpi_errno) {
                         /* for communication errors, just record the error but continue */
@@ -845,9 +845,9 @@ int MPIR_Reduce_intra (
 
         /* do the intranode reduce on the root's node */
         if (comm_ptr->node_comm != NULL &&
-            MPIU_Get_intranode_rank(comm_ptr, root) != -1) { 
+            MPIR_Get_intranode_rank(comm_ptr, root) != -1) {
             mpi_errno = MPID_Reduce(sendbuf, recvbuf, count, datatype,
-                                         op, MPIU_Get_intranode_rank(comm_ptr, root),
+                                         op, MPIR_Get_intranode_rank(comm_ptr, root),
                                          comm_ptr->node_comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
@@ -894,7 +894,7 @@ int MPIR_Reduce_intra (
         
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
 
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -935,7 +935,7 @@ int MPIR_Reduce_inter (
     MPI_Aint true_extent, true_lb, extent;
     void *tmp_buf=NULL;
     MPIR_Comm *newcomm_ptr = NULL;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     if (root == MPI_PROC_NULL) {
         /* local processes other than root do nothing */
@@ -966,16 +966,16 @@ int MPIR_Reduce_inter (
             MPID_Datatype_get_extent_macro(datatype, extent);
 	    /* I think this is the worse case, so we can avoid an assert() 
 	     * inside the for loop */
-	    /* Should MPIU_CHKLMEM_MALLOC do this? */
-	    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
-	    MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
+	    /* Should MPIR_CHKLMEM_MALLOC do this? */
+	    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+	    MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer");
             /* adjust for potential negative lower bound in datatype */
             tmp_buf = (void *)((char*)tmp_buf - true_lb);
         }
         
         /* Get the local intracommunicator */
         if (!comm_ptr->local_comm) {
-            mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
+            mpi_errno = MPII_Setup_intercomm_localcomm( comm_ptr );
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
 
@@ -1005,7 +1005,7 @@ int MPIR_Reduce_inter (
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -1133,12 +1133,12 @@ int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_REDUCE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_REDUCE);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_REDUCE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -1251,7 +1251,7 @@ int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_REDUCE);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_REDUCE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/reduce_local.c b/src/mpi/coll/reduce_local.c
index ffa7cf5..547ad56 100644
--- a/src/mpi/coll/reduce_local.c
+++ b/src/mpi/coll/reduce_local.c
@@ -53,7 +53,7 @@ int MPIR_Reduce_local_impl(const void *inbuf, void *inoutbuf, int count, MPI_Dat
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         per_thread->op_errno = MPI_SUCCESS;
     }
 
@@ -96,7 +96,7 @@ int MPIR_Reduce_local_impl(const void *inbuf, void *inoutbuf, int count, MPI_Dat
         if (is_f77_uop) {
             MPI_Fint lcount = (MPI_Fint)count;
             MPI_Fint ldtype = (MPI_Fint)datatype;
-            MPIR_F77_User_function *uop_f77 = (MPIR_F77_User_function *)uop;
+            MPII_F77_User_function *uop_f77 = (MPII_F77_User_function *)uop;
 
             (*uop_f77)((void *) inbuf, inoutbuf, &lcount, &ldtype);
         }
@@ -115,7 +115,7 @@ int MPIR_Reduce_local_impl(const void *inbuf, void *inoutbuf, int count, MPI_Dat
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         if (per_thread->op_errno)
             mpi_errno = per_thread->op_errno;
     }
@@ -161,12 +161,12 @@ Output Parameters:
 int MPI_Reduce_local(const void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_REDUCE_LOCAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_REDUCE_LOCAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_REDUCE_LOCAL);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_REDUCE_LOCAL);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -203,7 +203,7 @@ int MPI_Reduce_local(const void *inbuf, void *inoutbuf, int count, MPI_Datatype
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_REDUCE_LOCAL);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_REDUCE_LOCAL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/scan.c b/src/mpi/coll/scan.c
index dfaf145..ea26924 100644
--- a/src/mpi/coll/scan.c
+++ b/src/mpi/coll/scan.c
@@ -86,7 +86,7 @@ static int MPIR_Scan_generic (
     MPI_Aint true_extent, true_lb, extent;
     void *partial_scan, *tmp_buf;
     MPIR_Op *op_ptr;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
     
     if (count == 0) return MPI_SUCCESS;
 
@@ -100,7 +100,7 @@ static int MPIR_Scan_generic (
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         per_thread->op_errno = 0;
     }
 
@@ -119,17 +119,17 @@ static int MPIR_Scan_generic (
     MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
 
     MPID_Datatype_get_extent_macro(datatype, extent);
-    MPIU_CHKLMEM_MALLOC(partial_scan, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "partial_scan");
+    MPIR_CHKLMEM_MALLOC(partial_scan, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "partial_scan");
 
     /* This eventually gets malloc()ed as a temp buffer, not added to
      * any user buffers */
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
     /* adjust for potential negative lower bound in datatype */
     partial_scan = (void *)((char*)partial_scan - true_lb);
     
     /* need to allocate temporary buffer to store incoming data*/
-    MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
+    MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
     
     /* adjust for potential negative lower bound in datatype */
     tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -201,7 +201,7 @@ static int MPIR_Scan_generic (
 
         MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                      MPIR_Per_thread, per_thread, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
         if (per_thread->op_errno) {
             mpi_errno = per_thread->op_errno;
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -209,7 +209,7 @@ static int MPIR_Scan_generic (
     }
     
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -241,7 +241,7 @@ int MPIR_Scan(
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(3);
+    MPIR_CHKLMEM_DECL(3);
     int rank = comm_ptr->rank;
     MPI_Status status;
     void *tempbuf = NULL, *localfulldata = NULL, *prefulldata = NULL;
@@ -254,7 +254,7 @@ int MPIR_Scan(
        communicator in which all the nodes contain processes with
        consecutive ranks. */
 
-    if (!MPIR_Comm_is_node_consecutive(comm_ptr)) {
+    if (!MPII_Comm_is_node_consecutive(comm_ptr)) {
         /* We can't use the SMP-aware algorithm, use the generic one */
         return MPIR_Scan_generic(sendbuf, recvbuf, count, datatype, op, comm_ptr, errflag);
     }
@@ -263,20 +263,20 @@ int MPIR_Scan(
 
     MPID_Datatype_get_extent_macro(datatype, extent);
 
-    MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
+    MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent));
 
-    MPIU_CHKLMEM_MALLOC(tempbuf, void *, count*(MPL_MAX(extent, true_extent)),
+    MPIR_CHKLMEM_MALLOC(tempbuf, void *, count*(MPL_MAX(extent, true_extent)),
                         mpi_errno, "temporary buffer");
     tempbuf = (void *)((char*)tempbuf - true_lb);
 
     /* Create prefulldata and localfulldata on local roots of all nodes */
     if (comm_ptr->node_roots_comm != NULL) {
-        MPIU_CHKLMEM_MALLOC(prefulldata, void *, count*(MPL_MAX(extent, true_extent)),
+        MPIR_CHKLMEM_MALLOC(prefulldata, void *, count*(MPL_MAX(extent, true_extent)),
                             mpi_errno, "prefulldata for scan");
         prefulldata = (void *)((char*)prefulldata - true_lb);
 
         if (comm_ptr->node_comm != NULL) {
-            MPIU_CHKLMEM_MALLOC(localfulldata, void *, count*(MPL_MAX(extent, true_extent)),
+            MPIR_CHKLMEM_MALLOC(localfulldata, void *, count*(MPL_MAX(extent, true_extent)),
                                 mpi_errno, "localfulldata for scan");
             localfulldata = (void *)((char*)localfulldata - true_lb);
         }
@@ -320,7 +320,7 @@ int MPIR_Scan(
     }
     else if (comm_ptr->node_roots_comm == NULL && 
              comm_ptr->node_comm != NULL && 
-             MPIU_Get_intranode_rank(comm_ptr, rank) == comm_ptr->node_comm->local_size - 1)
+             MPIR_Get_intranode_rank(comm_ptr, rank) == comm_ptr->node_comm->local_size - 1)
     {
         mpi_errno = MPIC_Send(recvbuf, count, datatype,
                                  0, MPIR_SCAN_TAG, comm_ptr->node_comm, errflag);
@@ -351,11 +351,11 @@ int MPIR_Scan(
             MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
 
-        if (MPIU_Get_internode_rank(comm_ptr, rank) != 
+        if (MPIR_Get_internode_rank(comm_ptr, rank) !=
             comm_ptr->node_roots_comm->local_size-1)
         {
             mpi_errno = MPIC_Send(prefulldata, count, datatype,
-                                     MPIU_Get_internode_rank(comm_ptr, rank) + 1,
+                                     MPIR_Get_internode_rank(comm_ptr, rank) + 1,
                                      MPIR_SCAN_TAG, comm_ptr->node_roots_comm, errflag);
             if (mpi_errno) {
                 /* for communication errors, just record the error but continue */
@@ -364,10 +364,10 @@ int MPIR_Scan(
                 MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
             }
         }
-        if (MPIU_Get_internode_rank(comm_ptr, rank) != 0)
+        if (MPIR_Get_internode_rank(comm_ptr, rank) != 0)
         {
             mpi_errno = MPIC_Recv(tempbuf, count, datatype,
-                                     MPIU_Get_internode_rank(comm_ptr, rank) - 1,
+                                     MPIR_Get_internode_rank(comm_ptr, rank) - 1,
                                      MPIR_SCAN_TAG, comm_ptr->node_roots_comm, &status, errflag);
             noneed = 0;
             if (mpi_errno) {
@@ -413,7 +413,7 @@ int MPIR_Scan(
 
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -497,12 +497,12 @@ int MPI_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatyp
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCAN);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_SCAN);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_SCAN);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_SCAN);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -572,7 +572,7 @@ int MPI_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatyp
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_SCAN);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_SCAN);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/scatter.c b/src/mpi/coll/scatter.c
index 8a9d6ea..9842e49 100644
--- a/src/mpi/coll/scatter.c
+++ b/src/mpi/coll/scatter.c
@@ -85,7 +85,7 @@ int MPIR_Scatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     void *tmp_buf=NULL;
     int        mpi_errno = MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(4);
+    MPIR_CHKLMEM_DECL(4);
 
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
@@ -115,14 +115,14 @@ int MPIR_Scatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype
                in the event of recvbuf=MPI_IN_PLACE on the root,
                recvcount and recvtype are not valid */
             MPID_Datatype_get_size_macro(sendtype, sendtype_size);
-            MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+            MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
 					     extent*sendcount*comm_size);
 
             nbytes = sendtype_size * sendcount;
         }
         else {
             MPID_Datatype_get_size_macro(recvtype, recvtype_size);
-            MPIU_Ensure_Aint_fits_in_pointer(extent*recvcount*comm_size);
+            MPIR_Ensure_Aint_fits_in_pointer(extent*recvcount*comm_size);
             nbytes = recvtype_size * recvcount;
         }
         
@@ -132,7 +132,7 @@ int MPIR_Scatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype
            receive data of max size (nbytes*comm_size)/2 */
         if (relative_rank && !(relative_rank % 2)) {
 	    tmp_buf_size = (nbytes*comm_size)/2;
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
         }
         
         /* if the root is not rank 0, we reorder the sendbuf in order of
@@ -142,7 +142,7 @@ int MPIR_Scatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype
         if (rank == root) {
             if (root != 0) {
 		tmp_buf_size = nbytes*comm_size;
-                MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+                MPIR_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
 
                 if (recvbuf != MPI_IN_PLACE)
                     mpi_errno = MPIR_Localcopy(((char *) sendbuf + extent*sendcount*rank),
@@ -271,7 +271,7 @@ int MPIR_Scatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype
         if (rank == root) {
             MPIR_Pack_size_impl(sendcount*comm_size, sendtype, &tmp_buf_size);
 
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
 
           /* calculate the value of nbytes, the number of bytes in packed
              representation that each process receives. We can't
@@ -326,7 +326,7 @@ int MPIR_Scatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype
         }
         else {
             MPIR_Pack_size_impl(recvcount*(comm_size/2), recvtype, &tmp_buf_size);
-            MPIU_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
+            MPIR_CHKLMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf");
 
             /* calculate nbytes */
             position = 0;
@@ -398,7 +398,7 @@ int MPIR_Scatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype
 #endif /* MPID_HAS_HETERO */
     
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -435,7 +435,7 @@ int MPIR_Scatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     MPI_Aint extent, true_extent, true_lb = 0;
     void *tmp_buf=NULL;
     MPIR_Comm *newcomm_ptr = NULL;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     if (root == MPI_PROC_NULL) {
         /* local processes other than root do nothing */
@@ -478,11 +478,11 @@ int MPIR_Scatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
                 MPIR_Type_get_true_extent_impl(recvtype, &true_lb, &true_extent);
 
                 MPID_Datatype_get_extent_macro(recvtype, extent);
-		MPIU_Ensure_Aint_fits_in_pointer(extent*recvcount*local_size);
-		MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+		MPIR_Ensure_Aint_fits_in_pointer(extent*recvcount*local_size);
+		MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
 						 sendcount*remote_size*extent);
 
-                MPIU_CHKLMEM_MALLOC(tmp_buf, void *, recvcount*local_size*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
+                MPIR_CHKLMEM_MALLOC(tmp_buf, void *, recvcount*local_size*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf");
                 
                 /* adjust for potential negative lower bound in datatype */
                 tmp_buf = (void *)((char*)tmp_buf - true_lb);
@@ -500,7 +500,7 @@ int MPIR_Scatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
             
             /* Get the local intracommunicator */
             if (!comm_ptr->local_comm)
-                MPIR_Setup_intercomm_localcomm( comm_ptr );
+                MPII_Setup_intercomm_localcomm( comm_ptr );
             
             newcomm_ptr = comm_ptr->local_comm;
             
@@ -545,7 +545,7 @@ int MPIR_Scatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     }
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -666,12 +666,12 @@ int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCATTER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_SCATTER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_SCATTER);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_SCATTER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -784,7 +784,7 @@ int MPI_Scatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_SCATTER);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_SCATTER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/coll/scatterv.c b/src/mpi/coll/scatterv.c
index 5db178c..fc80200 100644
--- a/src/mpi/coll/scatterv.c
+++ b/src/mpi/coll/scatterv.c
@@ -63,7 +63,7 @@ int MPIR_Scatterv(const void *sendbuf, const int *sendcounts, const int *displs,
     int      i, reqs;
     MPIR_Request **reqarray;
     MPI_Status *starray;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     rank = comm_ptr->rank;
     
@@ -82,10 +82,10 @@ int MPIR_Scatterv(const void *sendbuf, const int *sendcounts, const int *displs,
          * a minimal sanity check. Maybe add a global var since we do
          * loop over sendcount[] in MPI_Scatterv before calling
          * this? */
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf + extent);
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf + extent);
 
-        MPIU_CHKLMEM_MALLOC(reqarray, MPIR_Request **, comm_size * sizeof(MPIR_Request *), mpi_errno, "reqarray");
-        MPIU_CHKLMEM_MALLOC(starray, MPI_Status *, comm_size * sizeof(MPI_Status), mpi_errno, "starray");
+        MPIR_CHKLMEM_MALLOC(reqarray, MPIR_Request **, comm_size * sizeof(MPIR_Request *), mpi_errno, "reqarray");
+        MPIR_CHKLMEM_MALLOC(starray, MPI_Status *, comm_size * sizeof(MPI_Status), mpi_errno, "starray");
 
         reqs = 0;
         for (i = 0; i < comm_size; i++) {
@@ -141,7 +141,7 @@ int MPIR_Scatterv(const void *sendbuf, const int *sendcounts, const int *displs,
     
     
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -230,12 +230,12 @@ int MPI_Scatterv(const void *sendbuf, const int *sendcounts, const int *displs,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SCATTERV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_SCATTERV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_SCATTERV);
+    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_SCATTERV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -369,7 +369,7 @@ int MPI_Scatterv(const void *sendbuf, const int *sendcounts, const int *displs,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_SCATTERV);
+    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_SCATTERV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_agree.c b/src/mpi/comm/comm_agree.c
index 6f1521b..3c19c94 100644
--- a/src/mpi/comm/comm_agree.c
+++ b/src/mpi/comm/comm_agree.c
@@ -39,8 +39,8 @@ int MPIR_Comm_agree(MPIR_Comm *comm_ptr, int *flag)
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
     int values[2];
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_AGREE);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_AGREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_AGREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_AGREE);
 
     MPIR_Comm_group_impl(comm_ptr, &comm_grp);
 
@@ -100,7 +100,7 @@ int MPIR_Comm_agree(MPIR_Comm *comm_ptr, int *flag)
     }
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_AGREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_AGREE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -132,12 +132,12 @@ int MPIX_Comm_agree(MPI_Comm comm, int *flag)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIX_COMM_AGREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIX_COMM_AGREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIX_COMM_AGREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIX_COMM_AGREE);
 
     /* Validate parameters, and convert MPI object handles to object pointers */
 #   ifdef HAVE_ERROR_CHECKING
@@ -171,7 +171,7 @@ int MPIX_Comm_agree(MPI_Comm comm, int *flag)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIX_COMM_AGREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIX_COMM_AGREE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_compare.c b/src/mpi/comm/comm_compare.c
index c97368a..274e63e 100644
--- a/src/mpi/comm/comm_compare.c
+++ b/src/mpi/comm/comm_compare.c
@@ -73,12 +73,12 @@ int MPI_Comm_compare(MPI_Comm comm1, MPI_Comm comm2, int *result)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr1 = NULL;
     MPIR_Comm *comm_ptr2 = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_COMPARE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_COMPARE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_COMPARE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_COMPARE);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -179,7 +179,7 @@ int MPI_Comm_compare(MPI_Comm comm1, MPI_Comm comm2, int *result)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_COMPARE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_COMPARE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/comm/comm_create.c b/src/mpi/comm/comm_create.c
index f52467c..d75f825 100644
--- a/src/mpi/comm/comm_create.c
+++ b/src/mpi/comm/comm_create.c
@@ -38,10 +38,10 @@ PMPI_LOCAL int MPIR_Comm_create_inter(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr
  * Ownership of the (*mapping_out) array is transferred to the caller who is
  * responsible for freeing it. */
 #undef FUNCNAME
-#define FUNCNAME MPIR_Comm_create_calculate_mapping
+#define FUNCNAME MPII_Comm_create_calculate_mapping
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Comm_create_calculate_mapping(MPIR_Group  *group_ptr,
+int MPII_Comm_create_calculate_mapping(MPIR_Group  *group_ptr,
                                        MPIR_Comm   *comm_ptr,
                                        int        **mapping_out,
                                        MPIR_Comm **mapping_comm)
@@ -51,16 +51,16 @@ int MPIR_Comm_create_calculate_mapping(MPIR_Group  *group_ptr,
     int i, j;
     int n;
     int *mapping=0;
-    MPIU_CHKPMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_CALCULATE_MAPPING);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_CALCULATE_MAPPING);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_CREATE_CALCULATE_MAPPING);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE_CALCULATE_MAPPING);
 
     *mapping_out = NULL;
     *mapping_comm = comm_ptr;
 
     n = group_ptr->size;
-    MPIU_CHKPMEM_MALLOC(mapping,int*,n*sizeof(int),mpi_errno,"mapping");
+    MPIR_CHKPMEM_MALLOC(mapping,int*,n*sizeof(int),mpi_errno,"mapping");
 
     /* Make sure that the processes for this group are contained within
        the input communicator.  Also identify the mapping from the ranks of
@@ -76,7 +76,7 @@ int MPIR_Comm_create_calculate_mapping(MPIR_Group  *group_ptr,
     */
 
     /* we examine the group's lpids in both the intracomm and non-comm_world cases */
-    MPIR_Group_setup_lpid_list( group_ptr );
+    MPII_Group_setup_lpid_list( group_ptr );
 
     /* Optimize for groups contained within MPI_COMM_WORLD. */
     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
@@ -134,16 +134,16 @@ int MPIR_Comm_create_calculate_mapping(MPIR_Group  *group_ptr,
         }
     }
 
-    MPIU_Assert(mapping != NULL);
+    MPIR_Assert(mapping != NULL);
     *mapping_out     = mapping;
     MPL_VG_CHECK_MEM_IS_DEFINED(*mapping_out, n * sizeof(**mapping_out));
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_CREATE_CALCULATE_MAPPING);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE_CALCULATE_MAPPING);
     return mpi_errno;
 fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -155,10 +155,10 @@ fn_fail:
  * function.  The caller is responsible for their storage and will
  * need to retain them till Comm_commit. */
 #undef FUNCNAME
-#define FUNCNAME MPIR_Comm_create_map
+#define FUNCNAME MPII_Comm_create_map
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Comm_create_map(int         local_n,
+int MPII_Comm_create_map(int         local_n,
                          int         remote_n,
                          int        *local_mapping,
                          int        *remote_mapping,
@@ -191,14 +191,14 @@ int MPIR_Comm_create_intra(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr,
                            MPIR_Comm **newcomm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_Context_id_t new_context_id = 0;
+    MPIR_Context_id_t new_context_id = 0;
     int *mapping = NULL;
     int n;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTRA);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTRA);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_CREATE_INTRA);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE_INTRA);
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     n = group_ptr->size;
     *newcomm_ptr = NULL;
@@ -213,12 +213,12 @@ int MPIR_Comm_create_intra(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr,
     mpi_errno = MPIR_Get_contextid_sparse( comm_ptr, &new_context_id,
                                            group_ptr->rank == MPI_UNDEFINED );
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(new_context_id != 0);
+    MPIR_Assert(new_context_id != 0);
 
     if (group_ptr->rank != MPI_UNDEFINED) {
         MPIR_Comm *mapping_comm = NULL;
 
-        mpi_errno = MPIR_Comm_create_calculate_mapping(group_ptr, comm_ptr,
+        mpi_errno = MPII_Comm_create_calculate_mapping(group_ptr, comm_ptr,
                                                        &mapping, &mapping_comm);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
@@ -243,7 +243,7 @@ int MPIR_Comm_create_intra(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr,
 
         /* Setup the communicator's network address mapping.  This is for the remote group,
            which is the same as the local group for intracommunicators */
-        mpi_errno = MPIR_Comm_create_map(n, 0,
+        mpi_errno = MPII_Comm_create_map(n, 0,
                                          mapping,
                                          NULL,
                                          mapping_comm,
@@ -262,7 +262,7 @@ fn_exit:
     if (mapping)
         MPL_free(mapping);
 
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_CREATE_INTRA);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE_INTRA);
     return mpi_errno;
 fn_fail:
     /* --BEGIN ERROR HANDLING-- */
@@ -287,19 +287,19 @@ PMPI_LOCAL int MPIR_Comm_create_inter(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr
                                       MPIR_Comm **newcomm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_Context_id_t new_context_id;
+    MPIR_Context_id_t new_context_id;
     int *mapping = NULL;
     int *remote_mapping = NULL;
     MPIR_Comm *mapping_comm = NULL;
     int remote_size = -1;
     int rinfo[2];
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTER);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTER);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_CREATE_INTER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE_INTER);
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
 
     /* Create a new communicator from the specified group members */
 
@@ -311,14 +311,14 @@ PMPI_LOCAL int MPIR_Comm_create_inter(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr
     /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the
        calling routine already holds the single criticial section */
     if (!comm_ptr->local_comm) {
-        MPIR_Setup_intercomm_localcomm( comm_ptr );
+        MPII_Setup_intercomm_localcomm( comm_ptr );
     }
     mpi_errno = MPIR_Get_contextid_sparse( comm_ptr->local_comm, &new_context_id, FALSE );
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(new_context_id != 0);
-    MPIU_Assert(new_context_id != comm_ptr->recvcontext_id);
+    MPIR_Assert(new_context_id != 0);
+    MPIR_Assert(new_context_id != comm_ptr->recvcontext_id);
 
-    mpi_errno = MPIR_Comm_create_calculate_mapping(group_ptr, comm_ptr, 
+    mpi_errno = MPII_Comm_create_calculate_mapping(group_ptr, comm_ptr,
 						   &mapping, &mapping_comm);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
@@ -366,7 +366,7 @@ PMPI_LOCAL int MPIR_Comm_create_inter(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr
         }
         remote_size = rinfo[1];
 
-        MPIU_CHKLMEM_MALLOC(remote_mapping,int*,
+        MPIR_CHKLMEM_MALLOC(remote_mapping,int*,
                             remote_size*sizeof(int),
                             mpi_errno,"remote_mapping");
 
@@ -396,7 +396,7 @@ PMPI_LOCAL int MPIR_Comm_create_inter(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr
             (*newcomm_ptr)->context_id = rinfo[0];
         }
         remote_size = rinfo[1];
-        MPIU_CHKLMEM_MALLOC(remote_mapping,int*,
+        MPIR_CHKLMEM_MALLOC(remote_mapping,int*,
                             remote_size*sizeof(int),
                             mpi_errno,"remote_mapping");
         mpi_errno = MPID_Bcast( remote_mapping, remote_size, MPI_INT, 0,
@@ -405,7 +405,7 @@ PMPI_LOCAL int MPIR_Comm_create_inter(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr
         MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
     }
 
-    MPIU_Assert(remote_size >= 0);
+    MPIR_Assert(remote_size >= 0);
 
     if (group_ptr->rank != MPI_UNDEFINED) {
         (*newcomm_ptr)->remote_size    = remote_size;
@@ -413,7 +413,7 @@ PMPI_LOCAL int MPIR_Comm_create_inter(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr
            the network address mapping. */
 
         /* Setup the communicator's network addresses from the local mapping. */
-        mpi_errno = MPIR_Comm_create_map(group_ptr->size,
+        mpi_errno = MPII_Comm_create_map(group_ptr->size,
                                          remote_size,
                                          mapping,
                                          remote_mapping,
@@ -444,11 +444,11 @@ PMPI_LOCAL int MPIR_Comm_create_inter(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     if (mapping)
         MPL_free(mapping);
 
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_CREATE_INTER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE_INTER);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -487,12 +487,12 @@ int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL, *newcomm_ptr;
     MPIR_Group *group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_CREATE);
 
     /* Validate parameters, and convert MPI object handles to object pointers */
 #   ifdef HAVE_ERROR_CHECKING
@@ -541,7 +541,7 @@ int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm)
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
     else {
-        MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+        MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM);
         mpi_errno = MPIR_Comm_create_inter(comm_ptr, group_ptr, &newcomm_ptr);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
@@ -552,7 +552,7 @@ int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_create_group.c b/src/mpi/comm/comm_create_group.c
index ae41026..9476af9 100644
--- a/src/mpi/comm/comm_create_group.c
+++ b/src/mpi/comm/comm_create_group.c
@@ -37,14 +37,14 @@ int MPIR_Comm_create_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag
                            MPIR_Comm ** newcomm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_Context_id_t new_context_id = 0;
+    MPIR_Context_id_t new_context_id = 0;
     int *mapping = NULL;
     int n;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_GROUP);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_CREATE_GROUP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_GROUP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE_GROUP);
 
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
 
     n = group_ptr->size;
     *newcomm_ptr = NULL;
@@ -59,9 +59,9 @@ int MPIR_Comm_create_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag
 
         mpi_errno = MPIR_Get_contextid_sparse_group( comm_ptr, group_ptr, tag, &new_context_id, 0 );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPIU_Assert(new_context_id != 0);
+        MPIR_Assert(new_context_id != 0);
 
-        mpi_errno = MPIR_Comm_create_calculate_mapping(group_ptr, comm_ptr, 
+        mpi_errno = MPII_Comm_create_calculate_mapping(group_ptr, comm_ptr,
                                                        &mapping, &mapping_comm);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
@@ -86,7 +86,7 @@ int MPIR_Comm_create_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag
 
         /* Setup the communicator's vc table.  This is for the remote group,
            which is the same as the local group for intracommunicators */
-        mpi_errno = MPIR_Comm_create_map(n, 0,
+        mpi_errno = MPII_Comm_create_map(n, 0,
                                          mapping,
                                          NULL,
                                          mapping_comm,
@@ -105,7 +105,7 @@ fn_exit:
     if (mapping)
         MPL_free(mapping);
 
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_CREATE_GROUP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE_GROUP);
     return mpi_errno;
 fn_fail:
     /* --BEGIN ERROR HANDLING-- */
@@ -154,12 +154,12 @@ int MPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm * ne
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL, *newcomm_ptr;
     MPIR_Group *group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_CREATE_GROUP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_CREATE_GROUP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_CREATE_GROUP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_CREATE_GROUP);
 
     /* Validate parameters, and convert MPI object handles to object pointers */
 #   ifdef HAVE_ERROR_CHECKING
@@ -215,7 +215,7 @@ int MPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm * ne
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_CREATE_GROUP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_CREATE_GROUP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_dup.c b/src/mpi/comm/comm_dup.c
index 4b08f0c..dfc080f 100644
--- a/src/mpi/comm/comm_dup.c
+++ b/src/mpi/comm/comm_dup.c
@@ -53,7 +53,7 @@ int MPIR_Comm_dup_impl(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr)
     /* We must use the local size, because this is compared to the
        rank of the process in the communicator.  For intercomms,
        this must be the local size */
-    mpi_errno = MPIR_Comm_copy( comm_ptr, comm_ptr->local_size, newcomm_ptr );
+    mpi_errno = MPII_Comm_copy( comm_ptr, comm_ptr->local_size, newcomm_ptr );
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     (*newcomm_ptr)->attributes = new_attributes;
@@ -120,12 +120,12 @@ int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL, *newcomm_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_DUP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_DUP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_DUP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_DUP);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -165,7 +165,7 @@ int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_DUP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_DUP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/comm/comm_dup_with_info.c b/src/mpi/comm/comm_dup_with_info.c
index 647c57a..03ba68a 100644
--- a/src/mpi/comm/comm_dup_with_info.c
+++ b/src/mpi/comm/comm_dup_with_info.c
@@ -90,12 +90,12 @@ int MPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm * newcomm)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL, *newcomm_ptr;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_DUP_WITH_INFO);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_DUP_WITH_INFO);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_DUP_WITH_INFO);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_DUP_WITH_INFO);
 
     /* Validate parameters, especially handles needing to be converted */
 #ifdef HAVE_ERROR_CHECKING
@@ -136,7 +136,7 @@ int MPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm * newcomm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_DUP_WITH_INFO);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_DUP_WITH_INFO);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_failure_ack.c b/src/mpi/comm/comm_failure_ack.c
index 848bfad..ba1fd40 100644
--- a/src/mpi/comm/comm_failure_ack.c
+++ b/src/mpi/comm/comm_failure_ack.c
@@ -54,12 +54,12 @@ int MPIX_Comm_failure_ack( MPI_Comm comm )
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIX_COMM_FAILURE_ACK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIX_COMM_FAILURE_ACK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIX_COMM_FAILURE_ACK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIX_COMM_FAILURE_ACK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -97,7 +97,7 @@ int MPIX_Comm_failure_ack( MPI_Comm comm )
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIX_COMM_FAILURE_ACK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIX_COMM_FAILURE_ACK);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_failure_get_acked.c b/src/mpi/comm/comm_failure_get_acked.c
index 0277768..a7e53db 100644
--- a/src/mpi/comm/comm_failure_get_acked.c
+++ b/src/mpi/comm/comm_failure_get_acked.c
@@ -58,12 +58,12 @@ int MPIX_Comm_failure_get_acked( MPI_Comm comm, MPI_Group *failedgrp )
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Group *group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIX_COMM_FAILURE_GET_ACKED);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIX_COMM_FAILURE_GET_ACKED);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIX_COMM_FAILURE_GET_ACKED);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIX_COMM_FAILURE_GET_ACKED);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -101,7 +101,7 @@ int MPIX_Comm_failure_get_acked( MPI_Comm comm, MPI_Group *failedgrp )
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIX_COMM_FAILURE_GET_ACKED);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIX_COMM_FAILURE_GET_ACKED);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_free.c b/src/mpi/comm/comm_free.c
index 07ba3fb..d6354a0 100644
--- a/src/mpi/comm/comm_free.c
+++ b/src/mpi/comm/comm_free.c
@@ -78,12 +78,12 @@ int MPI_Comm_free(MPI_Comm *comm)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_FREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_FREE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -131,7 +131,7 @@ int MPI_Comm_free(MPI_Comm *comm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_FREE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_get_info.c b/src/mpi/comm/comm_get_info.c
index d4bf586..8858ce4 100644
--- a/src/mpi/comm/comm_get_info.c
+++ b/src/mpi/comm/comm_get_info.c
@@ -35,7 +35,7 @@ int MPIR_Comm_get_info_impl(MPIR_Comm * comm_ptr, MPIR_Info ** info_p_p)
     int mpi_errno = MPI_SUCCESS;
 
     /* Allocate an empty info object */
-    mpi_errno = MPIU_Info_alloc(info_p_p);
+    mpi_errno = MPIR_Info_alloc(info_p_p);
     if (mpi_errno != MPI_SUCCESS)
         goto fn_fail;
 
@@ -80,12 +80,12 @@ int MPI_Comm_get_info(MPI_Comm comm, MPI_Info * info_used)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Info *info_used_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_GET_INFO);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_GET_INFO);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_GET_INFO);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_GET_INFO);
 
     /* Validate parameters, especially handles needing to be converted */
 #ifdef HAVE_ERROR_CHECKING
@@ -125,7 +125,7 @@ int MPI_Comm_get_info(MPI_Comm comm, MPI_Info * info_used)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_GET_INFO);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_GET_INFO);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_get_name.c b/src/mpi/comm/comm_get_name.c
index f55c68b..b6dc89d 100644
--- a/src/mpi/comm/comm_get_name.c
+++ b/src/mpi/comm/comm_get_name.c
@@ -67,11 +67,11 @@ int MPI_Comm_get_name(MPI_Comm comm, char *comm_name, int *resultlen)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_GET_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_GET_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_GET_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_GET_NAME);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -112,7 +112,7 @@ int MPI_Comm_get_name(MPI_Comm comm, char *comm_name, int *resultlen)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_GET_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_GET_NAME);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/comm/comm_group.c b/src/mpi/comm/comm_group.c
index 4af1e6d..5b502e3 100644
--- a/src/mpi/comm/comm_group.c
+++ b/src/mpi/comm/comm_group.c
@@ -35,9 +35,9 @@ int MPIR_Comm_group_impl(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr)
     int mpi_errno = MPI_SUCCESS;
     int i, lpid, n;
     int comm_world_size = MPIR_Process.comm_world->local_size;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_GROUP_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_GROUP_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_GROUP_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_GROUP_IMPL);
     /* Create a group if necessary and populate it with the
        local process ids */
     if (!comm_ptr->local_group) {
@@ -72,7 +72,7 @@ int MPIR_Comm_group_impl(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr)
     MPIR_Group_add_ref( comm_ptr->local_group );
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_GROUP_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_GROUP_IMPL);
     return mpi_errno;
  fn_fail:
 
@@ -111,12 +111,12 @@ int MPI_Comm_group(MPI_Comm comm, MPI_Group *group)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Group *group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_GROUP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_GROUP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_GROUP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_GROUP);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -155,7 +155,7 @@ int MPI_Comm_group(MPI_Comm comm, MPI_Group *group)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_GROUP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_GROUP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_idup.c b/src/mpi/comm/comm_idup.c
index 240c226..75c427b 100644
--- a/src/mpi/comm/comm_idup.c
+++ b/src/mpi/comm/comm_idup.c
@@ -48,7 +48,7 @@ int MPIR_Comm_idup_impl(MPIR_Comm *comm_ptr, MPIR_Comm **newcommp, MPIR_Request
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
-    mpi_errno = MPIR_Comm_copy_data(comm_ptr, newcommp);
+    mpi_errno = MPII_Comm_copy_data(comm_ptr, newcommp);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     (*newcommp)->attributes = new_attributes;
@@ -98,10 +98,10 @@ int MPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request)
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Comm *newcomm_ptr = NULL;
     MPIR_Request *dreq = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_IDUP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_IDUP);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_IDUP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_IDUP);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -149,7 +149,7 @@ int MPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_IDUP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_IDUP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_rank.c b/src/mpi/comm/comm_rank.c
index 8df6f41..8c4b2e1 100644
--- a/src/mpi/comm/comm_rank.c
+++ b/src/mpi/comm/comm_rank.c
@@ -53,12 +53,12 @@ int MPI_Comm_rank( MPI_Comm comm, int *rank )
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = 0;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_RANK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_RANK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_RANK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_RANK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -98,7 +98,7 @@ int MPI_Comm_rank( MPI_Comm comm, int *rank )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_RANK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_RANK);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_remote_group.c b/src/mpi/comm/comm_remote_group.c
index bfb28ad..0e4ee4e 100644
--- a/src/mpi/comm/comm_remote_group.c
+++ b/src/mpi/comm/comm_remote_group.c
@@ -34,9 +34,9 @@ int MPIR_Comm_remote_group_impl(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
     int i, lpid, n;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_REMOTE_GROUP_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_REMOTE_GROUP_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_REMOTE_GROUP_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_REMOTE_GROUP_IMPL);
     /* Create a group and populate it with the local process ids */
     if (!comm_ptr->remote_group) {
         n = comm_ptr->remote_size;
@@ -59,7 +59,7 @@ int MPIR_Comm_remote_group_impl(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr)
     MPIR_Group_add_ref( comm_ptr->remote_group );
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_REMOTE_GROUP_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_REMOTE_GROUP_IMPL);
     return mpi_errno;
  fn_fail:
 
@@ -101,12 +101,12 @@ int MPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Group *group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_REMOTE_GROUP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_REMOTE_GROUP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_REMOTE_GROUP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_REMOTE_GROUP);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -151,7 +151,7 @@ int MPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_REMOTE_GROUP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_REMOTE_GROUP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_remote_size.c b/src/mpi/comm/comm_remote_size.c
index 86bb4d8..c99855e 100644
--- a/src/mpi/comm/comm_remote_size.c
+++ b/src/mpi/comm/comm_remote_size.c
@@ -56,11 +56,11 @@ int MPI_Comm_remote_size(MPI_Comm comm, int *size)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_REMOTE_SIZE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_REMOTE_SIZE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_REMOTE_SIZE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_REMOTE_SIZE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -104,7 +104,7 @@ int MPI_Comm_remote_size(MPI_Comm comm, int *size)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_REMOTE_SIZE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_REMOTE_SIZE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/comm/comm_revoke.c b/src/mpi/comm/comm_revoke.c
index 1c0c623..330e3b6 100644
--- a/src/mpi/comm/comm_revoke.c
+++ b/src/mpi/comm/comm_revoke.c
@@ -54,12 +54,12 @@ int MPIX_Comm_revoke(MPI_Comm comm)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIX_COMM_REVOKE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIX_COMM_REVOKE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIX_COMM_REVOKE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIX_COMM_REVOKE);
 
     /* Validate parameters, especially handles needing to be converted */
 #    ifdef HAVE_ERROR_CHECKING
@@ -96,7 +96,7 @@ int MPIX_Comm_revoke(MPI_Comm comm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIX_COMM_REVOKE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIX_COMM_REVOKE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
   fn_fail:
diff --git a/src/mpi/comm/comm_set_info.c b/src/mpi/comm/comm_set_info.c
index 493577c..ee03e8e 100644
--- a/src/mpi/comm/comm_set_info.c
+++ b/src/mpi/comm/comm_set_info.c
@@ -35,17 +35,17 @@ int MPIR_Comm_set_info_impl(MPIR_Comm * comm_ptr, MPIR_Info * info_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *curr_info = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_SET_INFO_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_SET_INFO_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_SET_INFO_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_SET_INFO_IMPL);
 
-    mpi_errno = MPIR_Comm_apply_hints(comm_ptr, info_ptr);
+    mpi_errno = MPII_Comm_apply_hints(comm_ptr, info_ptr);
     if (mpi_errno != MPI_SUCCESS)
         goto fn_fail;
 
     if (comm_ptr->info == NULL) {
         /* Always have at least a blank info hint. */
-        mpi_errno = MPIU_Info_alloc(&(comm_ptr->info));
+        mpi_errno = MPIR_Info_alloc(&(comm_ptr->info));
         if (mpi_errno != MPI_SUCCESS)
             goto fn_fail;
     }
@@ -62,7 +62,7 @@ int MPIR_Comm_set_info_impl(MPIR_Comm * comm_ptr, MPIR_Info * info_ptr)
     }
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_SET_INFO_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_SET_INFO_IMPL);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -100,12 +100,12 @@ int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_SET_INFO);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_SET_INFO);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_SET_INFO);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_SET_INFO);
 
     /* Validate parameters, especially handles needing to be converted */
 #ifdef HAVE_ERROR_CHECKING
@@ -142,7 +142,7 @@ int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_SET_INFO);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_SET_INFO);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_set_name.c b/src/mpi/comm/comm_set_name.c
index ae22f4c..6510c95 100644
--- a/src/mpi/comm/comm_set_name.c
+++ b/src/mpi/comm/comm_set_name.c
@@ -51,11 +51,11 @@ int MPI_Comm_set_name(MPI_Comm comm, const char *comm_name)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_SET_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_SET_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_SET_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_SET_NAME);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -97,7 +97,7 @@ int MPI_Comm_set_name(MPI_Comm comm, const char *comm_name)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_SET_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_SET_NAME);
     return mpi_errno;
     
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/comm/comm_shrink.c b/src/mpi/comm/comm_shrink.c
index f327937..61cce4a 100644
--- a/src/mpi/comm/comm_shrink.c
+++ b/src/mpi/comm/comm_shrink.c
@@ -54,8 +54,8 @@ int MPIR_Comm_shrink(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr)
     int attempts = 0;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_SHRINK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_SHRINK);
 
     /* TODO - Implement this function for intercommunicators */
     MPIR_Comm_group_impl(comm_ptr, &comm_grp);
@@ -84,12 +84,12 @@ int MPIR_Comm_shrink(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr)
         MPIR_Group_release(new_group_ptr);
 
         if (errflag) {
-            if (*newcomm_ptr != NULL && MPIU_Object_get_ref(*newcomm_ptr) > 0) {
-                MPIU_Object_set_ref(*newcomm_ptr, 1);
+            if (*newcomm_ptr != NULL && MPIR_Object_get_ref(*newcomm_ptr) > 0) {
+                MPIR_Object_set_ref(*newcomm_ptr, 1);
                 MPIR_Comm_release(*newcomm_ptr);
             }
-            if (MPIU_Object_get_ref(new_group_ptr) > 0) {
-                MPIU_Object_set_ref(new_group_ptr, 1);
+            if (MPIR_Object_get_ref(new_group_ptr) > 0) {
+                MPIR_Object_set_ref(new_group_ptr, 1);
                 MPIR_Group_release(new_group_ptr);
             }
         }
@@ -100,12 +100,12 @@ int MPIR_Comm_shrink(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr)
 
   fn_exit:
     MPIR_Group_release(comm_grp);
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_SHRINK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_SHRINK);
     return mpi_errno;
   fn_fail:
-    if (*newcomm_ptr) MPIU_Object_set_ref(*newcomm_ptr, 0);
-    MPIU_Object_set_ref(global_failed, 0);
-    MPIU_Object_set_ref(new_group_ptr, 0);
+    if (*newcomm_ptr) MPIR_Object_set_ref(*newcomm_ptr, 0);
+    MPIR_Object_set_ref(global_failed, 0);
+    MPIR_Object_set_ref(new_group_ptr, 0);
     goto fn_exit;
 }
 
@@ -136,12 +136,12 @@ int MPIX_Comm_shrink(MPI_Comm comm, MPI_Comm *newcomm)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL, *newcomm_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIX_COMM_SHRINK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIX_COMM_SHRINK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIX_COMM_SHRINK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIX_COMM_SHRINK);
 
     /* Validate parameters, and convert MPI object handles to object pointers */
 #   ifdef HAVE_ERROR_CHECKING
@@ -179,7 +179,7 @@ int MPIX_Comm_shrink(MPI_Comm comm, MPI_Comm *newcomm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIX_COMM_SHRINK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIX_COMM_SHRINK);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_size.c b/src/mpi/comm/comm_size.c
index 27dcda1..fe78d75 100644
--- a/src/mpi/comm/comm_size.c
+++ b/src/mpi/comm/comm_size.c
@@ -57,11 +57,11 @@ int MPI_Comm_size( MPI_Comm comm, int *size )
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = 0;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_SIZE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_SIZE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_SIZE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_SIZE);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -101,7 +101,7 @@ int MPI_Comm_size( MPI_Comm comm, int *size )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_SIZE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_SIZE);
     return mpi_errno;
     
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/comm/comm_split.c b/src/mpi/comm/comm_split.c
index 18d1424..ba6437a 100644
--- a/src/mpi/comm/comm_split.c
+++ b/src/mpi/comm/comm_split.c
@@ -136,17 +136,17 @@ int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **ne
     int rank, size, remote_size, i, new_size, new_remote_size,
 	first_entry = 0, first_remote_entry = 0, *last_ptr;
     int in_newcomm; /* TRUE iff *newcomm should be populated */
-    MPIU_Context_id_t   new_context_id, remote_context_id;
+    MPIR_Context_id_t   new_context_id, remote_context_id;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
     MPIR_Comm_map_t *mapper;
-    MPIU_CHKLMEM_DECL(4);
+    MPIR_CHKLMEM_DECL(4);
 
     rank        = comm_ptr->rank;
     size        = comm_ptr->local_size;
     remote_size = comm_ptr->remote_size;
 	
     /* Step 1: Find out what color and keys all of the processes have */
-    MPIU_CHKLMEM_MALLOC(table,splittype*,size*sizeof(splittype),mpi_errno,
+    MPIR_CHKLMEM_MALLOC(table,splittype*,size*sizeof(splittype),mpi_errno,
 			"table");
     table[rank].color = color;
     table[rank].key   = key;
@@ -155,7 +155,7 @@ int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **ne
        processes */
     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
 	if (!comm_ptr->local_comm) {
-	    MPIR_Setup_intercomm_localcomm( comm_ptr );
+	    MPII_Setup_intercomm_localcomm( comm_ptr );
 	}
 	local_comm_ptr = comm_ptr->local_comm;
     }
@@ -200,7 +200,7 @@ int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **ne
 	   local group - perform an (intercommunicator) all gather
 	   of the color and rank information for the remote group.
 	*/
-	MPIU_CHKLMEM_MALLOC(remotetable,splittype*,
+	MPIR_CHKLMEM_MALLOC(remotetable,splittype*,
 			    remote_size*sizeof(splittype),mpi_errno,
 			    "remotetable");
 	/* This is an intercommunicator allgather */
@@ -250,22 +250,22 @@ int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **ne
        calling routine already holds the single criticial section */
     mpi_errno = MPIR_Get_contextid_sparse(local_comm_ptr, &new_context_id, !in_newcomm);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(new_context_id != 0);
+    MPIR_Assert(new_context_id != 0);
 
     /* In the intercomm case, we need to exchange the context ids */
     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
 	if (comm_ptr->rank == 0) {
-	    mpi_errno = MPIC_Sendrecv( &new_context_id, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0, 0,
-				       &remote_context_id, 1, MPIU_CONTEXT_ID_T_DATATYPE, 
+	    mpi_errno = MPIC_Sendrecv( &new_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, 0,
+				       &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
 				       0, 0, comm_ptr, MPI_STATUS_IGNORE, &errflag );
 	    if (mpi_errno) { MPIR_ERR_POP( mpi_errno ); }
-	    mpi_errno = MPID_Bcast( &remote_context_id, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr, &errflag );
+	    mpi_errno = MPID_Bcast( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr, &errflag );
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	}
 	else {
 	    /* Broadcast to the other members of the local group */
-	    mpi_errno = MPID_Bcast( &remote_context_id, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr, &errflag );
+	    mpi_errno = MPID_Bcast( &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr, &errflag );
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
             MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 	}
@@ -289,7 +289,7 @@ int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **ne
 	   extract the table into a smaller array and sort that.
 	   Also, store in the "color" entry the rank in the input communicator
 	   of the entry. */
-	MPIU_CHKLMEM_MALLOC(keytable,sorttype*,new_size*sizeof(sorttype),
+	MPIR_CHKLMEM_MALLOC(keytable,sorttype*,new_size*sizeof(sorttype),
 			    mpi_errno,"keytable");
 	for (i=0; i<new_size; i++) {
 	    keytable[i].key   = table[first_entry].key;
@@ -302,7 +302,7 @@ int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **ne
 	MPIU_Sort_inttable( keytable, new_size );
 
 	if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
-	    MPIU_CHKLMEM_MALLOC(remotekeytable,sorttype*,
+	    MPIR_CHKLMEM_MALLOC(remotekeytable,sorttype*,
 				new_remote_size*sizeof(sorttype),
 				mpi_errno,"remote keytable");
 	    for (i=0; i<new_remote_size; i++) {
@@ -382,7 +382,7 @@ int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **ne
     }
     
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -437,12 +437,12 @@ int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL, *newcomm_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_SPLIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_SPLIT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_SPLIT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_SPLIT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -485,7 +485,7 @@ int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_SPLIT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_SPLIT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/comm/comm_split_type.c b/src/mpi/comm/comm_split_type.c
index f7407e7..1c34303 100644
--- a/src/mpi/comm/comm_split_type.c
+++ b/src/mpi/comm/comm_split_type.c
@@ -37,7 +37,7 @@ int MPIR_Comm_split_type_impl(MPIR_Comm * comm_ptr, int split_type, int key,
 
     /* Only MPI_COMM_TYPE_SHARED, MPI_UNDEFINED, and
      * NEIGHBORHOOD are supported */
-    MPIU_Assert(split_type == MPI_COMM_TYPE_SHARED ||
+    MPIR_Assert(split_type == MPI_COMM_TYPE_SHARED ||
                 split_type == MPI_UNDEFINED ||
                 split_type == MPIX_COMM_TYPE_NEIGHBORHOOD);
 
@@ -137,12 +137,12 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL, *newcomm_ptr;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_SPLIT_TYPE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_SPLIT_TYPE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_SPLIT_TYPE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_SPLIT_TYPE);
 
     /* Validate parameters, especially handles needing to be converted */
 #ifdef HAVE_ERROR_CHECKING
@@ -188,7 +188,7 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_SPLIT_TYPE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_SPLIT_TYPE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/comm_test_inter.c b/src/mpi/comm/comm_test_inter.c
index 6b7383b..30e973c 100644
--- a/src/mpi/comm/comm_test_inter.c
+++ b/src/mpi/comm/comm_test_inter.c
@@ -55,11 +55,11 @@ int MPI_Comm_test_inter(MPI_Comm comm, int *flag)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_TEST_INTER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_TEST_INTER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_TEST_INTER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_TEST_INTER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -99,7 +99,7 @@ int MPI_Comm_test_inter(MPI_Comm comm, int *flag)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_TEST_INTER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_TEST_INTER);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/comm/commutil.c b/src/mpi/comm/commutil.c
index ce93308..ded0bb4 100644
--- a/src/mpi/comm/commutil.c
+++ b/src/mpi/comm/commutil.c
@@ -6,7 +6,7 @@
 
 #include "mpiimpl.h"
 #include "mpicomm.h"
-#include "mpir_info.h"    /* MPIU_Info_free */
+#include "mpir_info.h"    /* MPIR_Info_free */
 
 #include "mpl_utlist.h"
 #include "mpir_uthash.h"
@@ -22,7 +22,7 @@
 MPIR_Comm MPIR_Comm_builtin[MPIR_COMM_N_BUILTIN] = { {0} };
 MPIR_Comm MPIR_Comm_direct[MPID_COMM_PREALLOC] = { {0} };
 
-MPIU_Object_alloc_t MPIR_Comm_mem = {
+MPIR_Object_alloc_t MPIR_Comm_mem = {
     0,
     0,
     0,
@@ -68,11 +68,11 @@ static struct MPIR_Comm_hint_fn_elt *MPID_hint_fns = NULL;
  * to it.
  *
  * !!! The resulting struct is _not_ ready for communication !!! */
-int MPIR_Comm_init(MPIR_Comm * comm_p)
+int MPII_Comm_init(MPIR_Comm * comm_p)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Object_set_ref(comm_p, 1);
+    MPIR_Object_set_ref(comm_p, 1);
 
     /* initialize local and remote sizes to -1 to allow other parts of
      * the stack to detect errors more easily */
@@ -124,26 +124,26 @@ int MPIR_Comm_create(MPIR_Comm ** newcomm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *newptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE);
 
-    newptr = (MPIR_Comm *) MPIU_Handle_obj_alloc(&MPIR_Comm_mem);
+    newptr = (MPIR_Comm *) MPIR_Handle_obj_alloc(&MPIR_Comm_mem);
     MPIR_ERR_CHKANDJUMP(!newptr, mpi_errno, MPI_ERR_OTHER, "**nomem");
 
     *newcomm_ptr = newptr;
 
-    mpi_errno = MPIR_Comm_init(newptr);
+    mpi_errno = MPII_Comm_init(newptr);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
     /* Insert this new communicator into the list of known communicators.
      * Make this conditional on debugger support to match the test in
      * MPIR_Comm_release . */
-    MPIR_COMML_REMEMBER(newptr);
+    MPII_COMML_REMEMBER(newptr);
 
   fn_fail:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE);
 
     return mpi_errno;
 }
@@ -152,22 +152,22 @@ int MPIR_Comm_create(MPIR_Comm ** newcomm_ptr)
    specified intercomm. */
 /* FIXME this is an alternative constructor that doesn't use MPIR_Comm_create! */
 #undef FUNCNAME
-#define FUNCNAME MPIR_Setup_intercomm_localcomm
+#define FUNCNAME MPII_Setup_intercomm_localcomm
 #undef FCNAME
-#define FCNAME "MPIR_Setup_intercomm_localcomm"
-int MPIR_Setup_intercomm_localcomm(MPIR_Comm * intercomm_ptr)
+#define FCNAME "MPII_Setup_intercomm_localcomm"
+int MPII_Setup_intercomm_localcomm(MPIR_Comm * intercomm_ptr)
 {
     MPIR_Comm *localcomm_ptr;
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
 
-    localcomm_ptr = (MPIR_Comm *) MPIU_Handle_obj_alloc(&MPIR_Comm_mem);
+    localcomm_ptr = (MPIR_Comm *) MPIR_Handle_obj_alloc(&MPIR_Comm_mem);
     MPIR_ERR_CHKANDJUMP(!localcomm_ptr, mpi_errno, MPI_ERR_OTHER, "**nomem");
 
     /* get sensible default values for most fields (usually zeros) */
-    mpi_errno = MPIR_Comm_init(localcomm_ptr);
+    mpi_errno = MPII_Comm_init(localcomm_ptr);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
@@ -205,7 +205,7 @@ int MPIR_Setup_intercomm_localcomm(MPIR_Comm * intercomm_ptr)
         MPIR_ERR_POP(mpi_errno);
 
   fn_fail:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_SETUP_INTERCOMM_LOCALCOMM);
 
     return mpi_errno;
 }
@@ -227,14 +227,14 @@ static int cleanup_default_collops(void *unused)
     int i;
     for (i = 0; i < MPIR_COMM_HIERARCHY_KIND__SIZE; ++i) {
         if (default_collops[i]) {
-            MPIU_Assert(default_collops[i]->ref_count >= 1);
+            MPIR_Assert(default_collops[i]->ref_count >= 1);
             if (--default_collops[i]->ref_count == 0)
                 MPL_free(default_collops[i]);
             default_collops[i] = NULL;
         }
     }
     if (ic_default_collops) {
-        MPIU_Assert(ic_default_collops->ref_count >= 1);
+        MPIR_Assert(ic_default_collops->ref_count >= 1);
         if (--ic_default_collops->ref_count == 0)
             MPL_free(ic_default_collops);
     }
@@ -250,11 +250,11 @@ static int init_default_collops(void)
     int mpi_errno = MPI_SUCCESS;
     int i;
     struct MPIR_Collops *ops = NULL;
-    MPIU_CHKPMEM_DECL(MPIR_COMM_HIERARCHY_KIND__SIZE + 1);
+    MPIR_CHKPMEM_DECL(MPIR_COMM_HIERARCHY_KIND__SIZE + 1);
 
     /* first initialize the intracomms */
     for (i = 0; i < MPIR_COMM_HIERARCHY_KIND__SIZE; ++i) {
-        MPIU_CHKPMEM_CALLOC(ops, struct MPIR_Collops *, sizeof(struct MPIR_Collops), mpi_errno,
+        MPIR_CHKPMEM_CALLOC(ops, struct MPIR_Collops *, sizeof(struct MPIR_Collops), mpi_errno,
                             "default intracomm collops");
         ops->ref_count = 1;     /* force existence until finalize time */
 
@@ -304,7 +304,7 @@ static int init_default_collops(void)
 
             /* --BEGIN ERROR HANDLING-- */
         default:
-            MPIU_Assertp(FALSE);
+            MPIR_Assertp(FALSE);
             break;
             /* --END ERROR HANDLING-- */
         }
@@ -317,7 +317,7 @@ static int init_default_collops(void)
 
     /* now the intercomm table */
     {
-        MPIU_CHKPMEM_CALLOC(ops, struct MPIR_Collops *, sizeof(struct MPIR_Collops), mpi_errno,
+        MPIR_CHKPMEM_CALLOC(ops, struct MPIR_Collops *, sizeof(struct MPIR_Collops), mpi_errno,
                             "default intercomm collops");
         ops->ref_count = 1;     /* force existence until finalize time */
 
@@ -351,12 +351,12 @@ static int init_default_collops(void)
     /* run after MPID_Finalize to permit collective usage during finalize */
     MPIR_Add_finalize(cleanup_default_collops, NULL, MPIR_FINALIZE_CALLBACK_PRIO - 1);
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
   fn_exit:
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -414,12 +414,12 @@ int MPIR_Comm_map_irregular(MPIR_Comm * newcomm, MPIR_Comm * src_comm,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm_map_t *mapper;
-    MPIU_CHKPMEM_DECL(3);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
+    MPIR_CHKPMEM_DECL(3);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
 
-    MPIU_CHKPMEM_MALLOC(mapper, MPIR_Comm_map_t *, sizeof(MPIR_Comm_map_t), mpi_errno, "mapper");
+    MPIR_CHKPMEM_MALLOC(mapper, MPIR_Comm_map_t *, sizeof(MPIR_Comm_map_t), mpi_errno, "mapper");
 
     mapper->type = MPIR_COMM_MAP_TYPE__IRREGULAR;
     mapper->src_comm = src_comm;
@@ -431,7 +431,7 @@ int MPIR_Comm_map_irregular(MPIR_Comm * newcomm, MPIR_Comm * src_comm,
         mapper->free_mapping = 0;
     }
     else {
-        MPIU_CHKPMEM_MALLOC(mapper->src_mapping, int *,
+        MPIR_CHKPMEM_MALLOC(mapper->src_mapping, int *,
                             src_mapping_size * sizeof(int), mpi_errno, "mapper mapping");
         mapper->free_mapping = 1;
     }
@@ -444,11 +444,11 @@ int MPIR_Comm_map_irregular(MPIR_Comm * newcomm, MPIR_Comm * src_comm,
         *map = mapper;
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_MAP_TYPE__IRREGULAR);
     return mpi_errno;
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -460,12 +460,12 @@ int MPIR_Comm_map_dup(MPIR_Comm * newcomm, MPIR_Comm * src_comm, MPIR_Comm_map_d
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm_map_t *mapper;
-    MPIU_CHKPMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
 
-    MPIU_CHKPMEM_MALLOC(mapper, MPIR_Comm_map_t *, sizeof(MPIR_Comm_map_t), mpi_errno, "mapper");
+    MPIR_CHKPMEM_MALLOC(mapper, MPIR_Comm_map_t *, sizeof(MPIR_Comm_map_t), mpi_errno, "mapper");
 
     mapper->type = MPIR_COMM_MAP_TYPE__DUP;
     mapper->src_comm = src_comm;
@@ -476,11 +476,11 @@ int MPIR_Comm_map_dup(MPIR_Comm * newcomm, MPIR_Comm * src_comm, MPIR_Comm_map_d
     MPL_LL_APPEND(newcomm->mapper_head, newcomm->mapper_tail, mapper);
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_MAP_TYPE__DUP);
     return mpi_errno;
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -493,9 +493,9 @@ int MPIR_Comm_map_free(MPIR_Comm * comm)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm_map_t *mapper, *tmp;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_MAP_FREE);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_MAP_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_MAP_FREE);
 
     for (mapper = comm->mapper_head; mapper;) {
         tmp = mapper->next;
@@ -507,7 +507,7 @@ int MPIR_Comm_map_free(MPIR_Comm * comm)
     comm->mapper_head = NULL;
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_MAP_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_MAP_FREE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -528,15 +528,15 @@ int MPIR_Comm_commit(MPIR_Comm * comm)
     int num_local = -1, num_external = -1;
     int local_rank = -1, external_rank = -1;
     int *local_procs = NULL, *external_procs = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_COMMIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COMMIT);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_COMMIT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COMMIT);
 
     /* It's OK to relax these assertions, but we should do so very
      * intentionally.  For now this function is the only place that we create
      * our hierarchy of communicators */
-    MPIU_Assert(comm->node_comm == NULL);
-    MPIU_Assert(comm->node_roots_comm == NULL);
+    MPIR_Assert(comm->node_comm == NULL);
+    MPIR_Assert(comm->node_roots_comm == NULL);
 
     mpi_errno = set_collops(comm);
     if (mpi_errno)
@@ -551,7 +551,7 @@ int MPIR_Comm_commit(MPIR_Comm * comm)
 
     if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
 
-        mpi_errno = MPIU_Find_local_and_external(comm,
+        mpi_errno = MPIR_Find_local_and_external(comm,
                                                  &num_local, &local_rank, &local_procs,
                                                  &num_external, &external_rank, &external_procs,
                                                  &comm->intranode_table, &comm->internode_table);
@@ -562,7 +562,7 @@ int MPIR_Comm_commit(MPIR_Comm * comm)
 
             /* Non-fatal errors simply mean that this communicator will not have
              * any node awareness.  Node-aware collectives are an optimization. */
-            MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE, "MPIU_Find_local_and_external failed for comm_ptr=%p",
+            MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE, "MPIR_Find_local_and_external failed for comm_ptr=%p",
                            comm);
             if (comm->intranode_table)
                 MPL_free(comm->intranode_table);
@@ -575,14 +575,14 @@ int MPIR_Comm_commit(MPIR_Comm * comm)
         /* --END ERROR HANDLING-- */
 
         /* defensive checks */
-        MPIU_Assert(num_local > 0);
-        MPIU_Assert(num_local > 1 || external_rank >= 0);
-        MPIU_Assert(external_rank < 0 || external_procs != NULL);
+        MPIR_Assert(num_local > 0);
+        MPIR_Assert(num_local > 1 || external_rank >= 0);
+        MPIR_Assert(external_rank < 0 || external_procs != NULL);
 
         /* if the node_roots_comm and comm would be the same size, then creating
          * the second communicator is useless and wasteful. */
         if (num_external == comm->remote_size) {
-            MPIU_Assert(num_local == 1);
+            MPIR_Assert(num_local == 1);
             goto fn_exit;
         }
 
@@ -661,7 +661,7 @@ int MPIR_Comm_commit(MPIR_Comm * comm)
     if (local_procs != NULL)
         MPL_free(local_procs);
 
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_COMMIT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COMMIT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -678,7 +678,7 @@ int MPIR_Comm_is_node_aware(MPIR_Comm * comm)
 /* Returns true if the communicator is node-aware and processes in all the nodes
    are consecutive. For example, if node 0 contains "0, 1, 2, 3", node 1
    contains "4, 5, 6", and node 2 contains "7", we shall return true. */
-int MPIR_Comm_is_node_consecutive(MPIR_Comm * comm)
+int MPII_Comm_is_node_consecutive(MPIR_Comm * comm)
 {
     int i = 0, curr_nodeidx = 0;
     int *internode_table = comm->internode_table;
@@ -709,18 +709,18 @@ int MPIR_Comm_is_node_consecutive(MPIR_Comm * comm)
  * Used by cart_create, graph_create, and dup_create
  */
 #undef FUNCNAME
-#define FUNCNAME MPIR_Comm_copy
+#define FUNCNAME MPII_Comm_copy
 #undef FCNAME
-#define FCNAME "MPIR_Comm_copy"
-int MPIR_Comm_copy(MPIR_Comm * comm_ptr, int size, MPIR_Comm ** outcomm_ptr)
+#define FCNAME "MPII_Comm_copy"
+int MPII_Comm_copy(MPIR_Comm * comm_ptr, int size, MPIR_Comm ** outcomm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_Context_id_t new_context_id, new_recvcontext_id;
+    MPIR_Context_id_t new_context_id, new_recvcontext_id;
     MPIR_Comm *newcomm_ptr = NULL;
     MPIR_Comm_map_t *map;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_COPY);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COPY);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_COPY);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COPY);
 
     /* Get a new context first.  We need this to be collective over the
      * input communicator */
@@ -737,7 +737,7 @@ int MPIR_Comm_copy(MPIR_Comm * comm_ptr, int size, MPIR_Comm ** outcomm_ptr)
         new_recvcontext_id = new_context_id;
         if (mpi_errno)
             MPIR_ERR_POP(mpi_errno);
-        MPIU_Assert(new_context_id != 0);
+        MPIR_Assert(new_context_id != 0);
     }
 
     /* This is the local size, not the remote size, in the case of
@@ -826,7 +826,7 @@ int MPIR_Comm_copy(MPIR_Comm * comm_ptr, int size, MPIR_Comm ** outcomm_ptr)
     mpi_errno = MPIR_Info_dup_impl(comm_ptr->info, &(newcomm_ptr->info));
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPIR_Comm_apply_hints(newcomm_ptr, newcomm_ptr->info);
+    mpi_errno = MPII_Comm_apply_hints(newcomm_ptr, newcomm_ptr->info);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
@@ -835,7 +835,7 @@ int MPIR_Comm_copy(MPIR_Comm * comm_ptr, int size, MPIR_Comm ** outcomm_ptr)
   fn_fail:
   fn_exit:
 
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_COPY);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COPY);
 
     return mpi_errno;
 }
@@ -847,16 +847,16 @@ int MPIR_Comm_copy(MPIR_Comm * comm_ptr, int size, MPIR_Comm ** outcomm_ptr)
  * Used by comm_idup.
  */
 #undef FUNCNAME
-#define FUNCNAME MPIR_Comm_copy_data
+#define FUNCNAME MPII_Comm_copy_data
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Comm_copy_data(MPIR_Comm * comm_ptr, MPIR_Comm ** outcomm_ptr)
+int MPII_Comm_copy_data(MPIR_Comm * comm_ptr, MPIR_Comm ** outcomm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *newcomm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_COPY_DATA);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_COPY_DATA);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_COPY_DATA);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_COPY_DATA);
 
     mpi_errno = MPIR_Comm_create(&newcomm_ptr);
     if (mpi_errno)
@@ -902,7 +902,7 @@ int MPIR_Comm_copy_data(MPIR_Comm * comm_ptr, MPIR_Comm ** outcomm_ptr)
 
   fn_fail:
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_COPY_DATA);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_COPY_DATA);
     return mpi_errno;
 }
 
@@ -922,11 +922,11 @@ int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr)
 {
     int in_use;
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_COMM_DELETE_INTERNAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_COMM_DELETE_INTERNAL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_COMM_DELETE_INTERNAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_COMM_DELETE_INTERNAL);
 
-    MPIU_Assert(MPIU_Object_get_ref(comm_ptr) == 0);    /* sanity check */
+    MPIR_Assert(MPIR_Object_get_ref(comm_ptr) == 0);    /* sanity check */
 
     /* Remove the attributes, executing the attribute delete routine.
      * Do this only if the attribute functions are defined.
@@ -935,11 +935,11 @@ int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr)
     if (MPIR_Process.attr_free && comm_ptr->attributes) {
         /* Temporarily add a reference to this communicator because
          * the attr_free code requires a valid communicator */
-        MPIU_Object_add_ref(comm_ptr);
+        MPIR_Object_add_ref(comm_ptr);
         mpi_errno = MPIR_Process.attr_free(comm_ptr->handle, &comm_ptr->attributes);
         /* Release the temporary reference added before the call to
          * attr_free */
-        MPIU_Object_release_ref(comm_ptr, &in_use);
+        MPIR_Object_release_ref(comm_ptr, &in_use);
     }
 
     /* If the attribute delete functions return failure, the
@@ -959,7 +959,7 @@ int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr)
 
         /* Free info hints */
         if (comm_ptr->info != NULL) {
-            MPIU_Info_free(comm_ptr->info);
+            MPIR_Info_free(comm_ptr->info);
         }
 
         /* release our reference to the collops structure, comes after the
@@ -1004,7 +1004,7 @@ int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr)
             int errhInuse;
             MPIR_Errhandler_release_ref(comm_ptr->errhandler, &errhInuse);
             if (!errhInuse) {
-                MPIU_Handle_obj_free(&MPIR_Errhandler_mem, comm_ptr->errhandler);
+                MPIR_Handle_obj_free(&MPIR_Errhandler_mem, comm_ptr->errhandler);
             }
         }
 
@@ -1012,12 +1012,12 @@ int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr)
          * we are supporting message-queue debugging.  We make this
          * conditional on having debugger support since the
          * operation is not constant-time */
-        MPIR_COMML_FORGET(comm_ptr);
+        MPII_COMML_FORGET(comm_ptr);
 
         /* Check for predefined communicators - these should not
          * be freed */
         if (!(HANDLE_GET_KIND(comm_ptr->handle) == HANDLE_KIND_BUILTIN))
-            MPIU_Handle_obj_free(&MPIR_Comm_mem, comm_ptr);
+            MPIR_Handle_obj_free(&MPIR_Comm_mem, comm_ptr);
     }
     else {
         /* If the user attribute free function returns an error,
@@ -1026,7 +1026,7 @@ int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr)
     }
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_COMM_DELETE_INTERNAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_COMM_DELETE_INTERNAL);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1044,13 +1044,13 @@ int MPIR_Comm_release_always(MPIR_Comm * comm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
     int in_use;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
 
     /* we want to short-circuit any optimization that avoids reference counting
      * predefined communicators, such as MPI_COMM_WORLD or MPI_COMM_SELF. */
-    MPIU_Object_release_ref_always(comm_ptr, &in_use);
+    MPIR_Object_release_ref_always(comm_ptr, &in_use);
     if (!in_use) {
         mpi_errno = MPIR_Comm_delete_internal(comm_ptr);
         if (mpi_errno)
@@ -1058,7 +1058,7 @@ int MPIR_Comm_release_always(MPIR_Comm * comm_ptr)
     }
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_RELEASE_ALWAYS);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1067,18 +1067,18 @@ int MPIR_Comm_release_always(MPIR_Comm * comm_ptr)
 /* Apply all known info hints in the specified info chain to the given
  * communicator. */
 #undef FUNCNAME
-#define FUNCNAME MPIR_Comm_apply_hints
+#define FUNCNAME MPII_Comm_apply_hints
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Comm_apply_hints(MPIR_Comm * comm_ptr, MPIR_Info * info_ptr)
+int MPII_Comm_apply_hints(MPIR_Comm * comm_ptr, MPIR_Info * info_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *hint = NULL;
     char hint_name[MPI_MAX_INFO_KEY] = { 0 };
     struct MPIR_Comm_hint_fn_elt *hint_fn = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_APPLY_HINTS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_APPLY_HINTS);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_APPLY_HINTS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_APPLY_HINTS);
 
     MPL_LL_FOREACH(info_ptr, hint) {
         /* Have we hit the default, empty info hint? */
@@ -1098,7 +1098,7 @@ int MPIR_Comm_apply_hints(MPIR_Comm * comm_ptr, MPIR_Info * info_ptr)
     }
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_APPLY_HINTS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_APPLY_HINTS);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1112,9 +1112,9 @@ static int free_hint_handles(void *ignore)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIR_Comm_hint_fn_elt *curr_hint = NULL, *tmp = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_FREE_HINT_HANDLES);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_FREE_HINT_HANDLES);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_FREE_HINT_HANDLES);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_FREE_HINT_HANDLES);
 
     if (MPID_hint_fns) {
         HASH_ITER(hh, MPID_hint_fns, curr_hint, tmp) {
@@ -1124,7 +1124,7 @@ static int free_hint_handles(void *ignore)
     }
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_FREE_HINT_HANDLES);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_FREE_HINT_HANDLES);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1140,9 +1140,9 @@ int MPIR_Comm_register_hint(const char *hint_key, MPIR_Comm_hint_fn_t fn, void *
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIR_Comm_hint_fn_elt *hint_elt = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_REGISTER_HINT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_REGISTER_HINT);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_REGISTER_HINT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_REGISTER_HINT);
 
     if (MPID_hint_fns == NULL) {
         MPIR_Add_finalize(free_hint_handles, NULL, MPIR_FINALIZE_CALLBACK_PRIO - 1);
@@ -1156,7 +1156,7 @@ int MPIR_Comm_register_hint(const char *hint_key, MPIR_Comm_hint_fn_t fn, void *
     HASH_ADD_STR(MPID_hint_fns, name, hint_elt);
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_REGISTER_HINT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_REGISTER_HINT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpi/comm/contextid.c b/src/mpi/comm/contextid.c
index 096e79f..5951b60 100644
--- a/src/mpi/comm/contextid.c
+++ b/src/mpi/comm/contextid.c
@@ -6,7 +6,7 @@
 
 #include "mpiimpl.h"
 #include "mpicomm.h"
-#include "mpir_info.h"    /* MPIU_Info_free */
+#include "mpir_info.h"    /* MPIR_Info_free */
 
 #include "mpl_utlist.h"
 #include "mpir_uthash.h"
@@ -46,7 +46,7 @@ const int ALL_OWN_MASK_FLAG = MPIR_MAX_CONTEXT_MASK;
 /* utility function to pretty print a context ID for debugging purposes, see
  * mpiimpl.h for more info on the various fields */
 #ifdef MPL_USE_DBG_LOGGING
-static void dump_context_id(MPIU_Context_id_t context_id, char *out_str, int len)
+static void dump_context_id(MPIR_Context_id_t context_id, char *out_str, int len)
 {
     int subcomm_type = MPIR_CONTEXT_READ_FIELD(SUBCOMM, context_id);
     const char *subcomm_type_name = NULL;
@@ -62,7 +62,7 @@ static void dump_context_id(MPIU_Context_id_t context_id, char *out_str, int len
         subcomm_type_name = "internode";
         break;
     default:
-        MPIU_Assert(FALSE);
+        MPIR_Assert(FALSE);
         break;
     }
     MPL_snprintf(out_str, len,
@@ -233,7 +233,7 @@ static int locate_context_bit(uint32_t local_mask[])
 /* Allocates a context ID from the given mask by clearing the bit
  * corresponding to the the given id.  Returns 0 on failure, id on
  * success. */
-static int allocate_context_bit(uint32_t mask[], MPIU_Context_id_t id)
+static int allocate_context_bit(uint32_t mask[], MPIR_Context_id_t id)
 {
     int raw_prefix, idx, bitpos;
     raw_prefix = MPIR_CONTEXT_READ_FIELD(PREFIX, id);
@@ -241,7 +241,7 @@ static int allocate_context_bit(uint32_t mask[], MPIU_Context_id_t id)
     bitpos = raw_prefix % MPIR_CONTEXT_INT_BITS;
 
     /* the bit should not already be cleared (allocated) */
-    MPIU_Assert(mask[idx] & (1 << bitpos));
+    MPIR_Assert(mask[idx] & (1 << bitpos));
 
     /* clear the bit */
     mask[idx] &= ~(1 << bitpos);
@@ -259,7 +259,7 @@ static int allocate_context_bit(uint32_t mask[], MPIU_Context_id_t id)
  * Returns 0 on failure.  Returns the allocated context ID on success. */
 static int find_and_allocate_context_id(uint32_t local_mask[])
 {
-    MPIU_Context_id_t context_id;
+    MPIR_Context_id_t context_id;
     context_id = locate_context_bit(local_mask);
     if (context_id != 0) {
         context_id = allocate_context_bit(context_mask, context_id);
@@ -293,7 +293,7 @@ static volatile int mask_in_use = 0;
 #define FUNCNAME MPIR_Get_contextid_sparse
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Get_contextid_sparse(MPIR_Comm * comm_ptr, MPIU_Context_id_t * context_id, int ignore_id)
+int MPIR_Get_contextid_sparse(MPIR_Comm * comm_ptr, MPIR_Context_id_t * context_id, int ignore_id)
 {
     return MPIR_Get_contextid_sparse_group(comm_ptr, NULL /*group_ptr */ ,
                                            MPIR_Process.attrs.tag_ub /*tag */ ,
@@ -301,15 +301,15 @@ int MPIR_Get_contextid_sparse(MPIR_Comm * comm_ptr, MPIU_Context_id_t * context_
 }
 
 struct gcn_state {
-    MPIU_Context_id_t *ctx0;
-    MPIU_Context_id_t *ctx1;
+    MPIR_Context_id_t *ctx0;
+    MPIR_Context_id_t *ctx1;
     int own_mask;
     int own_eager_mask;
     int first_iter;
     uint64_t tag;
     MPIR_Comm *comm_ptr;
     MPIR_Comm *comm_ptr_inter;
-    MPID_Sched_t s;
+    MPIR_Sched_t s;
     MPIR_Comm *new_comm;
     MPIR_Comm_kind_t gcn_cid_kind;
     uint32_t local_mask[MPIR_MAX_CONTEXT_MASK + 1];
@@ -362,7 +362,7 @@ static int add_gcn_to_list(struct gcn_state *new_state)
  * obtain the best performance and utilization of the context ID space.
  *
  * Processes that pass ignore_id==TRUE will receive
- * (*context_id==MPIU_INVALID_CONTEXT_ID) and should not attempt to use it.
+ * (*context_id==MPIR_INVALID_CONTEXT_ID) and should not attempt to use it.
  *
  * If a group pointer is given, the call is _not_ sparse, and only processes
  * in the group should call this routine.  That is, it is collective only over
@@ -373,15 +373,15 @@ static int add_gcn_to_list(struct gcn_state *new_state)
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag,
-                                    MPIU_Context_id_t * context_id, int ignore_id)
+                                    MPIR_Context_id_t * context_id, int ignore_id)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
     struct gcn_state st;
     struct gcn_state *tmp;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);
 
     st.first_iter = 1;
     st.comm_ptr = comm_ptr;
@@ -389,7 +389,7 @@ int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr
     st.own_mask = 0;
     st.own_eager_mask = 0;
     /* Group-collective and ignore_id should never be combined */
-    MPIU_Assert(!(group_ptr != NULL && ignore_id));
+    MPIR_Assert(!(group_ptr != NULL && ignore_id));
 
     *context_id = 0;
 
@@ -409,7 +409,7 @@ int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr
         if (eager_nelem < 0) {
             /* Ensure that at least one word of deadlock-free context IDs is
              * always set aside for the base protocol */
-            MPIU_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
+            MPIR_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
                         MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK - 1);
             eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
         }
@@ -442,7 +442,7 @@ int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr
         }
 
         else {
-            MPIU_Assert(next_gcn != NULL);
+            MPIR_Assert(next_gcn != NULL);
             /*If we are here, at least one element must be in the list, at least myself */
 
             /* only the first element in the list can own the mask. However, maybe the mask is used
@@ -481,7 +481,7 @@ int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr
             st.local_mask[ALL_OWN_MASK_FLAG] = 0;
 
         /* Now, try to get a context id */
-        MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+        MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
         /* In the global and brief-global cases, note that this routine will
          * release that global lock when it needs to wait.  That will allow
          * other processes to enter the global or brief global critical section.
@@ -625,9 +625,9 @@ int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr
 
   fn_exit:
     if (ignore_id)
-        *context_id = MPIU_INVALID_CONTEXT_ID;
+        *context_id = MPIR_INVALID_CONTEXT_ID;
     MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "Context mask = %s", context_mask_to_str());
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -688,30 +688,30 @@ static int sched_cb_gcn_bcast(MPIR_Comm * comm, int tag, void *state)
     if (st->gcn_cid_kind == MPIR_COMM_KIND__INTERCOMM) {
         if (st->comm_ptr_inter->rank == 0) {
             mpi_errno =
-                MPID_Sched_recv(st->ctx1, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0, st->comm_ptr_inter,
+                MPIR_Sched_recv(st->ctx1, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, st->comm_ptr_inter,
                                 st->s);
             if (mpi_errno)
                 MPIR_ERR_POP(mpi_errno);
             mpi_errno =
-                MPID_Sched_send(st->ctx0, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0, st->comm_ptr_inter,
+                MPIR_Sched_send(st->ctx0, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, st->comm_ptr_inter,
                                 st->s);
             if (mpi_errno)
                 MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(st->s);
+            MPIR_SCHED_BARRIER(st->s);
         }
 
         mpi_errno = st->comm_ptr->coll_fns->Ibcast_sched(st->ctx1, 1,
-                                                         MPIU_CONTEXT_ID_T_DATATYPE, 0,
+                                                         MPIR_CONTEXT_ID_T_DATATYPE, 0,
                                                          st->comm_ptr, st->s);
         if (mpi_errno)
             MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(st->s);
+        MPIR_SCHED_BARRIER(st->s);
     }
 
-    mpi_errno = MPID_Sched_cb(&sched_cb_commit_comm, st, st->s);
+    mpi_errno = MPIR_Sched_cb(&sched_cb_commit_comm, st, st->s);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_cb(&MPIR_Sched_cb_free_buf, st, st->s);
+    mpi_errno = MPIR_Sched_cb(&MPIR_Sched_cb_free_buf, st, st->s);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
@@ -740,7 +740,7 @@ static int sched_cb_gcn_allocate_cid(MPIR_Comm * comm, int tag, void *state)
 {
     int mpi_errno = MPI_SUCCESS;
     struct gcn_state *st = state, *tmp;
-    MPIU_Context_id_t newctxid;
+    MPIR_Context_id_t newctxid;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
     if (st->own_eager_mask) {
         newctxid = find_and_allocate_context_id(st->local_mask);
@@ -809,24 +809,24 @@ static int sched_cb_gcn_allocate_cid(MPIR_Comm * comm, int tag, void *state)
                  *       are not necessarily completed in the same order as they are issued, also on the
                  *       same communicator. To avoid deadlocks, we cannot add the elements to the
                  *       list bevfore the first iallreduce is completed. The "tag" is created for the
-                 *       scheduling - by calling  MPID_Sched_next_tag(comm_ptr, &tag) - and the same
+                 *       scheduling - by calling  MPIR_Sched_next_tag(comm_ptr, &tag) - and the same
                  *       for a idup operation on all processes. So we use it here. */
                 /* FIXME I'm not sure if there can be an overflows for this tag */
                 st->tag = (uint64_t) tag + MPIR_Process.attrs.tag_ub;
                 add_gcn_to_list(st);
             }
-            mpi_errno = MPID_Sched_cb(&sched_cb_gcn_copy_mask, st, st->s);
+            mpi_errno = MPIR_Sched_cb(&sched_cb_gcn_copy_mask, st, st->s);
             if (mpi_errno)
                 MPIR_ERR_POP(mpi_errno);
-            MPID_SCHED_BARRIER(st->s);
+            MPIR_SCHED_BARRIER(st->s);
         }
     }
     else {
         /* Successfully allocated a context id */
-        mpi_errno = MPID_Sched_cb(&sched_cb_gcn_bcast, st, st->s);
+        mpi_errno = MPIR_Sched_cb(&sched_cb_gcn_bcast, st, st->s);
         if (mpi_errno)
             MPIR_ERR_POP(mpi_errno);
-        MPID_SCHED_BARRIER(st->s);
+        MPIR_SCHED_BARRIER(st->s);
     }
 
   fn_exit:
@@ -845,7 +845,7 @@ static int sched_cb_gcn_allocate_cid(MPIR_Comm * comm, int tag, void *state)
     /* In the case of failure, the new communicator was half created.
      * So we need to clean the memory allocated for it. */
     MPIR_Comm_map_free(st->new_comm);
-    MPIU_Handle_obj_free(&MPIR_Comm_mem, st->new_comm);
+    MPIR_Handle_obj_free(&MPIR_Comm_mem, st->new_comm);
     MPL_free(st);
     goto fn_exit;
 }
@@ -899,12 +899,12 @@ static int sched_cb_gcn_copy_mask(MPIR_Comm * comm, int tag, void *state)
                                                  st->comm_ptr, st->s);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(st->s);
+    MPIR_SCHED_BARRIER(st->s);
 
-    mpi_errno = MPID_Sched_cb(&sched_cb_gcn_allocate_cid, st, st->s);
+    mpi_errno = MPIR_Sched_cb(&sched_cb_gcn_allocate_cid, st, st->s);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(st->s);
+    MPIR_SCHED_BARRIER(st->s);
 
   fn_fail:
     return mpi_errno;
@@ -952,18 +952,18 @@ static int sched_cb_gcn_copy_mask(MPIR_Comm * comm, int tag, void *state)
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static int sched_get_cid_nonblock(MPIR_Comm * comm_ptr, MPIR_Comm * newcomm,
-                                  MPIU_Context_id_t * ctx0, MPIU_Context_id_t * ctx1,
-                                  MPID_Sched_t s, MPIR_Comm_kind_t gcn_cid_kind)
+                                  MPIR_Context_id_t * ctx0, MPIR_Context_id_t * ctx1,
+                                  MPIR_Sched_t s, MPIR_Comm_kind_t gcn_cid_kind)
 {
     int mpi_errno = MPI_SUCCESS;
     struct gcn_state *st = NULL;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
     if (initialize_context_mask) {
         context_id_init();
     }
 
-    MPIU_CHKPMEM_MALLOC(st, struct gcn_state *, sizeof(struct gcn_state), mpi_errno, "gcn_state");
+    MPIR_CHKPMEM_MALLOC(st, struct gcn_state *, sizeof(struct gcn_state), mpi_errno, "gcn_state");
     st->ctx0 = ctx0;
     st->ctx1 = ctx1;
     if (gcn_cid_kind == MPIR_COMM_KIND__INTRACOMM) {
@@ -984,21 +984,21 @@ static int sched_get_cid_nonblock(MPIR_Comm * comm_ptr, MPIR_Comm * newcomm,
     if (eager_nelem < 0) {
         /* Ensure that at least one word of deadlock-free context IDs is
          * always set aside for the base protocol */
-        MPIU_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
+        MPIR_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
                     MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK - 1);
         eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
     }
-    mpi_errno = MPID_Sched_cb(&sched_cb_gcn_copy_mask, st, s);
+    mpi_errno = MPIR_Sched_cb(&sched_cb_gcn_copy_mask, st, s);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
   fn_exit:
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -1011,17 +1011,17 @@ int MPIR_Get_contextid_nonblock(MPIR_Comm * comm_ptr, MPIR_Comm * newcommp, MPIR
 {
     int mpi_errno = MPI_SUCCESS;
     int tag;
-    MPID_Sched_t s;
+    MPIR_Sched_t s;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID_NONBLOCK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID_NONBLOCK);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID_NONBLOCK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GET_CONTEXTID_NONBLOCK);
 
     /* now create a schedule */
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
@@ -1033,12 +1033,12 @@ int MPIR_Get_contextid_nonblock(MPIR_Comm * comm_ptr, MPIR_Comm * newcommp, MPIR
         MPIR_ERR_POP(mpi_errno);
 
     /* finally, kick off the schedule and give the caller a request */
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, req);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, req);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID_NONBLOCK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GET_CONTEXTID_NONBLOCK);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1055,23 +1055,23 @@ int MPIR_Get_intercomm_contextid_nonblock(MPIR_Comm * comm_ptr, MPIR_Comm * newc
 {
     int mpi_errno = MPI_SUCCESS;
     int tag;
-    MPID_Sched_t s;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID_NONBLOCK);
+    MPIR_Sched_t s;
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID_NONBLOCK);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID_NONBLOCK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID_NONBLOCK);
 
     /* do as much local setup as possible */
     if (!comm_ptr->local_comm) {
-        mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
+        mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr);
         if (mpi_errno)
             MPIR_ERR_POP(mpi_errno);
     }
 
     /* now create a schedule */
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
@@ -1085,12 +1085,12 @@ int MPIR_Get_intercomm_contextid_nonblock(MPIR_Comm * comm_ptr, MPIR_Comm * newc
         MPIR_ERR_POP(mpi_errno);
 
     /* finally, kick off the schedule and give the caller a request */
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, req);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, req);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
   fn_fail:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID_NONBLOCK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID_NONBLOCK);
     return mpi_errno;
 }
 
@@ -1119,10 +1119,10 @@ int MPIR_Get_intercomm_contextid_nonblock(MPIR_Comm * comm_ptr, MPIR_Comm * newc
 #define FUNCNAME MPIR_Get_intercomm_contextid
 #undef FCNAME
 #define FCNAME "MPIR_Get_intercomm_contextid"
-int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, MPIU_Context_id_t * context_id,
-                                 MPIU_Context_id_t * recvcontext_id)
+int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, MPIR_Context_id_t * context_id,
+                                 MPIR_Context_id_t * recvcontext_id)
 {
-    MPIU_Context_id_t mycontext_id, remote_context_id;
+    MPIR_Context_id_t mycontext_id, remote_context_id;
     int mpi_errno = MPI_SUCCESS;
     int tag = 31567;            /* FIXME  - we need an internal tag or
                                  * communication channel.  Can we use a different
@@ -1130,13 +1130,13 @@ int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, MPIU_Context_id_t * conte
                                  * provided in the intercomm routine? (not on a dup,
                                  * but in that case it can use the collective context) */
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
 
     if (!comm_ptr->local_comm) {
         /* Manufacture the local communicator */
-        mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
+        mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr);
         if (mpi_errno)
             MPIR_ERR_POP(mpi_errno);
     }
@@ -1144,14 +1144,14 @@ int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, MPIU_Context_id_t * conte
     mpi_errno = MPIR_Get_contextid_sparse(comm_ptr->local_comm, &mycontext_id, FALSE);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(mycontext_id != 0);
+    MPIR_Assert(mycontext_id != 0);
 
     /* MPIC routine uses an internal context id.  The local leads (process 0)
      * exchange data */
     remote_context_id = -1;
     if (comm_ptr->rank == 0) {
-        mpi_errno = MPIC_Sendrecv(&mycontext_id, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0, tag,
-                                  &remote_context_id, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0, tag,
+        mpi_errno = MPIC_Sendrecv(&mycontext_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, tag,
+                                  &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, tag,
                                   comm_ptr, MPI_STATUS_IGNORE, &errflag);
         if (mpi_errno)
             MPIR_ERR_POP(mpi_errno);
@@ -1159,7 +1159,7 @@ int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, MPIU_Context_id_t * conte
 
     /* Make sure that all of the local processes now have this
      * id */
-    mpi_errno = MPID_Bcast(&remote_context_id, 1, MPIU_CONTEXT_ID_T_DATATYPE,
+    mpi_errno = MPID_Bcast(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
                                 0, comm_ptr->local_comm, &errflag);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
@@ -1171,7 +1171,7 @@ int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, MPIU_Context_id_t * conte
     *context_id = remote_context_id;
     *recvcontext_id = mycontext_id;
   fn_fail:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
     return mpi_errno;
 }
 
@@ -1179,12 +1179,12 @@ int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, MPIU_Context_id_t * conte
 #define FUNCNAME MPIR_Free_contextid
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-void MPIR_Free_contextid(MPIU_Context_id_t context_id)
+void MPIR_Free_contextid(MPIR_Context_id_t context_id)
 {
     int idx, bitpos, raw_prefix;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_FREE_CONTEXTID);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_FREE_CONTEXTID);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_FREE_CONTEXTID);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_FREE_CONTEXTID);
 
     /* Convert the context id to the bit position */
     raw_prefix = MPIR_CONTEXT_READ_FIELD(PREFIX, context_id);
@@ -1249,5 +1249,5 @@ void MPIR_Free_contextid(MPIU_Context_id_t context_id)
                       "Freed context %d, mask[%d] bit %d (prefix=%#x)",
                       context_id, idx, bitpos, raw_prefix));
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_FREE_CONTEXTID);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_FREE_CONTEXTID);
 }
diff --git a/src/mpi/comm/intercomm_create.c b/src/mpi/comm/intercomm_create.c
index 807b06a..4b0bcbe 100644
--- a/src/mpi/comm/intercomm_create.c
+++ b/src/mpi/comm/intercomm_create.c
@@ -49,7 +49,7 @@ PMPI_LOCAL int MPIR_CheckDisjointLpids( int lpids1[], int n1,
     int mpi_errno = MPI_SUCCESS;
     uint32_t lpidmaskPrealloc[N_STATIC_LPID32];
     uint32_t *lpidmask;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     /* Find the max lpid */
     for (i=0; i<n1; i++) {
@@ -62,7 +62,7 @@ PMPI_LOCAL int MPIR_CheckDisjointLpids( int lpids1[], int n1,
     mask_size = (maxlpid / 32) + 1;
 
     if (mask_size > N_STATIC_LPID32) {
-	MPIU_CHKLMEM_MALLOC(lpidmask,uint32_t*,mask_size*sizeof(uint32_t),
+	MPIR_CHKLMEM_MALLOC(lpidmask,uint32_t*,mask_size*sizeof(uint32_t),
 			    mpi_errno,"lpidmask");
     }
     else {
@@ -77,7 +77,7 @@ PMPI_LOCAL int MPIR_CheckDisjointLpids( int lpids1[], int n1,
 	idx = lpids1[i] / 32;
 	bit = lpids1[i] % 32;
 	lpidmask[idx] = lpidmask[idx] | (1 << bit);
-        MPIU_Assert(idx < mask_size);
+        MPIR_Assert(idx < mask_size);
     }    
 
     /* Look for any duplicates in the second array */
@@ -91,12 +91,12 @@ PMPI_LOCAL int MPIR_CheckDisjointLpids( int lpids1[], int n1,
 	}
 	/* Add a check on duplicates *within* group 2 */
 	lpidmask[idx] = lpidmask[idx] | (1 << bit);
-        MPIU_Assert(idx < mask_size);
+        MPIR_Assert(idx < mask_size);
     }
 
     /* Also fall through for normal return */
  fn_fail:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
     
 }
@@ -108,7 +108,7 @@ PMPI_LOCAL int MPID_LPID_GetAllInComm( MPIR_Comm *comm_ptr, int local_size,
     int i;
     
     /* FIXME: Should be using the local_size argument */
-    MPIU_Assert( comm_ptr->local_size == local_size );
+    MPIR_Assert( comm_ptr->local_size == local_size );
     for (i=0; i<comm_ptr->local_size; i++) {
 	(void)MPID_Comm_get_lpid( comm_ptr, i, &local_lpids[i], FALSE );
     }
@@ -124,7 +124,7 @@ int MPIR_Intercomm_create_impl(MPIR_Comm *local_comm_ptr, int local_leader,
                                MPIR_Comm **new_intercomm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_Context_id_t final_context_id, recvcontext_id;
+    MPIR_Context_id_t final_context_id, recvcontext_id;
     int remote_size, *remote_lpids=0, singlePG;
     int local_size,*local_lpids=0;
     MPIR_Gpid *local_gpids=NULL, *remote_gpids=NULL;
@@ -132,10 +132,10 @@ int MPIR_Intercomm_create_impl(MPIR_Comm *local_comm_ptr, int local_leader,
     int is_low_group = 0;
     int cts_tag;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIU_CHKLMEM_DECL(4);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_CREATE_IMPL);
+    MPIR_CHKLMEM_DECL(4);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_CREATE_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_CREATE_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_CREATE_IMPL);
 
     /* Shift tag into the tagged coll space (tag provided by the user 
        is ignored as of MPI 3.0) */
@@ -177,10 +177,10 @@ int MPIR_Intercomm_create_impl(MPIR_Comm *local_comm_ptr, int local_leader,
                                        remote_size ));
         /* With this information, we can now send and receive the
            global process ids from the peer. */
-        MPIU_CHKLMEM_MALLOC(remote_gpids,MPIR_Gpid*,remote_size*sizeof(MPIR_Gpid), mpi_errno,"remote_gpids");
-        MPIU_CHKLMEM_MALLOC(remote_lpids,int*,remote_size*sizeof(int), mpi_errno,"remote_lpids");
-        MPIU_CHKLMEM_MALLOC(local_gpids,MPIR_Gpid*,local_size*sizeof(MPIR_Gpid), mpi_errno,"local_gpids");
-        MPIU_CHKLMEM_MALLOC(local_lpids,int*,local_size*sizeof(int), mpi_errno,"local_lpids");
+        MPIR_CHKLMEM_MALLOC(remote_gpids,MPIR_Gpid*,remote_size*sizeof(MPIR_Gpid), mpi_errno,"remote_gpids");
+        MPIR_CHKLMEM_MALLOC(remote_lpids,int*,remote_size*sizeof(int), mpi_errno,"remote_lpids");
+        MPIR_CHKLMEM_MALLOC(local_gpids,MPIR_Gpid*,local_size*sizeof(MPIR_Gpid), mpi_errno,"local_gpids");
+        MPIR_CHKLMEM_MALLOC(local_lpids,int*,local_size*sizeof(int), mpi_errno,"local_lpids");
 
         mpi_errno = MPID_GPID_GetAllInComm( local_comm_ptr, local_size, local_gpids, &singlePG );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -238,16 +238,16 @@ int MPIR_Intercomm_create_impl(MPIR_Comm *local_comm_ptr, int local_leader,
     /* TODO: Make sure this is tag-safe */
     mpi_errno = MPIR_Get_contextid_sparse( local_comm_ptr, &recvcontext_id, FALSE );
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(recvcontext_id != 0);
+    MPIR_Assert(recvcontext_id != 0);
     MPL_DBG_MSG_FMT(MPIR_DBG_COMM,VERBOSE, (MPL_DBG_FDEST,"Got contextid=%d", recvcontext_id));
 
     /* Leaders can now swap context ids and then broadcast the value
        to the local group of processes */
     if (local_comm_ptr->rank == local_leader) {
-        MPIU_Context_id_t remote_context_id;
+        MPIR_Context_id_t remote_context_id;
 
-        mpi_errno = MPIC_Sendrecv( &recvcontext_id, 1, MPIU_CONTEXT_ID_T_DATATYPE, remote_leader, cts_tag,
-                                      &remote_context_id, 1, MPIU_CONTEXT_ID_T_DATATYPE, remote_leader, cts_tag,
+        mpi_errno = MPIC_Sendrecv( &recvcontext_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, remote_leader, cts_tag,
+                                      &remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, remote_leader, cts_tag,
                                       peer_comm_ptr, MPI_STATUS_IGNORE, &errflag );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
@@ -277,8 +277,8 @@ int MPIR_Intercomm_create_impl(MPIR_Comm *local_comm_ptr, int local_leader,
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
         remote_size = comm_info[0];
-        MPIU_CHKLMEM_MALLOC(remote_gpids,MPIR_Gpid*,remote_size*sizeof(MPIR_Gpid), mpi_errno,"remote_gpids");
-        MPIU_CHKLMEM_MALLOC(remote_lpids,int*,remote_size*sizeof(int), mpi_errno,"remote_lpids");
+        MPIR_CHKLMEM_MALLOC(remote_gpids,MPIR_Gpid*,remote_size*sizeof(MPIR_Gpid), mpi_errno,"remote_gpids");
+        MPIR_CHKLMEM_MALLOC(remote_lpids,int*,remote_size*sizeof(int), mpi_errno,"remote_lpids");
         mpi_errno = MPID_Bcast( remote_gpids, remote_size*sizeof(MPIR_Gpid), MPI_BYTE, local_leader,
                                      local_comm_ptr, &errflag );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -352,8 +352,8 @@ int MPIR_Intercomm_create_impl(MPIR_Comm *local_comm_ptr, int local_leader,
 
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_CREATE_IMPL);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_CREATE_IMPL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -423,12 +423,12 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader,
     MPIR_Comm *local_comm_ptr = NULL;
     MPIR_Comm *peer_comm_ptr = NULL;
     MPIR_Comm *new_intercomm_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INTERCOMM_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INTERCOMM_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INTERCOMM_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INTERCOMM_CREATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -524,7 +524,7 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INTERCOMM_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INTERCOMM_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/comm/intercomm_merge.c b/src/mpi/comm/intercomm_merge.c
index b5a13ff..5d01edb 100644
--- a/src/mpi/comm/intercomm_merge.c
+++ b/src/mpi/comm/intercomm_merge.c
@@ -73,15 +73,15 @@ int MPIR_Intercomm_merge_impl(MPIR_Comm *comm_ptr, int high, MPIR_Comm **new_int
 {
     int mpi_errno = MPI_SUCCESS;
     int  local_high, remote_high, new_size;
-    MPIU_Context_id_t new_context_id;
+    MPIR_Context_id_t new_context_id;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL);
     /* Make sure that we have a local intercommunicator */
     if (!comm_ptr->local_comm) {
         /* Manufacture the local communicator */
-        mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
+        mpi_errno = MPII_Setup_intercomm_localcomm( comm_ptr );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
@@ -122,7 +122,7 @@ int MPIR_Intercomm_merge_impl(MPIR_Comm *comm_ptr, int high, MPIR_Comm **new_int
                 {
                     /* req#3930: The merge algorithm will deadlock if the gpids are inadvertently the
                        same due to implementation bugs in the MPID_GPID_Get() function */
-                    MPIU_Assert(rc != 0);
+                    MPIR_Assert(rc != 0);
                 }
             }
         }
@@ -174,7 +174,7 @@ int MPIR_Intercomm_merge_impl(MPIR_Comm *comm_ptr, int high, MPIR_Comm **new_int
     new_context_id = 0;
     mpi_errno = MPIR_Get_contextid_sparse( (*new_intracomm_ptr), &new_context_id, FALSE );
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(new_context_id != 0);
+    MPIR_Assert(new_context_id != 0);
 
     /* We release this communicator that was involved just to
      * get valid context id and create true one
@@ -198,7 +198,7 @@ int MPIR_Intercomm_merge_impl(MPIR_Comm *comm_ptr, int high, MPIR_Comm **new_int
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -255,12 +255,12 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Comm *new_intracomm_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INTERCOMM_MERGE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INTERCOMM_MERGE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);  
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INTERCOMM_MERGE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INTERCOMM_MERGE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -298,7 +298,7 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
     /* Make sure that we have a local intercommunicator */
     if (!comm_ptr->local_comm) {
 	/* Manufacture the local communicator */
-	MPIR_Setup_intercomm_localcomm( comm_ptr );
+	MPII_Setup_intercomm_localcomm( comm_ptr );
     }
 
 #   ifdef HAVE_ERROR_CHECKING
@@ -339,7 +339,7 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INTERCOMM_MERGE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INTERCOMM_MERGE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/comm/mpicomm.h b/src/mpi/comm/mpicomm.h
index 190fe5a..fcc0187 100644
--- a/src/mpi/comm/mpicomm.h
+++ b/src/mpi/comm/mpicomm.h
@@ -5,4 +5,4 @@
  */
 
 /* Function prototypes for communicator helper functions */
-int MPIR_Get_intercomm_contextid( MPIR_Comm *, MPIU_Context_id_t *, MPIU_Context_id_t * );
+int MPIR_Get_intercomm_contextid( MPIR_Comm *, MPIR_Context_id_t *, MPIR_Context_id_t * );
diff --git a/src/mpi/datatype/address.c b/src/mpi/datatype/address.c
index bd5819a..7681c6f 100644
--- a/src/mpi/datatype/address.c
+++ b/src/mpi/datatype/address.c
@@ -62,11 +62,11 @@ The replacement for this routine is 'MPI_Get_address'.
 int MPI_Address(void *location, MPI_Aint *address)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ADDRESS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ADDRESS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ADDRESS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ADDRESS);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -89,7 +89,7 @@ int MPI_Address(void *location, MPI_Aint *address)
        standard, I can't tell if this is a compiler bug or a language bug.
     */
 #ifdef CHAR_PTR_IS_ADDRESS
-    *address = MPIU_VOID_PTR_CAST_TO_MPI_AINT ((char *) location);
+    *address = MPIR_VOID_PTR_CAST_TO_MPI_AINT ((char *) location);
 #else
     /* Note that this is the "portable" way to generate an address.
        The difference of two pointers is the number of elements
@@ -98,7 +98,7 @@ int MPI_Address(void *location, MPI_Aint *address)
        of bytes from 0 to location */
     /* To cover the case where a pointer is 32 bits and MPI_Aint is 64 bits,
        add cast to unsigned so the high order address bit is not sign-extended. */
-    *address = MPIU_VOID_PTR_CAST_TO_MPI_AINT ((char *) location - (char *) MPI_BOTTOM);
+    *address = MPIR_VOID_PTR_CAST_TO_MPI_AINT ((char *) location - (char *) MPI_BOTTOM);
 #endif
     /* The same code is used in MPI_Get_address */
 
@@ -107,7 +107,7 @@ int MPI_Address(void *location, MPI_Aint *address)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ADDRESS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ADDRESS);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/get_address.c b/src/mpi/datatype/get_address.c
index 2f291bf..2977321 100644
--- a/src/mpi/datatype/get_address.c
+++ b/src/mpi/datatype/get_address.c
@@ -72,11 +72,11 @@ Output Parameters:
 int MPI_Get_address(const void *location, MPI_Aint *address)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GET_ADDRESS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET_ADDRESS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GET_ADDRESS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GET_ADDRESS);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -99,14 +99,14 @@ int MPI_Get_address(const void *location, MPI_Aint *address)
        standard, I can't tell if this is a compiler bug or a language bug.
     */
 #ifdef CHAR_PTR_IS_ADDRESS
-    *address = MPIU_VOID_PTR_CAST_TO_MPI_AINT ((char *) location);
+    *address = MPIR_VOID_PTR_CAST_TO_MPI_AINT ((char *) location);
 #else
     /* Note that this is the "portable" way to generate an address.
        The difference of two pointers is the number of elements
        between them, so this gives the number of chars between location
        and ptr.  As long as sizeof(char) represents one byte,
        of bytes from 0 to location */
-    *address = MPIU_VOID_PTR_CAST_TO_MPI_AINT ((char *) location - (char *) MPI_BOTTOM);
+    *address = MPIR_VOID_PTR_CAST_TO_MPI_AINT ((char *) location - (char *) MPI_BOTTOM);
 #endif
     /* The same code is used in MPI_Address */
 
@@ -115,7 +115,7 @@ int MPI_Get_address(const void *location, MPI_Aint *address)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GET_ADDRESS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GET_ADDRESS);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/get_count.c b/src/mpi/datatype/get_count.c
index 8170db3..1ed418d 100644
--- a/src/mpi/datatype/get_count.c
+++ b/src/mpi/datatype/get_count.c
@@ -33,7 +33,7 @@ void MPIR_Get_count_impl(const MPI_Status *status, MPI_Datatype datatype, int *c
     MPI_Count size;
 
     MPID_Datatype_get_size_macro(datatype, size);
-    MPIU_Assert(size >= 0 && MPIR_STATUS_GET_COUNT(*status) >= 0);
+    MPIR_Assert(size >= 0 && MPIR_STATUS_GET_COUNT(*status) >= 0);
     if (size != 0) {
         /* MPI-3 says return MPI_UNDEFINED if too large for an int */
 	if ((MPIR_STATUS_GET_COUNT(*status) % size) != 0 || ((MPIR_STATUS_GET_COUNT(*status) / size) > INT_MAX))
@@ -91,11 +91,11 @@ size of 'datatype' (so that 'count' would not be integral), a 'count' of
 int MPI_Get_count( const MPI_Status *status, MPI_Datatype datatype, int *count )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GET_COUNT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET_COUNT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GET_COUNT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GET_COUNT);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -128,7 +128,7 @@ int MPI_Get_count( const MPI_Status *status, MPI_Datatype datatype, int *count )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GET_COUNT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GET_COUNT);
     return mpi_errno;
     
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/get_elements.c b/src/mpi/datatype/get_elements.c
index 64c7d07..01c9419 100644
--- a/src/mpi/datatype/get_elements.c
+++ b/src/mpi/datatype/get_elements.c
@@ -60,11 +60,11 @@ int MPI_Get_elements(const MPI_Status *status, MPI_Datatype datatype, int *count
     int mpi_errno = MPI_SUCCESS;
     MPI_Count count_x;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GET_ELEMENTS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET_ELEMENTS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GET_ELEMENTS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GET_ELEMENTS);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -113,7 +113,7 @@ int MPI_Get_elements(const MPI_Status *status, MPI_Datatype datatype, int *count
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GET_ELEMENTS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GET_ELEMENTS);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/get_elements_x.c b/src/mpi/datatype/get_elements_x.c
index 682a299..2f6351b 100644
--- a/src/mpi/datatype/get_elements_x.c
+++ b/src/mpi/datatype/get_elements_x.c
@@ -253,7 +253,7 @@ PMPI_LOCAL MPI_Count MPIR_Type_get_elements(MPI_Count *bytes_p,
                                                                   types[i]);
                         nr_elements += last_nr_elements;
 
-                        MPIU_Assert(last_nr_elements >= 0);
+                        MPIR_Assert(last_nr_elements >= 0);
 
                         if (last_nr_elements < ints[i+1]) break;
                     }
@@ -267,7 +267,7 @@ PMPI_LOCAL MPI_Count MPIR_Type_get_elements(MPI_Count *bytes_p,
             case MPI_COMBINER_F90_INTEGER:
             default:
                 /* --BEGIN ERROR HANDLING-- */
-                MPIU_Assert(0);
+                MPIR_Assert(0);
                 return -1;
                 break;
                 /* --END ERROR HANDLING-- */
@@ -323,7 +323,7 @@ int MPIR_Get_elements_x_impl(const MPI_Status *status, MPI_Datatype datatype, MP
                                                               -1,
                                                               datatype);
         }
-        MPIU_Assert(byte_count >= 0);
+        MPIR_Assert(byte_count >= 0);
     }
     else if (datatype_ptr->size == 0) {
         if (MPIR_STATUS_GET_COUNT(*status) > 0) {
@@ -343,7 +343,7 @@ int MPIR_Get_elements_x_impl(const MPI_Status *status, MPI_Datatype datatype, MP
         }
     }
     else /* derived type with weird element type or weird size */ {
-        MPIU_Assert(datatype_ptr->builtin_element_size == -1);
+        MPIR_Assert(datatype_ptr->builtin_element_size == -1);
 
         byte_count = MPIR_STATUS_GET_COUNT(*status);
         *elements = MPIR_Type_get_elements(&byte_count, -1, datatype);
@@ -384,10 +384,10 @@ Output Parameters:
 int MPI_Get_elements_x(const MPI_Status *status, MPI_Datatype datatype, MPI_Count *count)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GET_ELEMENTS_X);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET_ELEMENTS_X);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GET_ELEMENTS_X);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GET_ELEMENTS_X);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -432,7 +432,7 @@ int MPI_Get_elements_x(const MPI_Status *status, MPI_Datatype datatype, MPI_Coun
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GET_ELEMENTS_X);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GET_ELEMENTS_X);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/pack.c b/src/mpi/datatype/pack.c
index 545e593..1e01a71 100644
--- a/src/mpi/datatype/pack.c
+++ b/src/mpi/datatype/pack.c
@@ -62,7 +62,7 @@ int MPIR_Pack_impl(const void *inbuf,
     }
 
     if (contig) {
-        MPIU_Memcpy((char *) outbuf + *position, (char *)inbuf + dt_true_lb, data_sz);
+        MPIR_Memcpy((char *) outbuf + *position, (char *)inbuf + dt_true_lb, data_sz);
         *position = (int)((MPI_Aint)*position + data_sz);
         goto fn_exit;
     }
@@ -85,7 +85,7 @@ int MPIR_Pack_impl(const void *inbuf,
     last  = SEGMENT_IGNORE_LAST;
 
     /* Ensure that pointer increment fits in a pointer */
-    MPIU_Ensure_Aint_fits_in_pointer((MPIU_VOID_PTR_CAST_TO_MPI_AINT outbuf) +
+    MPIR_Ensure_Aint_fits_in_pointer((MPIR_VOID_PTR_CAST_TO_MPI_AINT outbuf) +
 				     (MPI_Aint) *position);
 
     MPID_Segment_pack(segp,
@@ -94,7 +94,7 @@ int MPIR_Pack_impl(const void *inbuf,
 		      (void *) ((char *) outbuf + *position));
 
     /* Ensure that calculation fits into an int datatype. */
-    MPIU_Ensure_Aint_fits_in_int((MPI_Aint)*position + last);
+    MPIR_Ensure_Aint_fits_in_int((MPI_Aint)*position + last);
 
     *position = (int)((MPI_Aint)*position + last);
 
@@ -157,11 +157,11 @@ int MPI_Pack(const void *inbuf,
     MPI_Aint position_x;
     MPIR_Comm *comm_ptr = NULL;
     
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_PACK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_PACK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_PACK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_PACK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -246,13 +246,13 @@ int MPI_Pack(const void *inbuf,
 
     position_x = *position;
     mpi_errno = MPIR_Pack_impl(inbuf, incount, datatype, outbuf, outsize, &position_x);
-    MPIU_Assign_trunc(*position, position_x, int);
+    MPIR_Assign_trunc(*position, position_x, int);
     if (mpi_errno) goto fn_fail;
     
    /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_PACK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_PACK);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/datatype/pack_external.c b/src/mpi/datatype/pack_external.c
index 0ac8e56..659bcdb 100644
--- a/src/mpi/datatype/pack_external.c
+++ b/src/mpi/datatype/pack_external.c
@@ -72,11 +72,11 @@ int MPI_Pack_external(const char datarep[],
     MPI_Aint first, last;
 
     MPID_Segment *segp;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_PACK_EXTERNAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_PACK_EXTERNAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_PACK_EXTERNAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_PACK_EXTERNAL);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -137,7 +137,7 @@ int MPI_Pack_external(const char datarep[],
     last  = SEGMENT_IGNORE_LAST;
 
     /* Ensure that pointer increment fits in a pointer */
-    MPIU_Ensure_Aint_fits_in_pointer((MPIU_VOID_PTR_CAST_TO_MPI_AINT outbuf) + *position);
+    MPIR_Ensure_Aint_fits_in_pointer((MPIR_VOID_PTR_CAST_TO_MPI_AINT outbuf) + *position);
 
     MPID_Segment_pack_external32(segp,
 				 first,
@@ -151,7 +151,7 @@ int MPI_Pack_external(const char datarep[],
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_PACK_EXTERNAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_PACK_EXTERNAL);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/datatype/pack_external_size.c b/src/mpi/datatype/pack_external_size.c
index 6962306..6a23c94 100644
--- a/src/mpi/datatype/pack_external_size.c
+++ b/src/mpi/datatype/pack_external_size.c
@@ -60,11 +60,11 @@ int MPI_Pack_external_size(const char datarep[],
 			   MPI_Aint *size)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_PACK_EXTERNAL_SIZE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_PACK_EXTERNAL_SIZE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_PACK_EXTERNAL_SIZE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_PACK_EXTERNAL_SIZE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -106,7 +106,7 @@ int MPI_Pack_external_size(const char datarep[],
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_PACK_EXTERNAL_SIZE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_PACK_EXTERNAL_SIZE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/pack_size.c b/src/mpi/datatype/pack_size.c
index 9bdab7c..4984d98 100644
--- a/src/mpi/datatype/pack_size.c
+++ b/src/mpi/datatype/pack_size.c
@@ -81,11 +81,11 @@ int MPI_Pack_size(int incount,
 #endif
     int mpi_errno = MPI_SUCCESS;
     MPI_Aint size_x = MPI_UNDEFINED;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_PACK_SIZE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_PACK_SIZE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_PACK_SIZE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_PACK_SIZE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -130,14 +130,14 @@ int MPI_Pack_size(int incount,
     /* ... body of routine ... */
 
     MPIR_Pack_size_impl(incount, datatype, &size_x);
-    MPIU_Assign_trunc(*size, size_x, int);
+    MPIR_Assign_trunc(*size, size_x, int);
 
     /* ... end of body of routine ... */
 
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_PACK_SIZE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_PACK_SIZE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/register_datarep.c b/src/mpi/datatype/register_datarep.c
index 7f136eb..6865d72 100644
--- a/src/mpi/datatype/register_datarep.c
+++ b/src/mpi/datatype/register_datarep.c
@@ -55,12 +55,12 @@ int MPI_Register_datarep(char *datarep,
 {
     static const char FCNAME[] = "MPI_Register_datarep";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_REGISTER_DATAREP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_REGISTER_DATAREP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_REGISTER_DATAREP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_REGISTER_DATAREP);
     
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -83,7 +83,7 @@ int MPI_Register_datarep(char *datarep,
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_REGISTER_DATAREP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_REGISTER_DATAREP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/status_set_elements.c b/src/mpi/datatype/status_set_elements.c
index 534e3f7..4f86363 100644
--- a/src/mpi/datatype/status_set_elements.c
+++ b/src/mpi/datatype/status_set_elements.c
@@ -53,11 +53,11 @@ int MPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype,
 			    int count)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_STATUS_SET_ELEMENTS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_STATUS_SET_ELEMENTS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_STATUS_SET_ELEMENTS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_STATUS_SET_ELEMENTS);
     
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -88,7 +88,7 @@ int MPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_STATUS_SET_ELEMENTS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_STATUS_SET_ELEMENTS);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/status_set_elements_x.c b/src/mpi/datatype/status_set_elements_x.c
index b429f73..007e00e 100644
--- a/src/mpi/datatype/status_set_elements_x.c
+++ b/src/mpi/datatype/status_set_elements_x.c
@@ -39,8 +39,8 @@ int MPIR_Status_set_elements_x_impl(MPI_Status *status, MPI_Datatype datatype, M
 
     /* overflow check, should probably be a real error check? */
     if (count != 0) {
-        MPIU_Assert(size_x >= 0 && count > 0);
-        MPIU_Assert(count * size_x < MPIR_COUNT_MAX);
+        MPIR_Assert(size_x >= 0 && count > 0);
+        MPIR_Assert(count * size_x < MPIR_COUNT_MAX);
     }
 
     MPIR_STATUS_SET_COUNT(*status, size_x * count);
@@ -76,10 +76,10 @@ Input Parameters:
 int MPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype, MPI_Count count)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_STATUS_SET_ELEMENTS_X);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_STATUS_SET_ELEMENTS_X);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_STATUS_SET_ELEMENTS_X);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_STATUS_SET_ELEMENTS_X);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -124,7 +124,7 @@ int MPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype, MPI_Cou
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_STATUS_SET_ELEMENTS_X);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_STATUS_SET_ELEMENTS_X);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_commit.c b/src/mpi/datatype/type_commit.c
index be63e27..43e971a 100644
--- a/src/mpi/datatype/type_commit.c
+++ b/src/mpi/datatype/type_commit.c
@@ -74,12 +74,12 @@ Input Parameters:
 int MPI_Type_commit(MPI_Datatype *datatype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_COMMIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_COMMIT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_COMMIT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_COMMIT);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -119,7 +119,7 @@ int MPI_Type_commit(MPI_Datatype *datatype)
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_COMMIT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_COMMIT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_contiguous.c b/src/mpi/datatype/type_contiguous.c
index bd682af..f9361f7 100644
--- a/src/mpi/datatype/type_contiguous.c
+++ b/src/mpi/datatype/type_contiguous.c
@@ -85,7 +85,7 @@ int MPIR_Type_contiguous_x_impl(MPI_Count count,
     /* truly stupendously large counts will overflow an integer with this math,
      * but that is a problem for a few decades from now.  Sorry, few decades
      * from now! */
-    MPIU_Assert(count/INT_MAX == (int)(count/INT_MAX));
+    MPIR_Assert(count/INT_MAX == (int)(count/INT_MAX));
     int c = (int)(count/INT_MAX); /* OK to cast until 'count' is 256 bits */
     int r = count%INT_MAX;
 
@@ -142,12 +142,12 @@ int MPI_Type_contiguous(int count,
 			MPI_Datatype *newtype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CONTIGUOUS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CONTIGUOUS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CONTIGUOUS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CONTIGUOUS);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -176,7 +176,7 @@ int MPI_Type_contiguous(int count,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CONTIGUOUS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CONTIGUOUS);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_create_darray.c b/src/mpi/datatype/type_create_darray.c
index 5760aff..30c115c 100644
--- a/src/mpi/datatype/type_create_darray.c
+++ b/src/mpi/datatype/type_create_darray.c
@@ -387,13 +387,13 @@ int MPI_Type_create_darray(int size,
 
     int *ints;
     MPIR_Datatype *datatype_ptr = NULL;
-    MPIU_CHKLMEM_DECL(3);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_DARRAY);
+    MPIR_CHKLMEM_DECL(3);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_DARRAY);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_DARRAY);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_DARRAY);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -527,7 +527,7 @@ int MPI_Type_create_darray(int size,
 
 /* calculate position in Cartesian grid as MPI would (row-major
    ordering) */
-    MPIU_CHKLMEM_MALLOC_ORJUMP(coords, int *, ndims * sizeof(int), mpi_errno, "position is Cartesian grid");
+    MPIR_CHKLMEM_MALLOC_ORJUMP(coords, int *, ndims * sizeof(int), mpi_errno, "position is Cartesian grid");
 
     procs = size;
     tmp_rank = rank;
@@ -537,7 +537,7 @@ int MPI_Type_create_darray(int size,
 	tmp_rank = tmp_rank % procs;
     }
 
-    MPIU_CHKLMEM_MALLOC_ORJUMP(st_offsets, MPI_Aint *, ndims * sizeof(MPI_Aint), mpi_errno, "st_offsets");
+    MPIR_CHKLMEM_MALLOC_ORJUMP(st_offsets, MPI_Aint *, ndims * sizeof(MPI_Aint), mpi_errno, "st_offsets");
 
     type_old = oldtype;
 
@@ -707,7 +707,7 @@ int MPI_Type_create_darray(int size,
      */
 
     /* Save contents */
-    MPIU_CHKLMEM_MALLOC_ORJUMP(ints, int *, (4 * ndims + 4) * sizeof(int), mpi_errno, "content description");
+    MPIR_CHKLMEM_MALLOC_ORJUMP(ints, int *, (4 * ndims + 4) * sizeof(int), mpi_errno, "content description");
 
     ints[0] = size;
     ints[1] = rank;
@@ -743,8 +743,8 @@ int MPI_Type_create_darray(int size,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_DARRAY);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_DARRAY);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_create_hindexed.c b/src/mpi/datatype/type_create_hindexed.c
index 228b9f6..704af44 100644
--- a/src/mpi/datatype/type_create_hindexed.c
+++ b/src/mpi/datatype/type_create_hindexed.c
@@ -66,13 +66,13 @@ int MPI_Type_create_hindexed(int count,
     MPI_Datatype new_handle;
     MPIR_Datatype *new_dtp;
     int i, *ints;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_HINDEXED);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_HINDEXED);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_HINDEXED);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_HINDEXED);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -113,7 +113,7 @@ int MPI_Type_create_hindexed(int count,
 
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
 
-    MPIU_CHKLMEM_MALLOC_ORJUMP(ints, int *, (count + 1) * sizeof(int), mpi_errno, "content description");
+    MPIR_CHKLMEM_MALLOC_ORJUMP(ints, int *, (count + 1) * sizeof(int), mpi_errno, "content description");
 
     ints[0] = count;
 
@@ -137,8 +137,8 @@ int MPI_Type_create_hindexed(int count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_HINDEXED);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_HINDEXED);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_create_hindexed_block.c b/src/mpi/datatype/type_create_hindexed_block.c
index 3cb7ce2..89bda44 100644
--- a/src/mpi/datatype/type_create_hindexed_block.c
+++ b/src/mpi/datatype/type_create_hindexed_block.c
@@ -102,12 +102,12 @@ int MPI_Type_create_hindexed_block(int count,
                                     MPI_Datatype oldtype, MPI_Datatype * newtype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_HINDEXED_BLOCK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_HINDEXED_BLOCK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_HINDEXED_BLOCK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_HINDEXED_BLOCK);
 
     /* Validate parameters and objects */
 #ifdef HAVE_ERROR_CHECKING
@@ -146,7 +146,7 @@ int MPI_Type_create_hindexed_block(int count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_HINDEXED_BLOCK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_HINDEXED_BLOCK);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_create_hvector.c b/src/mpi/datatype/type_create_hvector.c
index 81b6261..614d212 100644
--- a/src/mpi/datatype/type_create_hvector.c
+++ b/src/mpi/datatype/type_create_hvector.c
@@ -64,12 +64,12 @@ int MPI_Type_create_hvector(int count,
     MPI_Datatype new_handle;
     MPIR_Datatype *new_dtp;
     int ints[2];
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_HVECTOR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_HVECTOR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_HVECTOR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_HVECTOR);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -120,7 +120,7 @@ int MPI_Type_create_hvector(int count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_HVECTOR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_HVECTOR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_create_indexed_block.c b/src/mpi/datatype/type_create_indexed_block.c
index eae6ea1..e71c3ea 100644
--- a/src/mpi/datatype/type_create_indexed_block.c
+++ b/src/mpi/datatype/type_create_indexed_block.c
@@ -37,7 +37,7 @@ int MPIR_Type_create_indexed_block_impl(int count,
                                         MPI_Datatype *newtype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
     MPI_Datatype new_handle;
     MPIR_Datatype *new_dtp;
     int i, *ints;
@@ -50,7 +50,7 @@ int MPIR_Type_create_indexed_block_impl(int count,
 				       &new_handle);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_CHKLMEM_MALLOC_ORJUMP(ints, int *, (count + 2) * sizeof(int), mpi_errno, "content description");
+    MPIR_CHKLMEM_MALLOC_ORJUMP(ints, int *, (count + 2) * sizeof(int), mpi_errno, "content description");
 
     ints[0] = count;
     ints[1] = blocklength;
@@ -72,7 +72,7 @@ int MPIR_Type_create_indexed_block_impl(int count,
     MPIR_OBJ_PUBLISH_HANDLE(*newtype, new_handle);
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -136,12 +136,12 @@ int MPI_Type_create_indexed_block(int count,
 				  MPI_Datatype *newtype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_INDEXED_BLOCK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_INDEXED_BLOCK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_INDEXED_BLOCK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_INDEXED_BLOCK);
     
     /* Validate parameters and objects */
 #   ifdef HAVE_ERROR_CHECKING
@@ -179,7 +179,7 @@ int MPI_Type_create_indexed_block(int count,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_INDEXED_BLOCK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_INDEXED_BLOCK);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_create_resized.c b/src/mpi/datatype/type_create_resized.c
index afe91c9..b3ce957 100644
--- a/src/mpi/datatype/type_create_resized.c
+++ b/src/mpi/datatype/type_create_resized.c
@@ -61,12 +61,12 @@ int MPI_Type_create_resized(MPI_Datatype oldtype,
     MPI_Datatype new_handle;
     MPIR_Datatype *new_dtp;
     MPI_Aint aints[2];
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_RESIZED);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_RESIZED);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_RESIZED);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_RESIZED);
 
     /* Get handles to MPI objects. */
 #   ifdef HAVE_ERROR_CHECKING
@@ -114,7 +114,7 @@ int MPI_Type_create_resized(MPI_Datatype oldtype,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_RESIZED);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_RESIZED);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_create_struct.c b/src/mpi/datatype/type_create_struct.c
index fad34ab..2460eb8 100644
--- a/src/mpi/datatype/type_create_struct.c
+++ b/src/mpi/datatype/type_create_struct.c
@@ -41,7 +41,7 @@ int MPIR_Type_create_struct_impl(int count,
     int i, *ints;
     MPI_Datatype new_handle;
     MPIR_Datatype *new_dtp;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     mpi_errno = MPID_Type_struct(count,
 				 array_of_blocklengths,
@@ -52,7 +52,7 @@ int MPIR_Type_create_struct_impl(int count,
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
 
-    MPIU_CHKLMEM_MALLOC_ORJUMP(ints, int *, (count + 1) * sizeof(int), mpi_errno, "content description");
+    MPIR_CHKLMEM_MALLOC_ORJUMP(ints, int *, (count + 1) * sizeof(int), mpi_errno, "content description");
 
     ints[0] = count;
     for (i=0; i < count; i++)
@@ -72,7 +72,7 @@ int MPIR_Type_create_struct_impl(int count,
     MPIR_OBJ_PUBLISH_HANDLE(*newtype, new_handle);
         
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -115,12 +115,12 @@ int MPI_Type_create_struct(int count,
 			   MPI_Datatype *newtype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_STRUCT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_STRUCT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_STRUCT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_STRUCT);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -161,7 +161,7 @@ int MPI_Type_create_struct(int count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_STRUCT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_STRUCT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_create_subarray.c b/src/mpi/datatype/type_create_subarray.c
index 2c249fd..8b5d7ba 100644
--- a/src/mpi/datatype/type_create_subarray.c
+++ b/src/mpi/datatype/type_create_subarray.c
@@ -83,13 +83,13 @@ int MPI_Type_create_subarray(int ndims,
     int *ints;
     MPIR_Datatype *new_dtp;
 
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_SUBARRAY);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_SUBARRAY);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_SUBARRAY);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_SUBARRAY);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -147,7 +147,7 @@ int MPI_Type_create_subarray(int ndims,
                 goto fn_fail;
 	    }
 
-	    MPIR_Type_extent_impl(oldtype, &extent);
+	    MPID_Datatype_get_extent_macro(oldtype, extent);
 
 	    /* check if MPI_Aint is large enough for size of global array.
 	       if not, complain. */
@@ -185,7 +185,7 @@ int MPI_Type_create_subarray(int ndims,
     /* TODO: CHECK THE ERROR RETURNS FROM ALL THESE!!! */
 
     /* TODO: GRAB EXTENT WITH A MACRO OR SOMETHING FASTER */
-    MPIR_Type_extent_impl(oldtype, &extent);
+    MPID_Datatype_get_extent_macro(oldtype, extent);
 
     if (order == MPI_ORDER_FORTRAN) {
 	if (ndims == 1)
@@ -301,7 +301,7 @@ int MPI_Type_create_subarray(int ndims,
      */
 
     /* Save contents */
-    MPIU_CHKLMEM_MALLOC_ORJUMP(ints, int *, (3 * ndims + 2) * sizeof(int), mpi_errno, "content description");
+    MPIR_CHKLMEM_MALLOC_ORJUMP(ints, int *, (3 * ndims + 2) * sizeof(int), mpi_errno, "content description");
 
     ints[0] = ndims;
     for (i=0; i < ndims; i++) {
@@ -331,8 +331,8 @@ int MPI_Type_create_subarray(int ndims,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_SUBARRAY);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_SUBARRAY);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_dup.c b/src/mpi/datatype/type_dup.c
index 83cf8bb..b3f6151 100644
--- a/src/mpi/datatype/type_dup.c
+++ b/src/mpi/datatype/type_dup.c
@@ -54,12 +54,12 @@ int MPI_Type_dup(MPI_Datatype oldtype, MPI_Datatype *newtype)
     MPI_Datatype new_handle;
     MPIR_Datatype *datatype_ptr = NULL;
     MPIR_Datatype *new_dtp;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_DUP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_DUP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_DUP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_DUP);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -133,7 +133,7 @@ int MPI_Type_dup(MPI_Datatype oldtype, MPI_Datatype *newtype)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_DUP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_DUP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_extent.c b/src/mpi/datatype/type_extent.c
index 92d79f0..25dac42 100644
--- a/src/mpi/datatype/type_extent.c
+++ b/src/mpi/datatype/type_extent.c
@@ -54,11 +54,11 @@ The replacement for this routine is 'MPI_Type_get_extent'.
 int MPI_Type_extent(MPI_Datatype datatype, MPI_Aint *extent)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_EXTENT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_EXTENT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_EXTENT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_EXTENT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -91,14 +91,14 @@ int MPI_Type_extent(MPI_Datatype datatype, MPI_Aint *extent)
 
     /* ... body of routine ...  */
 
-    MPIR_Type_extent_impl(datatype, extent);
+    MPID_Datatype_get_extent_macro(datatype, *extent);
 
     /* ... end of body of routine ... */
 
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_EXTENT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_EXTENT);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/type_free.c b/src/mpi/datatype/type_free.c
index c3544bf..9cf2e9f 100644
--- a/src/mpi/datatype/type_free.c
+++ b/src/mpi/datatype/type_free.c
@@ -73,12 +73,12 @@ it clear that it is an error to free a null datatype.
 int MPI_Type_free(MPI_Datatype *datatype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_FREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_FREE);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -145,7 +145,7 @@ int MPI_Type_free(MPI_Datatype *datatype)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_FREE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_get_contents.c b/src/mpi/datatype/type_get_contents.c
index 776d199..303f3d3 100644
--- a/src/mpi/datatype/type_get_contents.c
+++ b/src/mpi/datatype/type_get_contents.c
@@ -63,12 +63,12 @@ int MPI_Type_get_contents(MPI_Datatype datatype,
 {
     static const char FCNAME[] = "MPI_Type_get_contents";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_GET_CONTENTS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_GET_CONTENTS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_GET_CONTENTS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_GET_CONTENTS);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -141,7 +141,7 @@ int MPI_Type_get_contents(MPI_Datatype datatype,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_GET_CONTENTS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_GET_CONTENTS);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_get_envelope.c b/src/mpi/datatype/type_get_envelope.c
index 3594733..95ae900 100644
--- a/src/mpi/datatype/type_get_envelope.c
+++ b/src/mpi/datatype/type_get_envelope.c
@@ -93,11 +93,11 @@ int MPI_Type_get_envelope(MPI_Datatype datatype,
 			  int *combiner)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_GET_ENVELOPE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_GET_ENVELOPE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_GET_ENVELOPE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_GET_ENVELOPE);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -142,7 +142,7 @@ int MPI_Type_get_envelope(MPI_Datatype datatype,
 #   ifdef HAVE_ERROR_CHECKING
  fn_exit:
 #   endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_GET_ENVELOPE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_GET_ENVELOPE);
     return mpi_errno;
 
 #   ifdef HAVE_ERROR_CHECKING
diff --git a/src/mpi/datatype/type_get_extent.c b/src/mpi/datatype/type_get_extent.c
index 2ec1ba9..9c3628a 100644
--- a/src/mpi/datatype/type_get_extent.c
+++ b/src/mpi/datatype/type_get_extent.c
@@ -66,11 +66,11 @@ Output Parameters:
 int MPI_Type_get_extent(MPI_Datatype datatype, MPI_Aint *lb, MPI_Aint *extent)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_GET_EXTENT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_GET_EXTENT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_GET_EXTENT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_GET_EXTENT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -110,7 +110,7 @@ int MPI_Type_get_extent(MPI_Datatype datatype, MPI_Aint *lb, MPI_Aint *extent)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_GET_EXTENT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_GET_EXTENT);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/type_get_extent_x.c b/src/mpi/datatype/type_get_extent_x.c
index 561c958..30720b4 100644
--- a/src/mpi/datatype/type_get_extent_x.c
+++ b/src/mpi/datatype/type_get_extent_x.c
@@ -71,10 +71,10 @@ Output Parameters:
 int MPI_Type_get_extent_x(MPI_Datatype datatype, MPI_Count *lb, MPI_Count *extent)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_GET_EXTENT_X);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_GET_EXTENT_X);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_GET_EXTENT_X);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_GET_EXTENT_X);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -117,7 +117,7 @@ int MPI_Type_get_extent_x(MPI_Datatype datatype, MPI_Count *lb, MPI_Count *exten
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_GET_EXTENT_X);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_GET_EXTENT_X);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_get_name.c b/src/mpi/datatype/type_get_name.c
index e4a4d0d..3d7ed40 100644
--- a/src/mpi/datatype/type_get_name.c
+++ b/src/mpi/datatype/type_get_name.c
@@ -240,11 +240,11 @@ int MPI_Type_get_name(MPI_Datatype datatype, char *type_name, int *resultlen)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Datatype *datatype_ptr = NULL;
     static int setup = 0;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_GET_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_GET_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_GET_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_GET_NAME);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -296,7 +296,7 @@ int MPI_Type_get_name(MPI_Datatype datatype, char *type_name, int *resultlen)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_GET_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_GET_NAME);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/datatype/type_get_true_extent.c b/src/mpi/datatype/type_get_true_extent.c
index 090f757..8b86d4e 100644
--- a/src/mpi/datatype/type_get_true_extent.c
+++ b/src/mpi/datatype/type_get_true_extent.c
@@ -68,11 +68,11 @@ int MPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint *true_lb,
 			     MPI_Aint *true_extent)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -112,7 +112,7 @@ int MPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint *true_lb,
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/type_get_true_extent_x.c b/src/mpi/datatype/type_get_true_extent_x.c
index 119a72c..e2ad197 100644
--- a/src/mpi/datatype/type_get_true_extent_x.c
+++ b/src/mpi/datatype/type_get_true_extent_x.c
@@ -71,10 +71,10 @@ Output Parameters:
 int MPI_Type_get_true_extent_x(MPI_Datatype datatype, MPI_Count *true_lb, MPI_Count *true_extent)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT_X);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT_X);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT_X);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT_X);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -117,7 +117,7 @@ int MPI_Type_get_true_extent_x(MPI_Datatype datatype, MPI_Count *true_lb, MPI_Co
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT_X);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT_X);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_hindexed.c b/src/mpi/datatype/type_hindexed.c
index 913c072..009d5d0 100644
--- a/src/mpi/datatype/type_hindexed.c
+++ b/src/mpi/datatype/type_hindexed.c
@@ -91,13 +91,13 @@ int MPI_Type_hindexed(int count,
     MPI_Datatype new_handle;
     MPIR_Datatype *new_dtp;
     int i, *ints;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_HINDEXED);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_HINDEXED);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_HINDEXED);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_HINDEXED);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -139,7 +139,7 @@ int MPI_Type_hindexed(int count,
 				  &new_handle);
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
 
-    MPIU_CHKLMEM_MALLOC(ints, int *, (count + 1) * sizeof(int), mpi_errno, "contents integer array");
+    MPIR_CHKLMEM_MALLOC(ints, int *, (count + 1) * sizeof(int), mpi_errno, "contents integer array");
 
     /* copy ints into temporary buffer (count and blocklengths) */
     ints[0] = count;
@@ -163,8 +163,8 @@ int MPI_Type_hindexed(int count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_HINDEXED);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_HINDEXED);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_hvector.c b/src/mpi/datatype/type_hvector.c
index b5b959d..12a23c6 100644
--- a/src/mpi/datatype/type_hvector.c
+++ b/src/mpi/datatype/type_hvector.c
@@ -100,12 +100,12 @@ int MPI_Type_hvector(int count,
 		     MPI_Datatype *newtype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_HVECTOR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_HVECTOR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_HVECTOR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_HVECTOR);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -137,7 +137,7 @@ int MPI_Type_hvector(int count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_HVECTOR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_HVECTOR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_indexed.c b/src/mpi/datatype/type_indexed.c
index 425a8b8..8284c4e 100644
--- a/src/mpi/datatype/type_indexed.c
+++ b/src/mpi/datatype/type_indexed.c
@@ -39,7 +39,7 @@ int MPIR_Type_indexed_impl(int count, const int *array_of_blocklengths,
     MPI_Datatype new_handle;
     MPIR_Datatype *new_dtp;
     int i, *ints;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     mpi_errno = MPID_Type_indexed(count,
 				  array_of_blocklengths,
@@ -52,7 +52,7 @@ int MPIR_Type_indexed_impl(int count, const int *array_of_blocklengths,
     /* copy all integer values into a temporary buffer; this
      * includes the count, the blocklengths, and the displacements.
      */
-    MPIU_CHKLMEM_MALLOC(ints, int *, (2 * count + 1) * sizeof(int), mpi_errno, "contents integer array");
+    MPIR_CHKLMEM_MALLOC(ints, int *, (2 * count + 1) * sizeof(int), mpi_errno, "contents integer array");
 
     ints[0] = count;
 
@@ -76,7 +76,7 @@ int MPIR_Type_indexed_impl(int count, const int *array_of_blocklengths,
     MPIR_OBJ_PUBLISH_HANDLE(*newtype, new_handle);
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -141,12 +141,12 @@ int MPI_Type_indexed(int count,
 		     MPI_Datatype *newtype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_INDEXED);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_INDEXED);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_INDEXED);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_INDEXED);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -186,7 +186,7 @@ int MPI_Type_indexed(int count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_INDEXED);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_INDEXED);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_lb.c b/src/mpi/datatype/type_lb.c
index 9ec1107..22f9d12 100644
--- a/src/mpi/datatype/type_lb.c
+++ b/src/mpi/datatype/type_lb.c
@@ -71,11 +71,11 @@ The replacement for this routine is 'MPI_Type_Get_extent'.
 int MPI_Type_lb(MPI_Datatype datatype, MPI_Aint *displacement)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_LB);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_LB);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_LB);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_LB);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -115,7 +115,7 @@ int MPI_Type_lb(MPI_Datatype datatype, MPI_Aint *displacement)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_LB);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_LB);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/type_match_size.c b/src/mpi/datatype/type_match_size.c
index b8bbbe8..578e278 100644
--- a/src/mpi/datatype/type_match_size.c
+++ b/src/mpi/datatype/type_match_size.c
@@ -83,13 +83,13 @@ int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *datatype)
     MPI_Datatype matched_datatype = MPI_DATATYPE_NULL;
     int i;
     MPI_Aint tsize;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_MATCH_SIZE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_MATCH_SIZE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     /* FIXME: This routine does not require the global critical section */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_MATCH_SIZE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_MATCH_SIZE);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -122,7 +122,7 @@ int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *datatype)
 #endif
 	    for (i=0; i<nRealTypes; i++) {
 		if (real_types[i] == MPI_DATATYPE_NULL) { continue; }
-		MPIR_Type_size_impl( real_types[i], &tsize );
+		MPID_Datatype_get_size_macro( real_types[i], tsize );
 		if (tsize == size) {
 		    matched_datatype = real_types[i];
 		    break;
@@ -138,7 +138,7 @@ int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *datatype)
 #endif
 	    for (i=0; i<nIntTypes; i++) {
 		if (int_types[i] == MPI_DATATYPE_NULL) { continue; }
-		MPIR_Type_size_impl( int_types[i], &tsize );
+		MPID_Datatype_get_size_macro( int_types[i], tsize );
 		if (tsize == size) {
 		    matched_datatype = int_types[i];
 		    break;
@@ -154,7 +154,7 @@ int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *datatype)
 #endif
 	    for (i=0; i<nComplexTypes; i++) {
 		if (complex_types[i] == MPI_DATATYPE_NULL) { continue; }
-		MPIR_Type_size_impl( complex_types[i], &tsize );
+		MPID_Datatype_get_size_macro( complex_types[i], tsize );
 		if (tsize == size) {
 		    matched_datatype = complex_types[i];
 		    break;
@@ -183,7 +183,7 @@ int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *datatype)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_MATCH_SIZE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_MATCH_SIZE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_set_name.c b/src/mpi/datatype/type_set_name.c
index b71c7a7..e89b4a3 100644
--- a/src/mpi/datatype/type_set_name.c
+++ b/src/mpi/datatype/type_set_name.c
@@ -53,11 +53,11 @@ int MPI_Type_set_name(MPI_Datatype datatype, const char *type_name)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Datatype *datatype_ptr = NULL;
     static int setup = 0;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_SET_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_SET_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_SET_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_SET_NAME);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -111,7 +111,7 @@ int MPI_Type_set_name(MPI_Datatype datatype, const char *type_name)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_SET_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_SET_NAME);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/type_size.c b/src/mpi/datatype/type_size.c
index a1e3db0..6d05d3d 100644
--- a/src/mpi/datatype/type_size.c
+++ b/src/mpi/datatype/type_size.c
@@ -55,11 +55,11 @@ int MPI_Type_size(MPI_Datatype datatype, int *size)
 {
     int mpi_errno = MPI_SUCCESS;
     MPI_Count size_x = MPI_UNDEFINED;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_SIZE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_SIZE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_SIZE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_SIZE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -102,14 +102,14 @@ int MPI_Type_size(MPI_Datatype datatype, int *size)
     mpi_errno = MPIR_Type_size_x_impl(datatype, &size_x);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(size_x >= 0);
+    MPIR_Assert(size_x >= 0);
     /* handle overflow: see MPI-3 p.104 */
     *size = (size_x > INT_MAX) ? MPI_UNDEFINED : (int)size_x;
 
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_SIZE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_SIZE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/type_size_x.c b/src/mpi/datatype/type_size_x.c
index 81bf558..4cfd973 100644
--- a/src/mpi/datatype/type_size_x.c
+++ b/src/mpi/datatype/type_size_x.c
@@ -67,10 +67,10 @@ Output Parameters:
 int MPI_Type_size_x(MPI_Datatype datatype, MPI_Count *size)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_SIZE_X);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_SIZE_X);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_SIZE_X);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_SIZE_X);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -114,7 +114,7 @@ int MPI_Type_size_x(MPI_Datatype datatype, MPI_Count *size)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_SIZE_X);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_SIZE_X);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_struct.c b/src/mpi/datatype/type_struct.c
index cb2d62c..903415e 100644
--- a/src/mpi/datatype/type_struct.c
+++ b/src/mpi/datatype/type_struct.c
@@ -38,7 +38,7 @@ int MPIR_Type_struct_impl(int count, const int *array_of_blocklengths,
     MPI_Datatype new_handle;
     int i, *ints;
     MPIR_Datatype *new_dtp;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     mpi_errno = MPID_Type_struct(count,
 				 array_of_blocklengths,
@@ -48,7 +48,7 @@ int MPIR_Type_struct_impl(int count, const int *array_of_blocklengths,
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
 
-    MPIU_CHKLMEM_MALLOC(ints, int *, (count + 1) * sizeof(int), mpi_errno, "contents integer array");
+    MPIR_CHKLMEM_MALLOC(ints, int *, (count + 1) * sizeof(int), mpi_errno, "contents integer array");
 
     ints[0] = count;
     for (i=0; i < count; i++) {
@@ -70,7 +70,7 @@ int MPIR_Type_struct_impl(int count, const int *array_of_blocklengths,
     MPIR_OBJ_PUBLISH_HANDLE(*newtype, new_handle);
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -155,12 +155,12 @@ int MPI_Type_struct(int count,
 		    MPI_Datatype *newtype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_STRUCT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_STRUCT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_STRUCT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_STRUCT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -201,7 +201,7 @@ int MPI_Type_struct(int count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_STRUCT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_STRUCT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/type_ub.c b/src/mpi/datatype/type_ub.c
index 96e97ee..62ea0ea 100644
--- a/src/mpi/datatype/type_ub.c
+++ b/src/mpi/datatype/type_ub.c
@@ -58,11 +58,11 @@ int MPI_Type_ub(MPI_Datatype datatype, MPI_Aint *displacement)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Datatype *datatype_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_UB);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_UB);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_UB);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_UB);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -103,7 +103,7 @@ int MPI_Type_ub(MPI_Datatype datatype, MPI_Aint *displacement)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_UB);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_UB);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/datatype/type_vector.c b/src/mpi/datatype/type_vector.c
index cedc8e7..f972b82 100644
--- a/src/mpi/datatype/type_vector.c
+++ b/src/mpi/datatype/type_vector.c
@@ -103,12 +103,12 @@ int MPI_Type_vector(int count,
 		    MPI_Datatype *newtype)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_VECTOR);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_VECTOR);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_VECTOR);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_VECTOR);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -139,7 +139,7 @@ int MPI_Type_vector(int count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_VECTOR);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_VECTOR);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/datatype/typeutil.c b/src/mpi/datatype/typeutil.c
index 33949cd..954a57f 100644
--- a/src/mpi/datatype/typeutil.c
+++ b/src/mpi/datatype/typeutil.c
@@ -18,12 +18,12 @@
 /* Preallocated datatype objects */
 MPIR_Datatype MPIR_Datatype_builtin[MPIR_DATATYPE_N_BUILTIN + 1] = { {0} };
 MPIR_Datatype MPIR_Datatype_direct[MPIR_DATATYPE_PREALLOC] = { {0} };
-MPIU_Object_alloc_t MPIR_Datatype_mem = { 0, 0, 0, 0, MPIR_DATATYPE,
+MPIR_Object_alloc_t MPIR_Datatype_mem = { 0, 0, 0, 0, MPIR_DATATYPE,
 			      sizeof(MPIR_Datatype), MPIR_Datatype_direct,
 					  MPIR_DATATYPE_PREALLOC};
 
 static int MPIR_Datatype_finalize(void *dummy );
-static int MPIR_DatatypeAttrFinalizeCallback(void *dummy );
+static int datatype_attr_finalize_cb(void *dummy );
 
 /* Call this routine to associate a MPIR_Datatype with each predefined
    datatype.  We do this with lazy initialization because many MPI 
@@ -149,8 +149,8 @@ int MPIR_Datatype_init(void)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Datatype *ptr;
 
-    MPIU_Assert(MPIR_Datatype_mem.initialized == 0);
-    MPIU_Assert(MPIR_DATATYPE_PREALLOC >= 5);
+    MPIR_Assert(MPIR_Datatype_mem.initialized == 0);
+    MPIR_Assert(MPIR_DATATYPE_PREALLOC >= 5);
 
     for (i=0; mpi_pairtypes[i] != (MPI_Datatype) -1; ++i) {
         /* types based on 'long long' and 'long double', may be disabled at
@@ -168,12 +168,12 @@ int MPIR_Datatype_init(void)
         /* we use the _unsafe version because we are still in MPI_Init, before
          * multiple threads are permitted and possibly before support for
          * critical sections is entirely setup */
-        ptr = (MPIR_Datatype *)MPIU_Handle_obj_alloc_unsafe( &MPIR_Datatype_mem );
+        ptr = (MPIR_Datatype *)MPIR_Handle_obj_alloc_unsafe( &MPIR_Datatype_mem );
 
-        MPIU_Assert(ptr);
-        MPIU_Assert(ptr->handle == mpi_pairtypes[i]);
+        MPIR_Assert(ptr);
+        MPIR_Assert(ptr->handle == mpi_pairtypes[i]);
         /* this is a redundant alternative to the previous statement */
-        MPIU_Assert((void *) ptr == (void *) (MPIR_Datatype_direct + HANDLE_INDEX(mpi_pairtypes[i])));
+        MPIR_Assert((void *) ptr == (void *) (MPIR_Datatype_direct + HANDLE_INDEX(mpi_pairtypes[i])));
 
         mpi_errno = MPID_Type_create_pairtype(mpi_pairtypes[i], (MPIR_Datatype *) ptr);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -259,7 +259,7 @@ int MPIR_Datatype_builtin_fillin(void)
 	    dptr->handle	   = d;
 	    dptr->is_permanent = 1;
 	    dptr->is_contig	   = 1;
-	    MPIU_Object_set_ref( dptr, 1 );
+	    MPIR_Object_set_ref( dptr, 1 );
 	    MPID_Datatype_get_size_macro(mpi_dtypes[i], dptr->size);
 	    dptr->extent	   = dptr->size;
 	    dptr->ub	   = dptr->size;
@@ -295,7 +295,7 @@ void MPIR_Datatype_iscontig(MPI_Datatype datatype, int *flag)
 
 /* If an attribute is added to a predefined type, we free the attributes 
    in Finalize */
-static int MPIR_DatatypeAttrFinalizeCallback(void *dummy ATTRIBUTE((unused)) )
+static int datatype_attr_finalize_cb(void *dummy ATTRIBUTE((unused)) )
 {
     MPIR_Datatype *dtype;
     int i, mpi_errno=MPI_SUCCESS;
@@ -311,14 +311,14 @@ static int MPIR_DatatypeAttrFinalizeCallback(void *dummy ATTRIBUTE((unused)) )
     return mpi_errno;
 }
 
-void MPIR_DatatypeAttrFinalize( void )
+void MPII_Datatype_attr_finalize( void )
 {
     static int called=0;
 
     /* FIXME: This needs to be make thread safe */
     if (!called) {
 	called = 1;
-	MPIR_Add_finalize(MPIR_DatatypeAttrFinalizeCallback, 0, 
+	MPIR_Add_finalize(datatype_attr_finalize_cb, 0,
 			  MPIR_FINALIZE_CALLBACK_PRIO-1);
     }
 }
diff --git a/src/mpi/datatype/unpack.c b/src/mpi/datatype/unpack.c
index cf1acb3..124cdab 100644
--- a/src/mpi/datatype/unpack.c
+++ b/src/mpi/datatype/unpack.c
@@ -57,7 +57,7 @@ int MPIR_Unpack_impl(const void *inbuf, MPI_Aint insize, MPI_Aint *position,
     }
 
     if (contig) {
-        MPIU_Memcpy((char *) outbuf + dt_true_lb, (char *)inbuf + *position, data_sz);
+        MPIR_Memcpy((char *) outbuf + dt_true_lb, (char *)inbuf + *position, data_sz);
         *position = (int)((MPI_Aint)*position + data_sz);
         goto fn_exit;
     }
@@ -67,7 +67,7 @@ int MPIR_Unpack_impl(const void *inbuf, MPI_Aint insize, MPI_Aint *position,
     segp = MPID_Segment_alloc();
     MPIR_ERR_CHKANDJUMP1(segp == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
     mpi_errno = MPID_Segment_init(outbuf, outcount, datatype, segp, 0);
-    MPIU_Assert(mpi_errno == MPI_SUCCESS);
+    MPIR_Assert(mpi_errno == MPI_SUCCESS);
 
     /* NOTE: the use of buffer values and positions in MPI_Unpack and in
      * MPID_Segment_unpack are quite different.  See code or docs or something.
@@ -76,7 +76,7 @@ int MPIR_Unpack_impl(const void *inbuf, MPI_Aint insize, MPI_Aint *position,
     last  = SEGMENT_IGNORE_LAST;
 
     /* Ensure that pointer increment fits in a pointer */
-    MPIU_Ensure_Aint_fits_in_pointer((MPIU_VOID_PTR_CAST_TO_MPI_AINT inbuf) +
+    MPIR_Ensure_Aint_fits_in_pointer((MPIR_VOID_PTR_CAST_TO_MPI_AINT inbuf) +
 				     (MPI_Aint) *position);
 
     MPID_Segment_unpack(segp,
@@ -85,7 +85,7 @@ int MPIR_Unpack_impl(const void *inbuf, MPI_Aint insize, MPI_Aint *position,
 			(void *) ((char *) inbuf + *position));
 
     /* Ensure that calculation fits into an int datatype. */
-    MPIU_Ensure_Aint_fits_in_int((MPI_Aint)*position + last);
+    MPIR_Ensure_Aint_fits_in_int((MPI_Aint)*position + last);
 
     *position = (int)((MPI_Aint)*position + last);
 
@@ -142,11 +142,11 @@ int MPI_Unpack(const void *inbuf, int insize, int *position,
     int mpi_errno = MPI_SUCCESS;
     MPI_Aint position_x;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_UNPACK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_UNPACK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_UNPACK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_UNPACK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -200,12 +200,12 @@ int MPI_Unpack(const void *inbuf, int insize, int *position,
     position_x = *position;
     mpi_errno = MPIR_Unpack_impl(inbuf, insize, &position_x, outbuf, outcount, datatype);
     if (mpi_errno) goto fn_fail;
-    MPIU_Assign_trunc(*position, position_x, int);
+    MPIR_Assign_trunc(*position, position_x, int);
     
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_UNPACK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_UNPACK);
     return mpi_errno;
 
 
diff --git a/src/mpi/datatype/unpack_external.c b/src/mpi/datatype/unpack_external.c
index 25ee12b..a6f8d49 100644
--- a/src/mpi/datatype/unpack_external.c
+++ b/src/mpi/datatype/unpack_external.c
@@ -70,11 +70,11 @@ int MPI_Unpack_external(const char datarep[],
     int mpi_errno = MPI_SUCCESS;
     MPI_Aint first, last;
     MPID_Segment *segp;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_UNPACK_EXTERNAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_UNPACK_EXTERNAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_UNPACK_EXTERNAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_UNPACK_EXTERNAL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -122,7 +122,7 @@ int MPI_Unpack_external(const char datarep[],
     last  = SEGMENT_IGNORE_LAST;
 
     /* Ensure that pointer increment fits in a pointer */
-    MPIU_Ensure_Aint_fits_in_pointer((MPIU_VOID_PTR_CAST_TO_MPI_AINT inbuf) + *position);
+    MPIR_Ensure_Aint_fits_in_pointer((MPIR_VOID_PTR_CAST_TO_MPI_AINT inbuf) + *position);
 
     MPID_Segment_unpack_external32(segp,
 				   first,
@@ -137,7 +137,7 @@ int MPI_Unpack_external(const char datarep[],
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_UNPACK_EXTERNAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_UNPACK_EXTERNAL);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/debugger/dbginit.c b/src/mpi/debugger/dbginit.c
index 1838087..f82b5f3 100644
--- a/src/mpi/debugger/dbginit.c
+++ b/src/mpi/debugger/dbginit.c
@@ -38,7 +38,7 @@ cvars:
       verbosity   : MPI_T_VERBOSITY_USER_BASIC
       scope       : MPI_T_SCOPE_ALL_EQ
       description : >-
-        If true, dump the proctable entries at MPIR_WaitForDebugger-time.
+        If true, dump the proctable entries at MPII_Wait_for_debugger-time.
 
 === END_MPI_T_CVAR_INFO_BLOCK ===
 */
@@ -101,7 +101,7 @@ void *MPIR_Breakpoint(void);
  * offered to the debugger.  Typical spots are in MPI_Init/MPI_Init_thread
  * after initialization is completed and in MPI_Abort before exiting.
  *
- * MPIR_DebuggerSetAborting( const char *msg )
+ * MPIR_Debugger_set_aborting( const char *msg )
  *
  * This routine should be called when MPI is exiting (either in finalize
  * or abort.  If a message is provided, it will call MPIR_Breakpoint.
@@ -111,7 +111,7 @@ void *MPIR_Breakpoint(void);
  * MPIR_being_debugged, and MPIR_debug_gate where exported globally.  
  * In MPICH, while these are global variables (so that the debugger can
  * find them easily), they are not explicitly exported or referenced outside
- * of a few routines.  In particular, MPID_Abort uses MPIR_DebuggerSetAborting
+ * of a few routines.  In particular, MPID_Abort uses MPIR_Debugger_set_aborting
  * instead of directly accessing these variables.
  */
 
@@ -177,7 +177,7 @@ static int SendqFreePool( void * );
 
 /*
  * If MPICH is built with the --enable-debugger option, MPI_Init and 
- * MPI_Init_thread will call MPIR_WaitForDebugger.  This ensures both that
+ * MPI_Init_thread will call MPII_Wait_for_debugger.  This ensures both that
  * the debugger can gather information on the MPI job before the MPI_Init
  * returns to the user and that the necessary symbols for providing 
  * information such as message queues is available.
@@ -186,7 +186,7 @@ static int SendqFreePool( void * );
  * all MPI processes to wait in this routine until the variable 
  * MPIR_debug_gate is set to 1.
  */
-void MPIR_WaitForDebugger( void )
+void MPII_Wait_for_debugger( void )
 {
 #ifdef MPIU_PROCTABLE_NEEDED
     int rank = MPIR_Process.comm_world->rank;
@@ -303,7 +303,7 @@ void * MPIR_Breakpoint( void )
  * If there is an abort message, call the MPIR_Breakpoint routine (which 
  * allows a tool such as a debugger to gain control.
  */
-void MPIR_DebuggerSetAborting( const char *msg )
+void MPIR_Debugger_set_aborting( const char *msg )
 {
     MPIR_debug_abort_string = (char *)msg;
     MPIR_debug_state        = MPIR_DEBUG_ABORTING;
@@ -348,7 +348,7 @@ static MPIR_Sendq *pool = 0;
 /* This routine is used to establish a queue of send requests to allow the
    debugger easier access to the active requests.  Some devices may be able
    to provide this information without requiring this separate queue. */
-void MPIR_Sendq_remember( MPIR_Request *req,
+void MPII_Sendq_remember( MPIR_Request *req,
 			  int rank, int tag, int context_id )
 {
 #if defined HAVE_DEBUGGER_SUPPORT
@@ -381,7 +381,7 @@ fn_exit:
 #endif  /* HAVE_DEBUGGER_SUPPORT */
 }
 
-void MPIR_Sendq_forget( MPIR_Request *req )
+void MPII_Sendq_forget( MPIR_Request *req )
 {
 #if defined HAVE_DEBUGGER_SUPPORT
     MPIR_Sendq *p, *prev;
@@ -454,7 +454,7 @@ typedef struct MPIR_Comm_list {
 
 MPIR_Comm_list MPIR_All_communicators = { 0, 0 };
 
-void MPIR_CommL_remember( MPIR_Comm *comm_ptr )
+void MPII_CommL_remember( MPIR_Comm *comm_ptr )
 {   
     MPL_DBG_MSG_P(MPIR_DBG_COMM,VERBOSE,
 		   "Adding communicator %p to remember list",comm_ptr);
@@ -474,7 +474,7 @@ void MPIR_CommL_remember( MPIR_Comm *comm_ptr )
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
 }
 
-void MPIR_CommL_forget( MPIR_Comm *comm_ptr )
+void MPII_CommL_forget( MPIR_Comm *comm_ptr )
 {
     MPIR_Comm *p, *prev;
 
diff --git a/src/mpi/debugger/dll_mpich.c b/src/mpi/debugger/dll_mpich.c
index b0af7f3..71a9ea8 100644
--- a/src/mpi/debugger/dll_mpich.c
+++ b/src/mpi/debugger/dll_mpich.c
@@ -266,7 +266,7 @@ int mqs_image_has_queues (mqs_image *image, char **message)
     /* Force in the file containing our wait-for-debugger function, to ensure 
      * that types have been read from there before we try to look them up.
      */
-    dbgr_find_function (image, (char *)"MPIR_WaitForDebugger", mqs_lang_c, NULL);
+    dbgr_find_function (image, (char *)"MPII_Wait_for_debugger", mqs_lang_c, NULL);
 
     /* Find the various global variables and structure definitions 
        that describe the communicator and message queue structures for
diff --git a/src/mpi/debugger/tvtest.c b/src/mpi/debugger/tvtest.c
index 14a2c7e..7affa3a 100644
--- a/src/mpi/debugger/tvtest.c
+++ b/src/mpi/debugger/tvtest.c
@@ -332,19 +332,19 @@ static int dbgrI_find_function( mqs_image *image, char *name,
 }
 
 
-#define MPIU_Memcpy memcpy
+#define MPIR_Memcpy memcpy
 /* Simulate requesting the debugger to fetch data from within this process */
 static int dbgrI_fetch_data( mqs_process *proc, mqs_taddr_t addr, int asize, 
 			     void *data )
 {
-    MPIU_Memcpy( data, (void *)addr, (size_t) asize );
+    MPIR_Memcpy( data, (void *)addr, (size_t) asize );
     return mqs_ok;
 }
 /* Simulate converting data to debuggers byte ordering */
 static void dbgrI_target_to_host (mqs_process *proc, const void *in_data, 
 				  void *out_data, int asize )
 {
-    MPIU_Memcpy( out_data, in_data, asize );
+    MPIR_Memcpy( out_data, in_data, asize );
 }
 
 /* Return the "debuggers" image structure (statically allocated above) */
diff --git a/src/mpi/errhan/add_error_class.c b/src/mpi/errhan/add_error_class.c
index 9b3c232..4713b36 100644
--- a/src/mpi/errhan/add_error_class.c
+++ b/src/mpi/errhan/add_error_class.c
@@ -50,12 +50,12 @@ int MPI_Add_error_class(int *errorclass)
     static const char FCNAME[] = "MPI_Add_error_class";
     int mpi_errno = MPI_SUCCESS;
     int new_class;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ADD_ERROR_CLASS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ADD_ERROR_CLASS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ADD_ERROR_CLASS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ADD_ERROR_CLASS);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -83,7 +83,7 @@ int MPI_Add_error_class(int *errorclass)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ADD_ERROR_CLASS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ADD_ERROR_CLASS);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/add_error_code.c b/src/mpi/errhan/add_error_code.c
index f560a05..b2a152f 100644
--- a/src/mpi/errhan/add_error_code.c
+++ b/src/mpi/errhan/add_error_code.c
@@ -53,12 +53,12 @@ int MPI_Add_error_code(int errorclass, int *errorcode)
     static const char FCNAME[] = "MPI_Add_error_code";
     int mpi_errno = MPI_SUCCESS;
     int new_code;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ADD_ERROR_CODE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ADD_ERROR_CODE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ADD_ERROR_CODE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ADD_ERROR_CODE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -82,7 +82,7 @@ int MPI_Add_error_code(int errorclass, int *errorcode)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ADD_ERROR_CODE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ADD_ERROR_CODE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/add_error_string.c b/src/mpi/errhan/add_error_string.c
index d226f59..b8ea233 100644
--- a/src/mpi/errhan/add_error_string.c
+++ b/src/mpi/errhan/add_error_string.c
@@ -62,12 +62,12 @@ int MPI_Add_error_string(int errorcode, const char *string)
 {
     static const char FCNAME[] = "MPI_Add_error_string";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ADD_ERROR_STRING);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ADD_ERROR_STRING);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ADD_ERROR_STRING);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ADD_ERROR_STRING);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -88,7 +88,7 @@ int MPI_Add_error_string(int errorcode, const char *string)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ADD_ERROR_STRING);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ADD_ERROR_STRING);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/comm_call_errhandler.c b/src/mpi/errhan/comm_call_errhandler.c
index cb3245e..6bcd174 100644
--- a/src/mpi/errhan/comm_call_errhandler.c
+++ b/src/mpi/errhan/comm_call_errhandler.c
@@ -57,11 +57,11 @@ int MPI_Comm_call_errhandler(MPI_Comm comm, int errorcode)
     int mpi_errno = MPI_SUCCESS;
     int in_cs = FALSE;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_CALL_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_CALL_ERRHANDLER);
     
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_CALL_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_CALL_ERRHANDLER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -158,7 +158,7 @@ int MPI_Comm_call_errhandler(MPI_Comm comm, int errorcode)
     if (in_cs)
         MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
 
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_CALL_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_CALL_ERRHANDLER);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/errhan/comm_create_errhandler.c b/src/mpi/errhan/comm_create_errhandler.c
index 4dbb52e..0d530ee 100644
--- a/src/mpi/errhan/comm_create_errhandler.c
+++ b/src/mpi/errhan/comm_create_errhandler.c
@@ -36,12 +36,12 @@ int MPIR_Comm_create_errhandler_impl(MPI_Comm_errhandler_function *comm_errhandl
     int mpi_errno = MPI_SUCCESS;
     MPIR_Errhandler *errhan_ptr;
         
-    errhan_ptr = (MPIR_Errhandler *)MPIU_Handle_obj_alloc( &MPIR_Errhandler_mem );
+    errhan_ptr = (MPIR_Errhandler *)MPIR_Handle_obj_alloc( &MPIR_Errhandler_mem );
     MPIR_ERR_CHKANDJUMP(!errhan_ptr, mpi_errno, MPI_ERR_OTHER, "**nomem");
 
     errhan_ptr->language = MPIR_LANG__C;
     errhan_ptr->kind	 = MPIR_COMM;
-    MPIU_Object_set_ref(errhan_ptr,1);
+    MPIR_Object_set_ref(errhan_ptr,1);
     errhan_ptr->errfn.C_Comm_Handler_function = comm_errhandler_fn;
 
     MPIR_OBJ_PUBLISH_HANDLE(*errhandler, errhan_ptr->handle);
@@ -85,12 +85,12 @@ int MPI_Comm_create_errhandler(MPI_Comm_errhandler_function *comm_errhandler_fn,
                                MPI_Errhandler *errhandler)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_CREATE_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_CREATE_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_CREATE_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_CREATE_ERRHANDLER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -112,7 +112,7 @@ int MPI_Comm_create_errhandler(MPI_Comm_errhandler_function *comm_errhandler_fn,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_CREATE_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_CREATE_ERRHANDLER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/comm_get_errhandler.c b/src/mpi/errhan/comm_get_errhandler.c
index 3955c31..26a3ce6 100644
--- a/src/mpi/errhan/comm_get_errhandler.c
+++ b/src/mpi/errhan/comm_get_errhandler.c
@@ -70,12 +70,12 @@ int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errhandler *errhandler_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_GET_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_GET_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_GET_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_GET_ERRHANDLER);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -118,7 +118,7 @@ int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_GET_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_GET_ERRHANDLER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/comm_set_errhandler.c b/src/mpi/errhan/comm_set_errhandler.c
index b487309..481dc54 100644
--- a/src/mpi/errhan/comm_set_errhandler.c
+++ b/src/mpi/errhan/comm_set_errhandler.c
@@ -82,12 +82,12 @@ int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errhandler *errhan_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_SET_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_SET_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_SET_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_SET_ERRHANDLER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -130,7 +130,7 @@ int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_SET_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_SET_ERRHANDLER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/errhan/errhandler_create.c b/src/mpi/errhan/errhandler_create.c
index cfde34d..3a33582 100644
--- a/src/mpi/errhan/errhandler_create.c
+++ b/src/mpi/errhan/errhandler_create.c
@@ -65,12 +65,12 @@ int MPI_Errhandler_create(MPI_Handler_function *function,
 {
     static const char FCNAME[] = "MPI_Errhandler_create";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ERRHANDLER_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ERRHANDLER_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ERRHANDLER_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ERRHANDLER_CREATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -91,7 +91,7 @@ int MPI_Errhandler_create(MPI_Handler_function *function,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ERRHANDLER_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ERRHANDLER_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/errhandler_free.c b/src/mpi/errhan/errhandler_free.c
index 91b45da..d6a5f94 100644
--- a/src/mpi/errhan/errhandler_free.c
+++ b/src/mpi/errhan/errhandler_free.c
@@ -53,12 +53,12 @@ int MPI_Errhandler_free(MPI_Errhandler *errhandler)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Errhandler *errhan_ptr = NULL;
     int in_use;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ERRHANDLER_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ERRHANDLER_FREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ERRHANDLER_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ERRHANDLER_FREE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -91,7 +91,7 @@ int MPI_Errhandler_free(MPI_Errhandler *errhandler)
     
     MPIR_Errhandler_release_ref( errhan_ptr,&in_use);
     if (!in_use) {
-	MPIU_Handle_obj_free( &MPIR_Errhandler_mem, errhan_ptr );
+	MPIR_Handle_obj_free( &MPIR_Errhandler_mem, errhan_ptr );
     }
     *errhandler = MPI_ERRHANDLER_NULL;
     
@@ -100,7 +100,7 @@ int MPI_Errhandler_free(MPI_Errhandler *errhandler)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ERRHANDLER_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ERRHANDLER_FREE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/errhandler_get.c b/src/mpi/errhan/errhandler_get.c
index bacd7f2..fee11fa 100644
--- a/src/mpi/errhan/errhandler_get.c
+++ b/src/mpi/errhan/errhandler_get.c
@@ -66,12 +66,12 @@ int MPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errhandler *errhandler_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ERRHANDLER_GET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ERRHANDLER_GET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ERRHANDLER_GET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ERRHANDLER_GET);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -114,7 +114,7 @@ int MPI_Errhandler_get(MPI_Comm comm, MPI_Errhandler *errhandler)
 #   ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #   endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ERRHANDLER_GET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ERRHANDLER_GET);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/errhandler_set.c b/src/mpi/errhan/errhandler_set.c
index 722a40d..eb777ff 100644
--- a/src/mpi/errhan/errhandler_set.c
+++ b/src/mpi/errhan/errhandler_set.c
@@ -57,12 +57,12 @@ int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Errhandler *errhan_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ERRHANDLER_SET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ERRHANDLER_SET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ERRHANDLER_SET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ERRHANDLER_SET);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -107,7 +107,7 @@ int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler errhandler)
 #   ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #   endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ERRHANDLER_SET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ERRHANDLER_SET);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/errnames.txt b/src/mpi/errhan/errnames.txt
index de94a8b..af540d1 100644
--- a/src/mpi/errhan/errnames.txt
+++ b/src/mpi/errhan/errnames.txt
@@ -1499,8 +1499,8 @@ is too big (> MPIU_SHMW_GHND_SZ)
 **mpir_comm_get_attr %C %d %p %p:MPIR_Comm_get_attr(%C, comm_keyval=%d, attribute_val=%p, flag=%p) failed
 **mpir_type_get_attr:MPIR_Type_get_attr failed
 **mpir_type_get_attr %D %d %p %p:MPIR_Type_get_attr(%D, type_keyval=%d, attribute_val=%p, flag=%p) failed
-**mpir_wingetattr:MPIR_WinGetAttr failed
-**mpir_wingetattr %W %d %p %p:MPIR_WinGetAttr(%W, win_keyval=%d, attribute_val=%p, flag=%p) failed
+**mpir_wingetattr:MPII_Win_get_attr failed
+**mpir_wingetattr %W %d %p %p:MPII_Win_get_attr(%W, win_keyval=%d, attribute_val=%p, flag=%p) failed
 
 ## MPI-3 Mprobe routines
 **mpi_improbe: MPI_Improbe failed
diff --git a/src/mpi/errhan/error_class.c b/src/mpi/errhan/error_class.c
index 44b309b..06024c0 100644
--- a/src/mpi/errhan/error_class.c
+++ b/src/mpi/errhan/error_class.c
@@ -53,11 +53,11 @@ int MPI_Error_class(int errorcode, int *errorclass)
     static const char FCNAME[] = "MPI_Error_class";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ERROR_CLASS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ERROR_CLASS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ERROR_CLASS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ERROR_CLASS);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -81,7 +81,7 @@ int MPI_Error_class(int errorcode, int *errorclass)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ERROR_CLASS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ERROR_CLASS);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/errhan/error_string.c b/src/mpi/errhan/error_string.c
index e967c66..5f6f9c5 100644
--- a/src/mpi/errhan/error_string.c
+++ b/src/mpi/errhan/error_string.c
@@ -59,11 +59,11 @@ int MPI_Error_string(int errorcode, char *string, int *resultlen)
     static const char FCNAME[] = "MPI_Error_string";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ERROR_STRING);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ERROR_STRING);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ERROR_STRING);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ERROR_STRING);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -87,7 +87,7 @@ int MPI_Error_string(int errorcode, char *string, int *resultlen)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ERROR_STRING);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ERROR_STRING);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/errhan/errutil.c b/src/mpi/errhan/errutil.c
index 582bbac..fa0f676 100644
--- a/src/mpi/errhan/errutil.c
+++ b/src/mpi/errhan/errutil.c
@@ -85,20 +85,20 @@ cvars:
  * The third group of code handles the error messages.  There are four
  * options, controlled by the value of MPICH_ERROR_MSG_LEVEL. 
  *
- * MPICH_ERROR_MSG_NONE - No text messages at all
- * MPICH_ERROR_MSG_CLASS - Only messages for the MPI error classes
- * MPICH_ERROR_MSG_GENERIC - Only predefiend messages for the MPI error codes
- * MPICH_ERROR_MSG_ALL - Instance specific error messages (and error message
+ * MPICH_ERROR_MSG__NONE - No text messages at all
+ * MPICH_ERROR_MSG__CLASS - Only messages for the MPI error classes
+ * MPICH_ERROR_MSG__GENERIC - Only predefined messages for the MPI error codes
+ * MPICH_ERROR_MSG__ALL - Instance specific error messages (and error message
  *                       stack)
  *
- * In only the latter (MPICH_ERROR_MSG_ALL) case are instance-specific
+ * In only the latter (MPICH_ERROR_MSG__ALL) case are instance-specific
  * messages maintained (including the error message "stack" that you may
  * see mentioned in various places.  In the other cases, an error code 
- * identifies a fixed message string (unless MPICH_ERROR_MSG_NONE,
+ * identifies a fixed message string (unless MPICH_ERROR_MSG__NONE,
  * when there are no strings) from the "generic" strings defined in defmsg.h
  *
  * A major subgroup in this section is the code to handle the instance-specific
- * messages (MPICH_ERROR_MSG_ALL only).  
+ * messages (MPICH_ERROR_MSG__ALL only).
  *
  * An MPI error code is made up of a number of fields (see errcodes.h)
  * These ar 
@@ -118,7 +118,7 @@ static int did_err_init = FALSE; /* helps us solve a bootstrapping problem */
 
 static int checkValidErrcode( int, const char [], int * );
 
-#if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG_ALL
+#if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG__ALL
 static int ErrGetInstanceString( int, char [], int );
 static void MPIR_Err_stack_init( void );
 static int checkForUserErrcode( int );
@@ -146,14 +146,14 @@ static int checkForUserErrcode( int );
 MPIR_Errhandler MPIR_Errhandler_builtin[3] = { {0} };
 MPIR_Errhandler MPIR_Errhandler_direct[MPIR_ERRHANDLER_PREALLOC] =
     { {0} };
-MPIU_Object_alloc_t MPIR_Errhandler_mem = { 0, 0, 0, 0, MPIR_ERRHANDLER,
+MPIR_Object_alloc_t MPIR_Errhandler_mem = { 0, 0, 0, 0, MPIR_ERRHANDLER,
 					    sizeof(MPIR_Errhandler),
 					    MPIR_Errhandler_direct,
 					    MPIR_ERRHANDLER_PREALLOC, };
 
 void MPIR_Errhandler_free(MPIR_Errhandler *errhan_ptr)
 {
-    MPIU_Handle_obj_free(&MPIR_Errhandler_mem, errhan_ptr);
+    MPIR_Handle_obj_free(&MPIR_Errhandler_mem, errhan_ptr);
 }
 
 void MPIR_Err_init( void )
@@ -164,7 +164,7 @@ void MPIR_Err_init( void )
     MPIR_Errhandler_builtin[1].handle = MPI_ERRORS_RETURN;
     MPIR_Errhandler_builtin[2].handle = MPIR_ERRORS_THROW_EXCEPTIONS;
 
-#   if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG_ALL
+#   if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG__ALL
     MPIR_Err_stack_init();
 #   endif
     did_err_init = TRUE;
@@ -176,7 +176,7 @@ void MPIR_Err_init( void )
 /* This routine is used to install a callback used by the C++ binding
  to invoke the (C++) error handler.  The callback routine is a C routine,
  defined in the C++ binding. */
-void MPIR_Errhandler_set_cxx( MPI_Errhandler errhand, void (*errcall)(void) )
+void MPII_Errhandler_set_cxx( MPI_Errhandler errhand, void (*errcall)(void) )
 {
     MPIR_Errhandler *errhand_ptr;
     
@@ -188,7 +188,7 @@ void MPIR_Errhandler_set_cxx( MPI_Errhandler errhand, void (*errcall)(void) )
 #endif /* HAVE_CXX_BINDING */
 
 #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT)
-void MPIR_Errhandler_set_fc( MPI_Errhandler errhand )
+void MPII_Errhandler_set_fc( MPI_Errhandler errhand )
 {
     MPIR_Errhandler *errhand_ptr;
     
@@ -279,7 +279,7 @@ int MPIR_Err_return_comm( MPIR_Comm  *comm_ptr, const char fcname[],
     }
     /* --END ERROR HANDLING-- */
 
-    MPIU_Assert(comm_ptr != NULL);
+    MPIR_Assert(comm_ptr != NULL);
 
     /* comm_ptr may have changed to comm_world.  Keep this locked as long as we
      * are using the errhandler to prevent it from disappearing out from under
@@ -557,7 +557,7 @@ void MPIR_Err_get_string( int errorcode, char * msg, int length,
 
     /* The fn (fourth) argument was added improperly and is no longer 
        used. */
-    MPIU_Assert( fn == NULL );
+    MPIR_Assert( fn == NULL );
     
     /* There was code to set num_remaining to MPI_MAX_ERROR_STRING
        if it was zero.  But based on the usage of this routine, 
@@ -628,10 +628,10 @@ void MPIR_Err_get_string( int errorcode, char * msg, int length,
 
         /* FIXME: Replace with function to add instance string or
            error code string */
-#       if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG_ALL
+#       if MPICH_ERROR_MSG_LEVEL >= MPICH_ERROR_MSG__ALL
 	if (ErrGetInstanceString( errorcode, msg, num_remaining )) 
 	    goto fn_exit;
-#elif MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_CLASS
+#elif MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__CLASS
 	{
 	    int generic_idx;
 	    
@@ -650,7 +650,7 @@ fn_exit:
     return;
 }
 
-#if MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG_NONE
+#if MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__NONE
 /* No error message support */
 int MPIR_Err_create_code( int lastcode, int fatal, const char fcname[], 
 			  int line, int error_class, const char generic_msg[],
@@ -678,7 +678,7 @@ static const char *get_class_msg( int error_class )
     return "Error message texts are not available";
 }
 
-#elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG_CLASS
+#elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__CLASS
 /* Only class error messages.  Note this is nearly the same as
    MPICH_ERROR_MSG_LEVEL == NONE, since the handling of error codes
    is the same */
@@ -714,7 +714,7 @@ static const char *get_class_msg( int error_class )
     }
 }
 
-#elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG_GENERIC
+#elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__GENERIC
 #define NEEDS_FIND_GENERIC_MSG_INDEX
 static int FindGenericMsgIndex( const char [] );
 
@@ -769,7 +769,7 @@ static const char *get_class_msg( int error_class )
     }
 }
 
-#elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG_ALL
+#elif MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__ALL
 /* General error message support, including the error message stack */
 
 static int checkErrcodeIsValid( int );
@@ -1582,7 +1582,7 @@ static int vsnprintf_mpi(char *str, size_t maxlen, const char *fmt_orig,
 	}
 	if (len)
 	{
-	    MPIU_Memcpy(str, begin, len);
+	    MPIR_Memcpy(str, begin, len);
 	    str += len;
 	    maxlen -= len;
 	}
@@ -1769,7 +1769,7 @@ static int vsnprintf_mpi(char *str, size_t maxlen, const char *fmt_orig,
 	    break;
         case (int)'c':
             c = va_arg(list, MPI_Count);
-            MPIU_Assert(sizeof(long long) >= sizeof(MPI_Count));
+            MPIR_Assert(sizeof(long long) >= sizeof(MPI_Count));
             MPL_snprintf(str, maxlen, "%lld", (long long)c);
             break;
 	default:
diff --git a/src/mpi/errhan/file_call_errhandler.c b/src/mpi/errhan/file_call_errhandler.c
index 48aa6cd..8e3a71e 100644
--- a/src/mpi/errhan/file_call_errhandler.c
+++ b/src/mpi/errhan/file_call_errhandler.c
@@ -54,11 +54,11 @@ int MPI_File_call_errhandler(MPI_File fh, int errorcode)
     MPIR_Errhandler *e;
     MPI_Errhandler eh;
 #endif
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_FILE_CALL_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_FILE_CALL_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_FILE_CALL_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_FILE_CALL_ERRHANDLER);
 
 #ifdef MPI_MODE_RDONLY
     /* Validate parameters, especially handles needing to be converted */
@@ -136,7 +136,7 @@ int MPI_File_call_errhandler(MPI_File fh, int errorcode)
 #else 
  fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_FILE_CALL_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_FILE_CALL_ERRHANDLER);
     return mpi_errno;
 }
 
diff --git a/src/mpi/errhan/file_create_errhandler.c b/src/mpi/errhan/file_create_errhandler.c
index 18fac11..b382111 100644
--- a/src/mpi/errhan/file_create_errhandler.c
+++ b/src/mpi/errhan/file_create_errhandler.c
@@ -53,12 +53,12 @@ int MPI_File_create_errhandler(MPI_File_errhandler_function *file_errhandler_fn,
     static const char FCNAME[] = "MPI_File_create_errhandler";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Errhandler *errhan_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_FILE_CREATE_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_FILE_CREATE_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_FILE_CREATE_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_FILE_CREATE_ERRHANDLER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -74,18 +74,18 @@ int MPI_File_create_errhandler(MPI_File_errhandler_function *file_errhandler_fn,
     
     /* ... body of routine ...  */
     
-    errhan_ptr = (MPIR_Errhandler *)MPIU_Handle_obj_alloc( &MPIR_Errhandler_mem );
+    errhan_ptr = (MPIR_Errhandler *)MPIR_Handle_obj_alloc( &MPIR_Errhandler_mem );
     MPIR_ERR_CHKANDJUMP(!errhan_ptr,mpi_errno,MPI_ERR_OTHER,"**nomem");
     errhan_ptr->language = MPIR_LANG__C;
     errhan_ptr->kind	 = MPIR_FILE;
-    MPIU_Object_set_ref(errhan_ptr,1);
+    MPIR_Object_set_ref(errhan_ptr,1);
     errhan_ptr->errfn.C_File_Handler_function = file_errhandler_fn;
 
     MPIR_OBJ_PUBLISH_HANDLE(*errhandler, errhan_ptr->handle);
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_FILE_CREATE_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_FILE_CREATE_ERRHANDLER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/file_get_errhandler.c b/src/mpi/errhan/file_get_errhandler.c
index 7112917..3d6c223 100644
--- a/src/mpi/errhan/file_get_errhandler.c
+++ b/src/mpi/errhan/file_get_errhandler.c
@@ -57,12 +57,12 @@ int MPI_File_get_errhandler(MPI_File file, MPI_Errhandler *errhandler)
     MPI_Errhandler eh;
     MPIR_Errhandler *e;
 #endif
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_FILE_GET_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_FILE_GET_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_FILE_GET_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_FILE_GET_ERRHANDLER);
 
 #ifdef MPI_MODE_RDONLY
     /* Validate parameters, especially handles needing to be converted */
@@ -103,7 +103,7 @@ int MPI_File_get_errhandler(MPI_File file, MPI_Errhandler *errhandler)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_FILE_GET_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_FILE_GET_ERRHANDLER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/file_set_errhandler.c b/src/mpi/errhan/file_set_errhandler.c
index 06c0cf1..9ffeba9 100644
--- a/src/mpi/errhan/file_set_errhandler.c
+++ b/src/mpi/errhan/file_set_errhandler.c
@@ -56,11 +56,11 @@ int MPI_File_set_errhandler(MPI_File file, MPI_Errhandler errhandler)
     MPIR_Errhandler *errhan_ptr = NULL, *old_errhandler_ptr;
     MPI_Errhandler old_errhandler;
 #endif
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_FILE_SET_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_FILE_SET_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_FILE_SET_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_FILE_SET_ERRHANDLER);
 
 #ifdef MPI_MODE_RDONLY
 
@@ -134,7 +134,7 @@ int MPI_File_set_errhandler(MPI_File file, MPI_Errhandler errhandler)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_FILE_SET_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_FILE_SET_ERRHANDLER);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/errhan/win_call_errhandler.c b/src/mpi/errhan/win_call_errhandler.c
index bd76666..973f4ae 100644
--- a/src/mpi/errhan/win_call_errhandler.c
+++ b/src/mpi/errhan/win_call_errhandler.c
@@ -57,11 +57,11 @@ int MPI_Win_call_errhandler(MPI_Win win, int errorcode)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     int in_cs = FALSE;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_CALL_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_CALL_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_CALL_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_CALL_ERRHANDLER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -150,7 +150,7 @@ int MPI_Win_call_errhandler(MPI_Win win, int errorcode)
     if (in_cs)
         MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_WIN_MUTEX(win_ptr));
 
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_CALL_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_CALL_ERRHANDLER);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/errhan/win_create_errhandler.c b/src/mpi/errhan/win_create_errhandler.c
index 3cbbea8..5ffcdc8 100644
--- a/src/mpi/errhan/win_create_errhandler.c
+++ b/src/mpi/errhan/win_create_errhandler.c
@@ -55,12 +55,12 @@ int MPI_Win_create_errhandler(MPI_Win_errhandler_function *win_errhandler_fn,
     static const char FCNAME[] = "MPI_Win_create_errhandler";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Errhandler *errhan_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_CREATE_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_CREATE_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_CREATE_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_CREATE_ERRHANDLER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -76,19 +76,19 @@ int MPI_Win_create_errhandler(MPI_Win_errhandler_function *win_errhandler_fn,
     
     /* ... body of routine ...  */
     
-    errhan_ptr = (MPIR_Errhandler *)MPIU_Handle_obj_alloc( &MPIR_Errhandler_mem );
+    errhan_ptr = (MPIR_Errhandler *)MPIR_Handle_obj_alloc( &MPIR_Errhandler_mem );
     MPIR_ERR_CHKANDJUMP1(!errhan_ptr,mpi_errno,MPI_ERR_OTHER,"**nomem",
 			 "**nomem %s", "MPI_Errhandler");
     errhan_ptr->language = MPIR_LANG__C;
     errhan_ptr->kind	 = MPIR_WIN;
-    MPIU_Object_set_ref(errhan_ptr,1);
+    MPIR_Object_set_ref(errhan_ptr,1);
     errhan_ptr->errfn.C_Win_Handler_function = win_errhandler_fn;
 
     MPIR_OBJ_PUBLISH_HANDLE(*errhandler, errhan_ptr->handle);
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_CREATE_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_CREATE_ERRHANDLER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/win_get_errhandler.c b/src/mpi/errhan/win_get_errhandler.c
index ba96dcd..bae75e1 100644
--- a/src/mpi/errhan/win_get_errhandler.c
+++ b/src/mpi/errhan/win_get_errhandler.c
@@ -55,12 +55,12 @@ int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler)
 #endif
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_GET_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_GET_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_GET_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_GET_ERRHANDLER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -111,7 +111,7 @@ int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_GET_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_GET_ERRHANDLER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/errhan/win_set_errhandler.c b/src/mpi/errhan/win_set_errhandler.c
index 5c5a4c4..9ec2968 100644
--- a/src/mpi/errhan/win_set_errhandler.c
+++ b/src/mpi/errhan/win_set_errhandler.c
@@ -54,11 +54,11 @@ int MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler)
     MPIR_Win *win_ptr = NULL;
     int  in_use;
     MPIR_Errhandler *errhan_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_SET_ERRHANDLER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_SET_ERRHANDLER);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_SET_ERRHANDLER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_SET_ERRHANDLER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -123,7 +123,7 @@ int MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_SET_ERRHANDLER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_SET_ERRHANDLER);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/errhan/windefmsg.h b/src/mpi/errhan/windefmsg.h
index 57b2d0f..f347d16 100644
--- a/src/mpi/errhan/windefmsg.h
+++ b/src/mpi/errhan/windefmsg.h
@@ -10,7 +10,7 @@ typedef struct msgpair {
         const unsigned int sentinal1;
         const char *short_name, *long_name; 
         const unsigned int sentinal2; } msgpair;
-#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_NONE
+#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__NONE
 /* The names are in sorted order, allowing the use of a simple
   linear search or bisection algorithm to find the message corresponding to
   a particular message */
@@ -1997,7 +1997,7 @@ static const msgpair generic_err_msgs[] = {
 };
 #endif
 
-#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_GENERIC
+#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__GENERIC
 static const char short_spc0[] = "**CreateFileMapping %d";
 static const char long_spc0[]  = "CreateFileMapping failed, error %d";
 static const char short_spc1[] = "**CreateThread %d";
@@ -3285,7 +3285,7 @@ static const msgpair specific_err_msgs[] = {
 };
 #endif
 
-#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_NONE
+#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__NONE
 #define MPIR_MAX_ERROR_CLASS_INDEX 54
 static int class_to_index[] = {
 641,42,113,126,642,104,548,564,164,504,
diff --git a/src/mpi/group/group.h b/src/mpi/group/group.h
index f0ede34..31d7f6c 100644
--- a/src/mpi/group/group.h
+++ b/src/mpi/group/group.h
@@ -6,7 +6,7 @@
 
 /* MPIR_Group_create is needed by some of the routines that return groups
    from communicators, so it is in mpidimpl.h */
-void MPIR_Group_setup_lpid_list( MPIR_Group * );
+void MPII_Group_setup_lpid_list( MPIR_Group * );
 int MPIR_Group_check_valid_ranks( MPIR_Group *, const int [], int );
 int MPIR_Group_check_valid_ranges( MPIR_Group *, int [][3], int );
 void MPIR_Group_setup_lpid_pairs( MPIR_Group *, MPIR_Group * );
diff --git a/src/mpi/group/group_compare.c b/src/mpi/group/group_compare.c
index 3a7a718..dd92714 100644
--- a/src/mpi/group/group_compare.c
+++ b/src/mpi/group/group_compare.c
@@ -43,11 +43,11 @@ int MPIR_Group_compare_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, int
     g2_idx = group_ptr2->idx_of_first_lpid;
     /* If the lpid list hasn't been created, do it now */
     if (g1_idx < 0) {
-	MPIR_Group_setup_lpid_list( group_ptr1 );
+	MPII_Group_setup_lpid_list( group_ptr1 );
 	g1_idx = group_ptr1->idx_of_first_lpid;
     }
     if (g2_idx < 0) {
-	MPIR_Group_setup_lpid_list( group_ptr2 );
+	MPII_Group_setup_lpid_list( group_ptr2 );
 	g2_idx = group_ptr2->idx_of_first_lpid;
     }
     while (g1_idx >= 0 && g2_idx >= 0) {
@@ -112,7 +112,7 @@ int MPI_Group_compare(MPI_Group group1, MPI_Group group2, int *result)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *group_ptr1 = NULL;
     MPIR_Group *group_ptr2 = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_COMPARE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_COMPARE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
@@ -120,7 +120,7 @@ int MPI_Group_compare(MPI_Group group1, MPI_Group group2, int *result)
        within a mutex.  As most of the group routines are not performance
        critical, we simple run these routines within the SINGLE_CS */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_COMPARE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_COMPARE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -162,7 +162,7 @@ int MPI_Group_compare(MPI_Group group1, MPI_Group group2, int *result)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_COMPARE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_COMPARE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/group_difference.c b/src/mpi/group/group_difference.c
index 446e93e..bc85663 100644
--- a/src/mpi/group/group_difference.c
+++ b/src/mpi/group/group_difference.c
@@ -34,9 +34,9 @@ int MPIR_Group_difference_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, M
 {
     int mpi_errno = MPI_SUCCESS;
     int size1, i, k, g1_idx, g2_idx, l1_pid, l2_pid, nnew;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GROUP_DIFFERENCE_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GROUP_DIFFERENCE_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GROUP_DIFFERENCE_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GROUP_DIFFERENCE_IMPL);
     /* Return a group consisting of the members of group1 that are *not*
        in group2 */
     size1 = group_ptr1->size;
@@ -97,7 +97,7 @@ int MPIR_Group_difference_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, M
 
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GROUP_DIFFERENCE_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GROUP_DIFFERENCE_IMPL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -142,12 +142,12 @@ int MPI_Group_difference(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup
     MPIR_Group *group_ptr1 = NULL;
     MPIR_Group *group_ptr2 = NULL;
     MPIR_Group *new_group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_DIFFERENCE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_DIFFERENCE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_DIFFERENCE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_DIFFERENCE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -190,7 +190,7 @@ int MPI_Group_difference(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_DIFFERENCE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_DIFFERENCE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/group_excl.c b/src/mpi/group/group_excl.c
index 321d773..fb3e0ce 100644
--- a/src/mpi/group/group_excl.c
+++ b/src/mpi/group/group_excl.c
@@ -34,9 +34,9 @@ int MPIR_Group_excl_impl(MPIR_Group *group_ptr, int n, const int ranks[], MPIR_G
 {
     int mpi_errno = MPI_SUCCESS;
     int size, i, newi;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GROUP_EXCL_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GROUP_EXCL_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GROUP_EXCL_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GROUP_EXCL_IMPL);
 
     size = group_ptr->size;
 
@@ -69,7 +69,7 @@ int MPIR_Group_excl_impl(MPIR_Group *group_ptr, int n, const int ranks[], MPIR_G
     /* TODO calculate is_local_dense_monotonic */
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GROUP_EXCL_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GROUP_EXCL_IMPL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -119,12 +119,12 @@ int MPI_Group_excl(MPI_Group group, int n, const int ranks[], MPI_Group *newgrou
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *group_ptr = NULL, *new_group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_EXCL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_EXCL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_EXCL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_EXCL);
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -171,7 +171,7 @@ int MPI_Group_excl(MPI_Group group, int n, const int ranks[], MPI_Group *newgrou
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_EXCL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_EXCL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/group_free.c b/src/mpi/group/group_free.c
index 102f6ef..e3b9491 100644
--- a/src/mpi/group/group_free.c
+++ b/src/mpi/group/group_free.c
@@ -74,12 +74,12 @@ int MPI_Group_free(MPI_Group *group)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *group_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_FREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_FREE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -127,7 +127,7 @@ int MPI_Group_free(MPI_Group *group)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_FREE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/group_incl.c b/src/mpi/group/group_incl.c
index 5194dae..a85c02a 100644
--- a/src/mpi/group/group_incl.c
+++ b/src/mpi/group/group_incl.c
@@ -34,9 +34,9 @@ int MPIR_Group_incl_impl(MPIR_Group *group_ptr, int n, const int ranks[], MPIR_G
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GROUP_INCL_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GROUP_INCL_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GROUP_INCL_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GROUP_INCL_IMPL);
 
     if (n == 0) {
         *new_group_ptr = MPIR_Group_empty;
@@ -59,7 +59,7 @@ int MPIR_Group_incl_impl(MPIR_Group *group_ptr, int n, const int ranks[], MPIR_G
 
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GROUP_INCL_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GROUP_INCL_IMPL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -105,12 +105,12 @@ int MPI_Group_incl(MPI_Group group, int n, const int ranks[], MPI_Group *newgrou
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *group_ptr = NULL, *new_group_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_INCL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_INCL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_INCL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_INCL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -160,7 +160,7 @@ int MPI_Group_incl(MPI_Group group, int n, const int ranks[], MPI_Group *newgrou
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_INCL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_INCL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/group_intersection.c b/src/mpi/group/group_intersection.c
index 4a938a7..5da9730 100644
--- a/src/mpi/group/group_intersection.c
+++ b/src/mpi/group/group_intersection.c
@@ -34,9 +34,9 @@ int MPIR_Group_intersection_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2,
 {
     int mpi_errno = MPI_SUCCESS;
     int size1, i, k, g1_idx, g2_idx, l1_pid, l2_pid, nnew;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GROUP_INTERSECTION_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GROUP_INTERSECTION_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GROUP_INTERSECTION_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GROUP_INTERSECTION_IMPL);
     /* Return a group consisting of the members of group1 that are 
        in group2 */
     size1 = group_ptr1->size;
@@ -95,7 +95,7 @@ int MPIR_Group_intersection_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2,
     }
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GROUP_INTERSECTION_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GROUP_INTERSECTION_IMPL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -142,12 +142,12 @@ int MPI_Group_intersection(MPI_Group group1, MPI_Group group2, MPI_Group *newgro
     MPIR_Group *group_ptr1 = NULL;
     MPIR_Group *group_ptr2 = NULL;
     MPIR_Group *new_group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_INTERSECTION);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_INTERSECTION);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_INTERSECTION);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_INTERSECTION);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -190,7 +190,7 @@ int MPI_Group_intersection(MPI_Group group1, MPI_Group group2, MPI_Group *newgro
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_INTERSECTION);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_INTERSECTION);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/group_range_excl.c b/src/mpi/group/group_range_excl.c
index 8337836..176e52c 100644
--- a/src/mpi/group/group_range_excl.c
+++ b/src/mpi/group/group_range_excl.c
@@ -33,9 +33,9 @@ int MPIR_Group_range_excl_impl(MPIR_Group *group_ptr, int n, int ranges[][3], MP
 {
     int mpi_errno = MPI_SUCCESS;
     int size, i, j, k, nnew, first, last, stride;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GROUP_RANGE_EXCL_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GROUP_RANGE_EXCL_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GROUP_RANGE_EXCL_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GROUP_RANGE_EXCL_IMPL);
     /* Compute size, assuming that included ranks are valid (and distinct) */
     size = group_ptr->size;
     nnew = 0;
@@ -103,7 +103,7 @@ int MPIR_Group_range_excl_impl(MPIR_Group *group_ptr, int n, int ranges[][3], MP
 
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GROUP_RANGE_EXCL_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GROUP_RANGE_EXCL_IMPL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -155,12 +155,12 @@ int MPI_Group_range_excl(MPI_Group group, int n, int ranges[][3],
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *group_ptr = NULL, *new_group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_RANGE_EXCL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_RANGE_EXCL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_RANGE_EXCL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_RANGE_EXCL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -207,7 +207,7 @@ int MPI_Group_range_excl(MPI_Group group, int n, int ranges[][3],
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_RANGE_EXCL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_RANGE_EXCL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/group_range_incl.c b/src/mpi/group/group_range_incl.c
index 99b2d9d..b00cc85 100644
--- a/src/mpi/group/group_range_incl.c
+++ b/src/mpi/group/group_range_incl.c
@@ -34,9 +34,9 @@ int MPIR_Group_range_incl_impl(MPIR_Group *group_ptr, int n, int ranges[][3], MP
 {
     int mpi_errno = MPI_SUCCESS;
     int first, last, stride, nnew, i, j, k;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GROUP_RANGE_INCL_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GROUP_RANGE_INCL_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GROUP_RANGE_INCL_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GROUP_RANGE_INCL_IMPL);
 
     /* Compute size, assuming that included ranks are valid (and distinct) */
     nnew = 0;
@@ -86,7 +86,7 @@ int MPIR_Group_range_incl_impl(MPIR_Group *group_ptr, int n, int ranges[][3], MP
     /* TODO calculate is_local_dense_monotonic */
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GROUP_RANGE_INCL_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GROUP_RANGE_INCL_IMPL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -134,12 +134,12 @@ int MPI_Group_range_incl(MPI_Group group, int n, int ranges[][3],
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *group_ptr = NULL, *new_group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_RANGE_INCL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_RANGE_INCL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_RANGE_INCL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_RANGE_INCL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -183,7 +183,7 @@ int MPI_Group_range_incl(MPI_Group group, int n, int ranges[][3],
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_RANGE_INCL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_RANGE_INCL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/group_rank.c b/src/mpi/group/group_rank.c
index 2f4371f..b420c23 100644
--- a/src/mpi/group/group_rank.c
+++ b/src/mpi/group/group_rank.c
@@ -56,11 +56,11 @@ int MPI_Group_rank(MPI_Group group, int *rank)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *group_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_RANK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_RANK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_RANK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_RANK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -97,7 +97,7 @@ int MPI_Group_rank(MPI_Group group, int *rank)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_RANK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_RANK);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/group/group_size.c b/src/mpi/group/group_size.c
index ea55b9c..67290fa 100644
--- a/src/mpi/group/group_size.c
+++ b/src/mpi/group/group_size.c
@@ -55,11 +55,11 @@ int MPI_Group_size(MPI_Group group, int *size)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *group_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_SIZE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_SIZE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_SIZE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_SIZE);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -96,7 +96,7 @@ int MPI_Group_size(MPI_Group group, int *size)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_SIZE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_SIZE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/group/group_translate_ranks.c b/src/mpi/group/group_translate_ranks.c
index 8ff8ea6..dad9886 100644
--- a/src/mpi/group/group_translate_ranks.c
+++ b/src/mpi/group/group_translate_ranks.c
@@ -47,7 +47,7 @@ int MPIR_Group_translate_ranks_impl(MPIR_Group *gp1, int n, const int ranks1[],
         /* g2 probably == group_of(MPI_COMM_WORLD); use fast, constant-time lookup */
         int lpid_offset = gp2->lrank_to_lpid[0].lpid;
 
-        MPIU_Assert(lpid_offset >= 0);
+        MPIR_Assert(lpid_offset >= 0);
         for (i = 0; i < n; ++i) {
             int g1_lpid;
 
@@ -67,7 +67,7 @@ int MPIR_Group_translate_ranks_impl(MPIR_Group *gp1, int n, const int ranks1[],
         /* general, slow path; lookup time is dependent on the user-provided rank values! */
         g2_idx = gp2->idx_of_first_lpid;
         if (g2_idx < 0) {
-            MPIR_Group_setup_lpid_list( gp2 );
+            MPII_Group_setup_lpid_list( gp2 );
             g2_idx = gp2->idx_of_first_lpid;
         }
         if (g2_idx >= 0) {
@@ -143,7 +143,7 @@ int MPI_Group_translate_ranks(MPI_Group group1, int n, const int ranks1[],
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *group_ptr1 = NULL;
     MPIR_Group *group_ptr2 = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_TRANSLATE_RANKS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_TRANSLATE_RANKS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
@@ -151,7 +151,7 @@ int MPI_Group_translate_ranks(MPI_Group group1, int n, const int ranks1[],
        within a mutex.  As most of the group routines are not performance
        critical, we simple run these routines within the SINGLE_CS */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_TRANSLATE_RANKS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_TRANSLATE_RANKS);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -209,7 +209,7 @@ int MPI_Group_translate_ranks(MPI_Group group1, int n, const int ranks1[],
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_TRANSLATE_RANKS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_TRANSLATE_RANKS);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/group_union.c b/src/mpi/group/group_union.c
index 49f7da0..aba34e3 100644
--- a/src/mpi/group/group_union.c
+++ b/src/mpi/group/group_union.c
@@ -34,9 +34,9 @@ int MPIR_Group_union_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_G
 {
     int mpi_errno = MPI_SUCCESS;
     int g1_idx, g2_idx, nnew, i, k, size1, size2, mylpid;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GROUP_UNION_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GROUP_UNION_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GROUP_UNION_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GROUP_UNION_IMPL);
 
     /* Determine the size of the new group.  The new group consists of all
        members of group1 plus the members of group2 that are not in group1.
@@ -46,11 +46,11 @@ int MPIR_Group_union_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_G
 
     /* If the lpid list hasn't been created, do it now */
     if (g1_idx < 0) { 
-        MPIR_Group_setup_lpid_list( group_ptr1 ); 
+        MPII_Group_setup_lpid_list( group_ptr1 );
         g1_idx = group_ptr1->idx_of_first_lpid;
     }
     if (g2_idx < 0) { 
-        MPIR_Group_setup_lpid_list( group_ptr2 ); 
+        MPII_Group_setup_lpid_list( group_ptr2 );
         g2_idx = group_ptr2->idx_of_first_lpid;
     }
     nnew = group_ptr1->size;
@@ -133,7 +133,7 @@ int MPIR_Group_union_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_G
     /* TODO calculate is_local_dense_monotonic */
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GROUP_UNION_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GROUP_UNION_IMPL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -175,12 +175,12 @@ int MPI_Group_union(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup)
     MPIR_Group *group_ptr1 = NULL;
     MPIR_Group *group_ptr2 = NULL;
     MPIR_Group *new_group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GROUP_UNION);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_UNION);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GROUP_UNION);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_UNION);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -223,7 +223,7 @@ int MPI_Group_union(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GROUP_UNION);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_UNION);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/group/groupdebug.c b/src/mpi/group/groupdebug.c
index 61ee628..c21a846 100644
--- a/src/mpi/group/groupdebug.c
+++ b/src/mpi/group/groupdebug.c
@@ -23,13 +23,13 @@ void MPITEST_Group_create( int nproc, int myrank, MPI_Group *new_group )
     MPIR_Group *new_group_ptr;
     int i;
 
-    new_group_ptr = (MPIR_Group *)MPIU_Handle_obj_alloc( &MPIR_Group_mem );
+    new_group_ptr = (MPIR_Group *)MPIR_Handle_obj_alloc( &MPIR_Group_mem );
     if (!new_group_ptr) {
 	fprintf( stderr, "Could not create a new group\n" );
 	PMPI_Abort( MPI_COMM_WORLD, 1 );
     }
-    MPIU_Object_set_ref( new_group_ptr, 1 );
-    new_group_ptr->lrank_to_lpid = (MPIR_Group_pmap_t *)MPL_malloc( nproc * sizeof(MPIR_Group_pmap_t) );
+    MPIR_Object_set_ref( new_group_ptr, 1 );
+    new_group_ptr->lrank_to_lpid = (MPII_Group_pmap_t *)MPL_malloc( nproc * sizeof(MPII_Group_pmap_t) );
     if (!new_group_ptr->lrank_to_lpid) {
 	fprintf( stderr, "Could not create lrank map for new group\n" );
 	PMPI_Abort( MPI_COMM_WORLD, 1 );
@@ -56,7 +56,7 @@ void MPITEST_Group_print( MPI_Group g )
 
     g_idx = g_ptr->idx_of_first_lpid;
     if (g_idx < 0) { 
-	MPIR_Group_setup_lpid_list( g_ptr ); 
+	MPII_Group_setup_lpid_list( g_ptr );
 	g_idx = g_ptr->idx_of_first_lpid;
     }
     
diff --git a/src/mpi/group/grouputil.c b/src/mpi/group/grouputil.c
index 735c889..5e17886 100644
--- a/src/mpi/group/grouputil.c
+++ b/src/mpi/group/grouputil.c
@@ -14,7 +14,7 @@
 /* Preallocated group objects */
 MPIR_Group MPIR_Group_builtin[MPIR_GROUP_N_BUILTIN] = { {0} };
 MPIR_Group MPIR_Group_direct[MPID_GROUP_PREALLOC] = { {0} };
-MPIU_Object_alloc_t MPIR_Group_mem = { 0, 0, 0, 0, MPIR_GROUP,
+MPIR_Object_alloc_t MPIR_Group_mem = { 0, 0, 0, 0, MPIR_GROUP,
 				      sizeof(MPIR_Group), MPIR_Group_direct,
 				       MPID_GROUP_PREALLOC};
 
@@ -24,10 +24,10 @@ int MPIR_Group_init(void)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIR_GROUP_N_BUILTIN == 1); /* update this func if this ever triggers */
+    MPIR_Assert(MPIR_GROUP_N_BUILTIN == 1); /* update this func if this ever triggers */
 
     MPIR_Group_builtin[0].handle = MPI_GROUP_EMPTY;
-    MPIU_Object_set_ref(&MPIR_Group_builtin[0], 1);
+    MPIR_Object_set_ref(&MPIR_Group_builtin[0], 1);
     MPIR_Group_builtin[0].size = 0;
     MPIR_Group_builtin[0].rank = MPI_UNDEFINED;
     MPIR_Group_builtin[0].idx_of_first_lpid = -1;
@@ -47,7 +47,7 @@ int MPIR_Group_release(MPIR_Group *group_ptr)
     if (!inuse) {
         /* Only if refcount is 0 do we actually free. */
         MPL_free(group_ptr->lrank_to_lpid);
-        MPIU_Handle_obj_free( &MPIR_Group_mem, group_ptr );
+        MPIR_Handle_obj_free( &MPIR_Group_mem, group_ptr );
     }
     return mpi_errno;
 }
@@ -62,21 +62,21 @@ int MPIR_Group_create( int nproc, MPIR_Group **new_group_ptr )
 {
     int mpi_errno = MPI_SUCCESS;
 
-    *new_group_ptr = (MPIR_Group *)MPIU_Handle_obj_alloc( &MPIR_Group_mem );
+    *new_group_ptr = (MPIR_Group *)MPIR_Handle_obj_alloc( &MPIR_Group_mem );
     /* --BEGIN ERROR HANDLING-- */
     if (!*new_group_ptr) {
 	mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, "MPIR_Group_create", __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
 	return mpi_errno;
     }
     /* --END ERROR HANDLING-- */
-    MPIU_Object_set_ref( *new_group_ptr, 1 );
+    MPIR_Object_set_ref( *new_group_ptr, 1 );
     (*new_group_ptr)->lrank_to_lpid = 
-	(MPIR_Group_pmap_t *)MPL_malloc( nproc * sizeof(MPIR_Group_pmap_t) );
+	(MPII_Group_pmap_t *)MPL_malloc( nproc * sizeof(MPII_Group_pmap_t) );
     /* --BEGIN ERROR HANDLING-- */
     if (!(*new_group_ptr)->lrank_to_lpid) {
-	MPIU_Handle_obj_free( &MPIR_Group_mem, *new_group_ptr );
+	MPIR_Handle_obj_free( &MPIR_Group_mem, *new_group_ptr );
 	*new_group_ptr = NULL;
-	MPIU_CHKMEM_SETERR(mpi_errno,nproc*sizeof(MPIR_Group_pmap_t),
+	MPIR_CHKMEM_SETERR(mpi_errno,nproc*sizeof(MPII_Group_pmap_t),
 			   "newgroup->lrank_to_lpid");
 	return mpi_errno;
     }
@@ -99,7 +99,7 @@ int MPIR_Group_create( int nproc, MPIR_Group **new_group_ptr )
  * in group rank order).  Instead it builds the traversal order (in increasing
  * lpid order) through the maparray given by the "next_lpid" fields.
  */
-static int mergesort_lpidarray( MPIR_Group_pmap_t maparray[], int n )
+static int mergesort_lpidarray( MPII_Group_pmap_t maparray[], int n )
 {
     int idx1, idx2, first_idx, cur_idx, next_lpid, idx2_offset;
 
@@ -199,7 +199,7 @@ static int mergesort_lpidarray( MPIR_Group_pmap_t maparray[], int n )
  * be held on entry to this routine.  This forces some of the routines
  * noted above to hold the SINGLE_CS; which would otherwise not be required.
  */
-void MPIR_Group_setup_lpid_list( MPIR_Group *group_ptr )
+void MPII_Group_setup_lpid_list( MPIR_Group *group_ptr )
 {
     if (group_ptr->idx_of_first_lpid == -1) {
 	group_ptr->idx_of_first_lpid = 
@@ -213,10 +213,10 @@ void MPIR_Group_setup_lpid_pairs( MPIR_Group *group_ptr1,
 {
     /* If the lpid list hasn't been created, do it now */
     if (group_ptr1->idx_of_first_lpid < 0) { 
-	MPIR_Group_setup_lpid_list( group_ptr1 ); 
+	MPII_Group_setup_lpid_list( group_ptr1 );
     }
     if (group_ptr2->idx_of_first_lpid < 0) { 
-	MPIR_Group_setup_lpid_list( group_ptr2 ); 
+	MPII_Group_setup_lpid_list( group_ptr2 );
     }
 }
 
@@ -377,15 +377,15 @@ int MPIR_Group_check_subset( MPIR_Group *group_ptr, MPIR_Comm *comm_ptr )
 {
     int mpi_errno = MPI_SUCCESS;
     int g1_idx, g2_idx, l1_pid, l2_pid, i;
-    MPIR_Group_pmap_t *vmap=0;
+    MPII_Group_pmap_t *vmap=0;
     int vsize = comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM ? comm_ptr->local_size :
         comm_ptr->remote_size;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
-    MPIU_Assert(group_ptr != NULL);
+    MPIR_Assert(group_ptr != NULL);
 
-    MPIU_CHKLMEM_MALLOC(vmap,MPIR_Group_pmap_t*,
-			vsize*sizeof(MPIR_Group_pmap_t),mpi_errno, "" );
+    MPIR_CHKLMEM_MALLOC(vmap,MPII_Group_pmap_t*,
+			vsize*sizeof(MPII_Group_pmap_t),mpi_errno, "" );
     /* Initialize the vmap */
     for (i=0; i<vsize; i++) {
 	MPID_Comm_get_lpid(comm_ptr, i, &vmap[i].lpid, FALSE);
@@ -393,7 +393,7 @@ int MPIR_Group_check_subset( MPIR_Group *group_ptr, MPIR_Comm *comm_ptr )
 	vmap[i].flag      = 0;
     }
     
-    MPIR_Group_setup_lpid_list( group_ptr );
+    MPII_Group_setup_lpid_list( group_ptr );
     g1_idx = group_ptr->idx_of_first_lpid;
     g2_idx = mergesort_lpidarray( vmap, vsize );
     MPL_DBG_MSG_FMT(MPIR_DBG_COMM,VERBOSE,(MPL_DBG_FDEST,
@@ -426,7 +426,7 @@ int MPIR_Group_check_subset( MPIR_Group *group_ptr, MPIR_Comm *comm_ptr )
     }
 
  fn_fail:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 }
 
diff --git a/src/mpi/info/info_create.c b/src/mpi/info/info_create.c
index ca437c0..665c851 100644
--- a/src/mpi/info/info_create.c
+++ b/src/mpi/info/info_create.c
@@ -48,12 +48,12 @@ int MPI_Info_create( MPI_Info *info )
     MPIR_Info *info_ptr;
     static const char FCNAME[] = "MPI_Info_create";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INFO_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INFO_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INFO_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INFO_CREATE);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -68,7 +68,7 @@ int MPI_Info_create( MPI_Info *info )
 
     /* ... body of routine ...  */
 
-    mpi_errno = MPIU_Info_alloc(&info_ptr);
+    mpi_errno = MPIR_Info_alloc(&info_ptr);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     *info	     = info_ptr->handle;
@@ -79,7 +79,7 @@ int MPI_Info_create( MPI_Info *info )
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INFO_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INFO_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/info/info_delete.c b/src/mpi/info/info_delete.c
index 7645dd1..2fb494b 100644
--- a/src/mpi/info/info_delete.c
+++ b/src/mpi/info/info_delete.c
@@ -48,12 +48,12 @@ int MPI_Info_delete( MPI_Info info, const char *key )
     static const char FCNAME[] = "MPI_Info_delete";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *info_ptr=0, *prev_ptr, *curr_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INFO_DELETE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INFO_DELETE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INFO_DELETE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INFO_DELETE);
     
 
     /* Validate parameters, especially handles needing to be converted */
@@ -101,7 +101,7 @@ int MPI_Info_delete( MPI_Info info, const char *key )
 	    MPL_free(curr_ptr->key);
 	    MPL_free(curr_ptr->value);
 	    prev_ptr->next = curr_ptr->next;
-	    MPIU_Handle_obj_free( &MPIR_Info_mem, curr_ptr );
+	    MPIR_Handle_obj_free( &MPIR_Info_mem, curr_ptr );
 	    break;
 	}
 	prev_ptr = curr_ptr;
@@ -114,7 +114,7 @@ int MPI_Info_delete( MPI_Info info, const char *key )
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INFO_DELETE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INFO_DELETE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/info/info_dup.c b/src/mpi/info/info_dup.c
index 6a1ced6..d8dcc11 100644
--- a/src/mpi/info/info_dup.c
+++ b/src/mpi/info/info_dup.c
@@ -45,14 +45,14 @@ int MPIR_Info_dup_impl(MPIR_Info *info_ptr, MPIR_Info **new_info_ptr)
        may want to add an "allocate n elements" routine and execute this
        it two steps: count and then allocate */
     /* FIXME : multithreaded */
-    mpi_errno = MPIU_Info_alloc(&curr_new);
+    mpi_errno = MPIR_Info_alloc(&curr_new);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     *new_info_ptr = curr_new;
 
     curr_old = info_ptr->next;
     while (curr_old)
     {
-        mpi_errno = MPIU_Info_alloc(&curr_new->next);
+        mpi_errno = MPIR_Info_alloc(&curr_new->next);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
         curr_new         = curr_new->next;
@@ -95,12 +95,12 @@ int MPI_Info_dup( MPI_Info info, MPI_Info *newinfo )
 {
     MPIR_Info *info_ptr = 0, *new_info_ptr;
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INFO_DUP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INFO_DUP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INFO_DUP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INFO_DUP);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -139,7 +139,7 @@ int MPI_Info_dup( MPI_Info info, MPI_Info *newinfo )
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INFO_DUP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INFO_DUP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/info/info_free.c b/src/mpi/info/info_free.c
index ca8191a..70e7f28 100644
--- a/src/mpi/info/info_free.c
+++ b/src/mpi/info/info_free.c
@@ -51,12 +51,12 @@ int MPI_Info_free( MPI_Info *info )
 #endif
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *info_ptr=0;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INFO_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INFO_FREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INFO_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INFO_FREE);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -87,7 +87,7 @@ int MPI_Info_free( MPI_Info *info )
 
     /* ... body of routine ...  */
     
-    MPIU_Info_free( info_ptr );
+    MPIR_Info_free( info_ptr );
     *info = MPI_INFO_NULL;
     
     /* ... end of body of routine ... */
@@ -95,7 +95,7 @@ int MPI_Info_free( MPI_Info *info )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INFO_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INFO_FREE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/info/info_get.c b/src/mpi/info/info_get.c
index f6bcf95..9514472 100644
--- a/src/mpi/info/info_get.c
+++ b/src/mpi/info/info_get.c
@@ -95,12 +95,12 @@ int MPI_Info_get(MPI_Info info, const char *key, int valuelen, char *value,
 {
     MPIR_Info *info_ptr=0;
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INFO_GET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INFO_GET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INFO_GET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INFO_GET);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -151,7 +151,7 @@ int MPI_Info_get(MPI_Info info, const char *key, int valuelen, char *value,
     if (mpi_errno) goto fn_fail;
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INFO_GET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INFO_GET);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/info/info_getn.c b/src/mpi/info/info_getn.c
index 82152b2..5d9fa80 100644
--- a/src/mpi/info/info_getn.c
+++ b/src/mpi/info/info_getn.c
@@ -73,12 +73,12 @@ int MPI_Info_get_nkeys( MPI_Info info, int *nkeys )
 {
     MPIR_Info *info_ptr=0;
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INFO_GET_NKEYS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INFO_GET_NKEYS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INFO_GET_NKEYS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INFO_GET_NKEYS);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -118,7 +118,7 @@ int MPI_Info_get_nkeys( MPI_Info info, int *nkeys )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INFO_GET_NKEYS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INFO_GET_NKEYS);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/info/info_getnth.c b/src/mpi/info/info_getnth.c
index 81d2d58..33d7cab 100644
--- a/src/mpi/info/info_getnth.c
+++ b/src/mpi/info/info_getnth.c
@@ -86,12 +86,12 @@ int MPI_Info_get_nthkey( MPI_Info info, int n, char *key )
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *info_ptr=0;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INFO_GET_NTHKEY);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INFO_GET_NTHKEY);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INFO_GET_NTHKEY);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INFO_GET_NTHKEY);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -128,7 +128,7 @@ int MPI_Info_get_nthkey( MPI_Info info, int n, char *key )
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INFO_GET_NTHKEY);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INFO_GET_NTHKEY);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/info/info_getvallen.c b/src/mpi/info/info_getvallen.c
index 4051e28..31ef9d1 100644
--- a/src/mpi/info/info_getvallen.c
+++ b/src/mpi/info/info_getvallen.c
@@ -79,12 +79,12 @@ int MPI_Info_get_valuelen( MPI_Info info, const char *key, int *valuelen, int *f
 {
     MPIR_Info *info_ptr=0;
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INFO_GET_VALUELEN);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INFO_GET_VALUELEN);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INFO_GET_VALUELEN);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INFO_GET_VALUELEN);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -135,7 +135,7 @@ int MPI_Info_get_valuelen( MPI_Info info, const char *key, int *valuelen, int *f
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INFO_GET_VALUELEN);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INFO_GET_VALUELEN);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/info/info_set.c b/src/mpi/info/info_set.c
index 4701ebf..61b217d 100644
--- a/src/mpi/info/info_set.c
+++ b/src/mpi/info/info_set.c
@@ -54,12 +54,12 @@ int MPI_Info_set( MPI_Info info, const char *key, const char *value )
     static const char FCNAME[] = "MPI_Info_set";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INFO_SET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INFO_SET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INFO_SET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INFO_SET);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -105,7 +105,7 @@ int MPI_Info_set( MPI_Info info, const char *key, const char *value )
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INFO_SET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INFO_SET);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
@@ -134,9 +134,9 @@ int MPIR_Info_set_impl(MPIR_Info *info_ptr, const char *key, const char *value)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *curr_ptr, *prev_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_INFO_SET_IMPL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_INFO_SET_IMPL);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_INFO_SET_IMPL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_INFO_SET_IMPL);
 
     prev_ptr = info_ptr;
     curr_ptr = info_ptr->next;
@@ -154,7 +154,7 @@ int MPIR_Info_set_impl(MPIR_Info *info_ptr, const char *key, const char *value)
 
     if (!curr_ptr) {
         /* Key not present, insert value */
-        mpi_errno = MPIU_Info_alloc(&curr_ptr);
+        mpi_errno = MPIR_Info_alloc(&curr_ptr);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
         /*printf( "Inserting new elm %x at %x\n", curr_ptr->id, prev_ptr->id );*/
@@ -164,7 +164,7 @@ int MPIR_Info_set_impl(MPIR_Info *info_ptr, const char *key, const char *value)
     }
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_INFO_SET_IMPL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_INFO_SET_IMPL);
     return mpi_errno;
 
 fn_fail:
diff --git a/src/mpi/info/infoutil.c b/src/mpi/info/infoutil.c
index 261f911..5b3c6e6 100644
--- a/src/mpi/info/infoutil.c
+++ b/src/mpi/info/infoutil.c
@@ -17,24 +17,24 @@
 /* Preallocated info objects */
 MPIR_Info MPIR_Info_builtin[MPIR_INFO_N_BUILTIN] = { { 0 } };
 MPIR_Info MPIR_Info_direct[MPIR_INFO_PREALLOC] = { { 0 } };
-MPIU_Object_alloc_t MPIR_Info_mem = { 0, 0, 0, 0, MPIR_INFO,
+MPIR_Object_alloc_t MPIR_Info_mem = { 0, 0, 0, 0, MPIR_INFO,
 				      sizeof(MPIR_Info), MPIR_Info_direct,
                                       MPIR_INFO_PREALLOC, };
 
 /* Free an info structure.  In the multithreaded case, this routine
    relies on the SINGLE_CS in the info routines (particularly MPI_Info_free) */
 #undef FUNCNAME
-#define FUNCNAME MPIU_Info_free
+#define FUNCNAME MPIR_Info_free
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-void MPIU_Info_free( MPIR_Info *info_ptr )
+void MPIR_Info_free( MPIR_Info *info_ptr )
 {
     MPIR_Info *curr_ptr, *last_ptr;
 
     curr_ptr = info_ptr->next;
     last_ptr = NULL;
 
-    MPIU_Handle_obj_free(&MPIR_Info_mem, info_ptr);
+    MPIR_Handle_obj_free(&MPIR_Info_mem, info_ptr);
 
     /* printf( "Returning info %x\n", info_ptr->id ); */
     /* First, free the string storage */
@@ -43,7 +43,7 @@ void MPIU_Info_free( MPIR_Info *info_ptr )
 	MPL_free(curr_ptr->value);
 	last_ptr = curr_ptr;
 	curr_ptr = curr_ptr->next;
-        MPIU_Handle_obj_free(&MPIR_Info_mem, last_ptr);
+        MPIR_Handle_obj_free(&MPIR_Info_mem, last_ptr);
     }
 }
 
@@ -51,16 +51,16 @@ void MPIU_Info_free( MPIR_Info *info_ptr )
  *
  * Returns MPICH error codes */
 #undef FUNCNAME
-#define FUNCNAME MPIU_Info_alloc
+#define FUNCNAME MPIR_Info_alloc
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIU_Info_alloc(MPIR_Info **info_p_p)
+int MPIR_Info_alloc(MPIR_Info **info_p_p)
 {
     int mpi_errno = MPI_SUCCESS;
-    *info_p_p = (MPIR_Info *)MPIU_Handle_obj_alloc(&MPIR_Info_mem);
+    *info_p_p = (MPIR_Info *)MPIR_Handle_obj_alloc(&MPIR_Info_mem);
     MPIR_ERR_CHKANDJUMP1(!*info_p_p, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPI_Info");
 
-    MPIU_Object_set_ref(*info_p_p, 0);
+    MPIR_Object_set_ref(*info_p_p, 0);
     (*info_p_p)->next  = NULL;
     (*info_p_p)->key   = NULL;
     (*info_p_p)->value = NULL;
diff --git a/src/mpi/init/abort.c b/src/mpi/init/abort.c
index ecf929e..76fac72 100644
--- a/src/mpi/init/abort.c
+++ b/src/mpi/init/abort.c
@@ -78,7 +78,7 @@ int MPI_Abort(MPI_Comm comm, int errorcode)
     /* FIXME: 100 is arbitrary and may not be long enough */
     char abort_str[100] = "", comm_name[MPI_MAX_OBJECT_NAME];
     int len = MPI_MAX_OBJECT_NAME;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ABORT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ABORT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
@@ -87,7 +87,7 @@ int MPI_Abort(MPI_Comm comm, int errorcode)
        hung holding the critical section.  Also note the "not thread-safe"
        comment in the description of MPI_Abort above. */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ABORT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ABORT);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -138,12 +138,12 @@ int MPI_Abort(MPI_Comm comm, int errorcode)
     /* --BEGIN ERROR HANDLING-- */
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
     /* MPID_Abort() should never return MPI_SUCCESS */
-    MPIU_Assert(0);
+    MPIR_Assert(0);
     /* --END ERROR HANDLING-- */
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ABORT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ABORT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/init/async.c b/src/mpi/init/async.c
index 40dee1c..39c6f24 100644
--- a/src/mpi/init/async.c
+++ b/src/mpi/init/async.c
@@ -47,22 +47,22 @@ static void progress_fn(void * data)
 
     mpi_errno = MPID_Irecv(NULL, 0, MPI_CHAR, 0, WAKE_TAG, progress_comm_ptr,
                            MPIR_CONTEXT_INTRA_PT2PT, &request_ptr);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
     request = request_ptr->handle;
     mpi_errno = MPIR_Wait_impl(&request, &status);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
     /* Send a signal to the main thread saying we are done */
     MPID_Thread_mutex_lock(&progress_mutex, &mpi_errno);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
     progress_thread_done = 1;
 
     MPID_Thread_mutex_unlock(&progress_mutex, &mpi_errno);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
     MPID_Thread_cond_signal(&progress_cond, &mpi_errno);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
@@ -81,9 +81,9 @@ int MPIR_Init_async_thread(void)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_self_ptr;
     int err = 0;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_INIT_ASYNC_THREAD);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_INIT_ASYNC_THREAD);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_INIT_ASYNC_THREAD);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_INIT_ASYNC_THREAD);
 
 
     /* Dup comm world for the progress thread */
@@ -100,7 +100,7 @@ int MPIR_Init_async_thread(void)
     MPID_Thread_create((MPID_Thread_func_t) progress_fn, NULL, &progress_thread_id, &err);
     MPIR_ERR_CHKANDJUMP1(err, mpi_errno, MPI_ERR_OTHER, "**mutex_create", "**mutex_create %s", strerror(err));
     
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_INIT_ASYNC_THREAD);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_INIT_ASYNC_THREAD);
 
  fn_exit:
     return mpi_errno;
@@ -122,43 +122,43 @@ int MPIR_Finalize_async_thread(void)
     MPIR_Request *request_ptr = NULL;
     MPI_Request request;
     MPI_Status status;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_FINALIZE_ASYNC_THREAD);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_FINALIZE_ASYNC_THREAD);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_FINALIZE_ASYNC_THREAD);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_FINALIZE_ASYNC_THREAD);
 
     mpi_errno = MPID_Isend(NULL, 0, MPI_CHAR, 0, WAKE_TAG, progress_comm_ptr,
                            MPIR_CONTEXT_INTRA_PT2PT, &request_ptr);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
     request = request_ptr->handle;
     mpi_errno = MPIR_Wait_impl(&request, &status);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
     /* XXX DJG why is this unlock/lock necessary?  Should we just YIELD here or later?  */
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
     MPID_Thread_mutex_lock(&progress_mutex, &mpi_errno);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
     while (!progress_thread_done) {
         MPID_Thread_cond_wait(&progress_cond, &progress_mutex, &mpi_errno);
-        MPIU_Assert(!mpi_errno);
+        MPIR_Assert(!mpi_errno);
     }
 
     MPID_Thread_mutex_unlock(&progress_mutex, &mpi_errno);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
     mpi_errno = MPIR_Comm_free_impl(progress_comm_ptr);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
     MPID_Thread_cond_destroy(&progress_cond, &mpi_errno);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
     MPID_Thread_mutex_destroy(&progress_mutex, &mpi_errno);
-    MPIU_Assert(!mpi_errno);
+    MPIR_Assert(!mpi_errno);
 
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_FINALIZE_ASYNC_THREAD);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_FINALIZE_ASYNC_THREAD);
 
 #endif /* MPICH_THREAD_LEVEL == MPI_THREAD_MULTIPLE */
     return mpi_errno;
diff --git a/src/mpi/init/finalize.c b/src/mpi/init/finalize.c
index e25e286..02d7f87 100644
--- a/src/mpi/init/finalize.c
+++ b/src/mpi/init/finalize.c
@@ -145,14 +145,14 @@ int MPI_Finalize( void )
 #if defined(HAVE_USLEEP) && defined(USE_COVERAGE)
     int rank=0;
 #endif
-    MPID_MPI_FINALIZE_STATE_DECL(MPID_STATE_MPI_FINALIZE);
+    MPIR_FUNC_TERSE_FINALIZE_STATE_DECL(MPID_STATE_MPI_FINALIZE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     /* Note: Only one thread may ever call MPI_Finalize (MPI_Finalize may
        be called at most once in any program) */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FINALIZE_FUNC_ENTER(MPID_STATE_MPI_FINALIZE);
+    MPIR_FUNC_TERSE_FINALIZE_ENTER(MPID_STATE_MPI_FINALIZE);
     
     /* ... body of routine ... */
 
@@ -200,7 +200,7 @@ int MPI_Finalize( void )
 	MPIR_Errhandler_release_ref( MPIR_Process.comm_world->errhandler,
 				     &in_use);
 	if (!in_use) {
-	    MPIU_Handle_obj_free( &MPIR_Errhandler_mem,
+	    MPIR_Handle_obj_free( &MPIR_Errhandler_mem,
 				  MPIR_Process.comm_world->errhandler );
 	}
         /* always set to NULL to avoid a double-release later in finalize */
@@ -213,7 +213,7 @@ int MPI_Finalize( void )
 	MPIR_Errhandler_release_ref( MPIR_Process.comm_self->errhandler,
 				     &in_use);
 	if (!in_use) {
-	    MPIU_Handle_obj_free( &MPIR_Errhandler_mem,
+	    MPIR_Handle_obj_free( &MPIR_Errhandler_mem,
 				  MPIR_Process.comm_self->errhandler );
 	}
         /* always set to NULL to avoid a double-release later in finalize */
@@ -222,7 +222,7 @@ int MPI_Finalize( void )
 
     /* FIXME: Why is this not one of the finalize callbacks?.  Do we need
        pre and post MPID_Finalize callbacks? */
-    MPIU_Timer_finalize();
+    MPII_Timer_finalize();
 
     /* Call the high-priority callbacks */
     MPIR_Call_finalize_callbacks( MPIR_FINALIZE_CALLBACK_PRIO+1, 
@@ -231,7 +231,7 @@ int MPI_Finalize( void )
     /* Signal the debugger that we are about to exit. */
     /* FIXME: Should this also be a finalize callback? */
 #ifdef HAVE_DEBUGGER_SUPPORT
-    MPIR_DebuggerSetAborting( (char *)0 );
+    MPIR_Debugger_set_aborting( (char *)0 );
 #endif
 
     mpi_errno = MPID_Finalize();
@@ -306,7 +306,7 @@ int MPI_Finalize( void )
 
     /* ... end of body of routine ... */
   fn_exit:
-    MPID_MPI_FINALIZE_FUNC_EXIT(MPID_STATE_MPI_FINALIZE);
+    MPIR_FUNC_TERSE_FINALIZE_EXIT(MPID_STATE_MPI_FINALIZE);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/init/finalized.c b/src/mpi/init/finalized.c
index 53a4014..0870ccc 100644
--- a/src/mpi/init/finalized.c
+++ b/src/mpi/init/finalized.c
@@ -48,9 +48,9 @@ int MPI_Finalized( int *flag )
     static const char FCNAME[] = "MPI_Finalized";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_FINALIZED);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_FINALIZED);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_FINALIZED);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_FINALIZED);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -76,7 +76,7 @@ int MPI_Finalized( int *flag )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_FINALIZED);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_FINALIZED);
     return mpi_errno;
     
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/init/init.c b/src/mpi/init/init.c
index 91a8f0e..340568c 100644
--- a/src/mpi/init/init.c
+++ b/src/mpi/init/init.c
@@ -71,7 +71,7 @@ int MPI_Init(int *argc, char ***argv) __attribute__((weak,alias("PMPI_Init")));
 #define MPI_Init PMPI_Init
 
 /* Fortran logical values. extern'd in mpiimpl.h */
-/* MPI_Fint MPIR_F_TRUE, MPIR_F_FALSE; */
+/* MPI_Fint MPII_F_TRUE, MPII_F_FALSE; */
 
 /* Any internal routines can go here.  Make them static if possible */
 
@@ -123,14 +123,14 @@ int MPI_Init( int *argc, char ***argv )
     int mpi_errno = MPI_SUCCESS;
     int rc ATTRIBUTE((unused));
     int threadLevel, provided;
-    MPID_MPI_INIT_STATE_DECL(MPID_STATE_MPI_INIT);
+    MPIR_FUNC_TERSE_INIT_STATE_DECL(MPID_STATE_MPI_INIT);
 
     rc = MPID_Wtime_init();
 #ifdef MPL_USE_DBG_LOGGING
     MPL_dbg_pre_init( argc, argv, rc );
 #endif
 
-    MPID_MPI_INIT_FUNC_ENTER(MPID_STATE_MPI_INIT);
+    MPIR_FUNC_TERSE_INIT_ENTER(MPID_STATE_MPI_INIT);
 #   ifdef HAVE_ERROR_CHECKING
     {
         MPID_BEGIN_ERROR_CHECKS;
@@ -191,7 +191,7 @@ int MPI_Init( int *argc, char ***argv )
     }
 
     /* ... end of body of routine ... */
-    MPID_MPI_INIT_FUNC_EXIT(MPID_STATE_MPI_INIT);
+    MPIR_FUNC_TERSE_INIT_EXIT(MPID_STATE_MPI_INIT);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/init/initialized.c b/src/mpi/init/initialized.c
index b8938e2..c0dd45c 100644
--- a/src/mpi/init/initialized.c
+++ b/src/mpi/init/initialized.c
@@ -48,9 +48,9 @@ int MPI_Initialized( int *flag )
     static const char FCNAME[] = "MPI_Initialized";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INITIALIZED);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INITIALIZED);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INITIALIZED);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INITIALIZED);
     
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -76,7 +76,7 @@ int MPI_Initialized( int *flag )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INITIALIZED);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INITIALIZED);
     return mpi_errno;
     
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/init/initinfo.c b/src/mpi/init/initinfo.c
index ad79819..71dac94 100644
--- a/src/mpi/init/initinfo.c
+++ b/src/mpi/init/initinfo.c
@@ -12,11 +12,11 @@
    and configure information without requiring the user to run an MPI
    program 
 */
-const char MPIR_Version_string[]       = MPICH_VERSION;
-const char MPIR_Version_date[]         = MPICH_VERSION_DATE;
-const char MPIR_Version_configure[]    = MPICH_CONFIGURE_ARGS_CLEAN;
-const char MPIR_Version_device[]       = MPICH_DEVICE;
-const char MPIR_Version_CC[]           = MPICH_COMPILER_CC;
-const char MPIR_Version_CXX[]          = MPICH_COMPILER_CXX;
-const char MPIR_Version_F77[]          = MPICH_COMPILER_F77;
-const char MPIR_Version_FC[]           = MPICH_COMPILER_FC;
+const char MPII_Version_string[]       = MPICH_VERSION;
+const char MPII_Version_date[]         = MPICH_VERSION_DATE;
+const char MPII_Version_configure[]    = MPICH_CONFIGURE_ARGS_CLEAN;
+const char MPII_Version_device[]       = MPICH_DEVICE;
+const char MPII_Version_CC[]           = MPICH_COMPILER_CC;
+const char MPII_Version_CXX[]          = MPICH_COMPILER_CXX;
+const char MPII_Version_F77[]          = MPICH_COMPILER_F77;
+const char MPII_Version_FC[]           = MPICH_COMPILER_FC;
diff --git a/src/mpi/init/initthread.c b/src/mpi/init/initthread.c
index 40f8080..b93ebae 100644
--- a/src/mpi/init/initthread.c
+++ b/src/mpi/init/initthread.c
@@ -82,7 +82,7 @@ int MPI_Init_thread(int *argc, char ***argv, int required, int *provided) __attr
 /* Any internal routines can go here.  Make them static if possible */
 
 /* Global variables can be initialized here */
-MPICH_PerProcess_t MPIR_Process = { OPA_INT_T_INITIALIZER(MPICH_MPI_STATE__PRE_INIT) };
+MPIR_Process_t MPIR_Process = { OPA_INT_T_INITIALIZER(MPICH_MPI_STATE__PRE_INIT) };
      /* all other fields in MPIR_Process are irrelevant */
 MPIR_Thread_info_t MPIR_ThreadInfo = { 0 };
 
@@ -103,7 +103,7 @@ MPIU_DLL_SPEC MPI_Fint *MPI_F_STATUSES_IGNORE = 0;
 
 /* This will help force the load of initinfo.o, which contains data about
    how MPICH was configured. */
-extern const char MPIR_Version_device[];
+extern const char MPII_Version_device[];
 
 /* Make sure the Fortran symbols are initialized unless it will cause problems
    for C programs linked with the C compilers (i.e., not using the 
@@ -166,12 +166,12 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
 
 #if defined(MPICH_IS_THREADED)
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL || \
-    MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL || \
+    MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
 MPID_Thread_mutex_t MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX;
 #endif
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
 MPID_Thread_mutex_t MPIR_THREAD_POBJ_HANDLE_MUTEX;
 MPID_Thread_mutex_t MPIR_THREAD_POBJ_MSGQ_MUTEX;
 MPID_Thread_mutex_t MPIR_THREAD_POBJ_COMPLETION_MUTEX;
@@ -188,32 +188,32 @@ static int thread_cs_init( void )
 {
     int err;
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL
 /* There is a single, global lock, held for the duration of an MPI call */
     MPID_Thread_mutex_create(&MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
 
-#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
-    /* MPICH_THREAD_GRANULARITY_PER_OBJECT: Multiple locks */
+#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
+    /* MPICH_THREAD_GRANULARITY__POBJ: Multiple locks */
     MPID_Thread_mutex_create(&MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_create(&MPIR_THREAD_POBJ_HANDLE_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_create(&MPIR_THREAD_POBJ_MSGQ_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_create(&MPIR_THREAD_POBJ_COMPLETION_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_create(&MPIR_THREAD_POBJ_CTX_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_create(&MPIR_THREAD_POBJ_PMI_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
 
-#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_LOCK_FREE
+#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__LOCKFREE
 /* Updates to shared data and access to shared services is handled without 
    locks where ever possible. */
 #error lock-free not yet implemented
 
-#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_SINGLE
+#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__SINGLE
 /* No thread support, make all operations a no-op */
 
 #else
@@ -235,34 +235,34 @@ int MPIR_Thread_CS_Finalize( void )
     int err;
 
     MPL_DBG_MSG(MPIR_DBG_INIT,TYPICAL,"Freeing global mutex and private storage");
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL
 /* There is a single, global lock, held for the duration of an MPI call */
     MPID_Thread_mutex_destroy(&MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
 
-#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
-    /* MPICH_THREAD_GRANULARITY_PER_OBJECT: There are multiple locks,
+#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
+    /* MPICH_THREAD_GRANULARITY__POBJ: There are multiple locks,
      * one for each logical class (e.g., each type of object) */
     MPID_Thread_mutex_destroy(&MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_destroy(&MPIR_THREAD_POBJ_HANDLE_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_destroy(&MPIR_THREAD_POBJ_MSGQ_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_destroy(&MPIR_THREAD_POBJ_COMPLETION_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_destroy(&MPIR_THREAD_POBJ_CTX_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     MPID_Thread_mutex_destroy(&MPIR_THREAD_POBJ_PMI_MUTEX, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
 
 
-#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_LOCK_FREE
+#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__LOCKFREE
 /* Updates to shared data and access to shared services is handled without 
    locks where ever possible. */
 #error lock-free not yet implemented
 
-#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_SINGLE
+#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__SINGLE
 /* No thread support, make all operations a no-op */
 
 #else
@@ -420,7 +420,7 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
        intially NULL and will be allocated by the device if the process group
        was started using one of the MPI_Comm_spawn functions. */
     MPIR_Process.comm_world		    = MPIR_Comm_builtin + 0;
-    MPIR_Comm_init(MPIR_Process.comm_world);
+    MPII_Comm_init(MPIR_Process.comm_world);
     MPIR_Process.comm_world->handle	    = MPI_COMM_WORLD;
     MPIR_Process.comm_world->context_id	    = 0 << MPIR_CONTEXT_PREFIX_SHIFT;
     MPIR_Process.comm_world->recvcontext_id = 0 << MPIR_CONTEXT_PREFIX_SHIFT;
@@ -431,7 +431,7 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
 		 MPI_MAX_OBJECT_NAME);
 
     MPIR_Process.comm_self		    = MPIR_Comm_builtin + 1;
-    MPIR_Comm_init(MPIR_Process.comm_self);
+    MPII_Comm_init(MPIR_Process.comm_self);
     MPIR_Process.comm_self->handle	    = MPI_COMM_SELF;
     MPIR_Process.comm_self->context_id	    = 1 << MPIR_CONTEXT_PREFIX_SHIFT;
     MPIR_Process.comm_self->recvcontext_id  = 1 << MPIR_CONTEXT_PREFIX_SHIFT;
@@ -441,7 +441,7 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
 
 #ifdef MPID_NEEDS_ICOMM_WORLD
     MPIR_Process.icomm_world		    = MPIR_Comm_builtin + 2;
-    MPIR_Comm_init(MPIR_Process.icomm_world);
+    MPII_Comm_init(MPIR_Process.icomm_world);
     MPIR_Process.icomm_world->handle	    = MPIR_ICOMM_WORLD;
     MPIR_Process.icomm_world->context_id    = 2 << MPIR_CONTEXT_PREFIX_SHIFT;
     MPIR_Process.icomm_world->recvcontext_id= 2 << MPIR_CONTEXT_PREFIX_SHIFT;
@@ -457,8 +457,8 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
 
     /* Setup the initial communicator list in case we have 
        enabled the debugger message-queue interface */
-    MPIR_COMML_REMEMBER( MPIR_Process.comm_world );
-    MPIR_COMML_REMEMBER( MPIR_Process.comm_self );
+    MPII_COMML_REMEMBER( MPIR_Process.comm_world );
+    MPII_COMML_REMEMBER( MPIR_Process.comm_self );
 
     /* Call any and all MPID_Init type functions */
     MPIR_Err_init();
@@ -496,7 +496,7 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
        as defined by the standard. */
     info_ptr = MPIR_Info_builtin + 1;
     info_ptr->handle = MPI_INFO_ENV;
-    MPIU_Object_set_ref(info_ptr, 1);
+    MPIR_Object_set_ref(info_ptr, 1);
     info_ptr->next  = NULL;
     info_ptr->key   = NULL;
     info_ptr->value = NULL;
@@ -506,7 +506,7 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     /* Assert: tag_ub should be a power of 2 minus 1 */
-    MPIU_Assert(((unsigned)MPIR_Process.attrs.tag_ub & ((unsigned)MPIR_Process.attrs.tag_ub + 1)) == 0);
+    MPIR_Assert(((unsigned)MPIR_Process.attrs.tag_ub & ((unsigned)MPIR_Process.attrs.tag_ub + 1)) == 0);
 
     /* Set aside tag space for tagged collectives and failure notification */
     MPIR_Process.attrs.tag_ub     >>= 3;
@@ -515,7 +515,7 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
     MPIR_Process.tagged_coll_mask   = MPIR_Process.attrs.tag_ub + 1;
 
     /* Assert: tag_ub is at least the minimum asked for in the MPI spec */
-    MPIU_Assert( MPIR_Process.attrs.tag_ub >= 32767 );
+    MPIR_Assert( MPIR_Process.attrs.tag_ub >= 32767 );
 
     /* Capture the level of thread support provided */
     MPIR_ThreadInfo.thread_provided = thread_provided;
@@ -525,7 +525,7 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
 #endif /* MPICH_IS_THREADED */
 
     /* FIXME: Define these in the interface.  Does Timer init belong here? */
-    MPIU_Timer_init(MPIR_Process.comm_world->rank,
+    MPII_Timer_init(MPIR_Process.comm_world->rank,
 		    MPIR_Process.comm_world->local_size);
 #ifdef USE_MEMORY_TRACING
 #ifdef MPICH_IS_THREADED
@@ -580,10 +580,10 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided)
 #endif
 
     /* FIXME: Does this need to come before the call to MPID_InitComplete?
-       For some debugger support, MPIR_WaitForDebugger may want to use
+       For some debugger support, MPII_Wait_for_debugger may want to use
        MPI communication routines to collect information for the debugger */
 #ifdef HAVE_DEBUGGER_SUPPORT
-    MPIR_WaitForDebugger();
+    MPII_Wait_for_debugger();
 #endif
 
     /* Let the device know that the rest of the init process is completed */
@@ -660,14 +660,14 @@ int MPI_Init_thread( int *argc, char ***argv, int required, int *provided )
 {
     int mpi_errno = MPI_SUCCESS;
     int rc ATTRIBUTE((unused)), reqd = required;
-    MPID_MPI_INIT_STATE_DECL(MPID_STATE_MPI_INIT_THREAD);
+    MPIR_FUNC_TERSE_INIT_STATE_DECL(MPID_STATE_MPI_INIT_THREAD);
 
     rc = MPID_Wtime_init();
 #ifdef MPL_USE_DBG_LOGGING
     MPL_dbg_pre_init( argc, argv, rc );
 #endif
 
-    MPID_MPI_INIT_FUNC_ENTER(MPID_STATE_MPI_INIT_THREAD);
+    MPIR_FUNC_TERSE_INIT_ENTER(MPID_STATE_MPI_INIT_THREAD);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -717,7 +717,7 @@ int MPI_Init_thread( int *argc, char ***argv, int required, int *provided )
 
     /* ... end of body of routine ... */
 
-    MPID_MPI_INIT_FUNC_EXIT(MPID_STATE_MPI_INIT_THREAD);
+    MPIR_FUNC_TERSE_INIT_EXIT(MPID_STATE_MPI_INIT_THREAD);
     return mpi_errno;
 
   fn_fail:
@@ -731,7 +731,7 @@ int MPI_Init_thread( int *argc, char ***argv, int required, int *provided )
     }
 #   endif
     mpi_errno = MPIR_Err_return_comm( 0, FCNAME, mpi_errno );
-    MPID_MPI_INIT_FUNC_EXIT(MPID_STATE_MPI_INIT_THREAD);
+    MPIR_FUNC_TERSE_INIT_EXIT(MPID_STATE_MPI_INIT_THREAD);
 
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
diff --git a/src/mpi/init/ismain.c b/src/mpi/init/ismain.c
index 47a1295..0d32580 100644
--- a/src/mpi/init/ismain.c
+++ b/src/mpi/init/ismain.c
@@ -49,7 +49,7 @@ int MPI_Is_thread_main( int *flag )
     static const char FCNAME[] = "MPI_Is_thread_main";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IS_THREAD_MAIN);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IS_THREAD_MAIN);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 #   ifdef HAVE_ERROR_CHECKING
@@ -62,7 +62,7 @@ int MPI_Is_thread_main( int *flag )
     }
 #   endif /* HAVE_ERROR_CHECKING */
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IS_THREAD_MAIN);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IS_THREAD_MAIN);
     
     /* ... body of routine ...  */
 #   if MPICH_THREAD_LEVEL <= MPI_THREAD_FUNNELED || ! defined(MPICH_IS_THREADED)
@@ -82,7 +82,7 @@ int MPI_Is_thread_main( int *flag )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IS_THREAD_MAIN);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IS_THREAD_MAIN);
     return mpi_errno;
     
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/init/querythread.c b/src/mpi/init/querythread.c
index ae281d8..5f79cef 100644
--- a/src/mpi/init/querythread.c
+++ b/src/mpi/init/querythread.c
@@ -66,11 +66,11 @@ int MPI_Query_thread( int *provided )
     static const char FCNAME[] = "MPI_Query_thread";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_QUERY_THREAD);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_QUERY_THREAD);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_QUERY_THREAD);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_QUERY_THREAD);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -89,7 +89,7 @@ int MPI_Query_thread( int *provided )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_QUERY_THREAD);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_QUERY_THREAD);
     return mpi_errno;
     
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/misc/aint_add.c b/src/mpi/misc/aint_add.c
index 7bad236..598d88e 100644
--- a/src/mpi/misc/aint_add.c
+++ b/src/mpi/misc/aint_add.c
@@ -56,13 +56,13 @@ as if the process that originally produced base had called:
 MPI_Aint MPI_Aint_add(MPI_Aint base, MPI_Aint disp)
 {
     MPI_Aint result;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_AINT_ADD);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_AINT_ADD);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_AINT_ADD);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_AINT_ADD);
     result = MPID_Aint_add(base, disp);
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_AINT_ADD);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_AINT_ADD);
 
     return result;
 }
diff --git a/src/mpi/misc/aint_diff.c b/src/mpi/misc/aint_diff.c
index 61147e4..5d69cff 100644
--- a/src/mpi/misc/aint_diff.c
+++ b/src/mpi/misc/aint_diff.c
@@ -57,13 +57,13 @@ on the addresses initially passed to MPI_GET_ADDRESS.
 MPI_Aint MPI_Aint_diff(MPI_Aint addr1, MPI_Aint addr2)
 {
     MPI_Aint result;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_AINT_DIFF);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_AINT_DIFF);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_AINT_DIFF);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_AINT_DIFF);
     result = MPID_Aint_diff(addr1, addr2);
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_AINT_DIFF);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_AINT_DIFF);
 
     return result;
 }
diff --git a/src/mpi/misc/getpname.c b/src/mpi/misc/getpname.c
index 26042c8..f4fcaa8 100644
--- a/src/mpi/misc/getpname.c
+++ b/src/mpi/misc/getpname.c
@@ -72,11 +72,11 @@ int MPI_Get_processor_name( char *name, int *resultlen )
 {
     static const char FCNAME[] = "MPI_Get_processor_name";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GET_PROCESSOR_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET_PROCESSOR_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GET_PROCESSOR_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GET_PROCESSOR_NAME);
 
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -100,7 +100,7 @@ int MPI_Get_processor_name( char *name, int *resultlen )
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GET_PROCESSOR_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GET_PROCESSOR_NAME);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/misc/library_version.c b/src/mpi/misc/library_version.c
index 7cb3655..410b190 100644
--- a/src/mpi/misc/library_version.c
+++ b/src/mpi/misc/library_version.c
@@ -48,11 +48,11 @@ Output Parameters:
 int MPI_Get_library_version(char *version, int *resultlen)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GET_LIBRARY_VERSION);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET_LIBRARY_VERSION);
 
     /* Note that this routine may be called before MPI_Init */
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GET_LIBRARY_VERSION);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GET_LIBRARY_VERSION);
 
     /* Validate parameters and objects (post conversion) */
 #ifdef HAVE_ERROR_CHECKING
@@ -77,9 +77,9 @@ int MPI_Get_library_version(char *version, int *resultlen)
                  "MPICH CXX:\t%s\n"
                  "MPICH F77:\t%s\n"
                  "MPICH FC:\t%s\n",
-                 MPIR_Version_string, MPIR_Version_date, MPIR_Version_device,
-                 MPIR_Version_configure, MPIR_Version_CC, MPIR_Version_CXX,
-                 MPIR_Version_F77, MPIR_Version_FC);
+                 MPII_Version_string, MPII_Version_date, MPII_Version_device,
+                 MPII_Version_configure, MPII_Version_CC, MPII_Version_CXX,
+                 MPII_Version_F77, MPII_Version_FC);
 
     *resultlen = (int)strlen(version);
 
@@ -88,7 +88,7 @@ int MPI_Get_library_version(char *version, int *resultlen)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GET_LIBRARY_VERSION);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GET_LIBRARY_VERSION);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/misc/pcontrol.c b/src/mpi/misc/pcontrol.c
index 02627aa..4f76b8f 100644
--- a/src/mpi/misc/pcontrol.c
+++ b/src/mpi/misc/pcontrol.c
@@ -53,11 +53,11 @@ int MPI_Pcontrol(const int level, ...)
 {
     int mpi_errno = MPI_SUCCESS;
     va_list list;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_PCONTROL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_PCONTROL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_PCONTROL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_PCONTROL);
 
     /* ... body of routine ...  */
     
@@ -70,7 +70,7 @@ int MPI_Pcontrol(const int level, ...)
     va_end( list );
 
     /* ... end of body of routine ... */
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_PCONTROL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_PCONTROL);
     return mpi_errno;
     /* There should never be any fn_fail case; this suppresses warnings from
        compilers that object to unused labels */
diff --git a/src/mpi/misc/version.c b/src/mpi/misc/version.c
index 995f065..c4160fb 100644
--- a/src/mpi/misc/version.c
+++ b/src/mpi/misc/version.c
@@ -50,12 +50,12 @@ int MPI_Get_version( int *version, int *subversion )
     static const char FCNAME[] = "MPI_Get_version";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GET_VERSION);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET_VERSION);
 
     /* Note that this routine may be called before MPI_Init */
     /* MPIR_ERRTEST_INITIALIZED_ORDIE(); */
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GET_VERSION);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GET_VERSION);
     
     /* Validate parameters and objects (post conversion) */
 #   ifdef HAVE_ERROR_CHECKING
@@ -79,7 +79,7 @@ int MPI_Get_version( int *version, int *subversion )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GET_VERSION);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GET_VERSION);
     return mpi_errno;
     
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/pt2pt/bsend.c b/src/mpi/pt2pt/bsend.c
index d0327fb..4aeef58 100644
--- a/src/mpi/pt2pt/bsend.c
+++ b/src/mpi/pt2pt/bsend.c
@@ -93,12 +93,12 @@ int MPI_Bsend(const void *buf, int count, MPI_Datatype datatype, int dest, int t
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_BSEND);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_BSEND);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPI_BSEND);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(MPID_STATE_MPI_BSEND);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -172,7 +172,7 @@ int MPI_Bsend(const void *buf, int count, MPI_Datatype datatype, int dest, int t
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_BSEND);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_BSEND);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 	
diff --git a/src/mpi/pt2pt/bsend_init.c b/src/mpi/pt2pt/bsend_init.c
index 781d88b..2ddc1ad 100644
--- a/src/mpi/pt2pt/bsend_init.c
+++ b/src/mpi/pt2pt/bsend_init.c
@@ -66,12 +66,12 @@ int MPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *request_ptr = NULL;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_BSEND_INIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_BSEND_INIT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_BSEND_INIT);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_BSEND_INIT);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -122,7 +122,7 @@ int MPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype,
     mpi_errno = MPID_Bsend_init(buf, count, datatype, dest, tag, comm_ptr,
 				MPIR_CONTEXT_INTRA_PT2PT, &request_ptr);
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-    MPIR_SENDQ_REMEMBER(request_ptr, dest, tag, comm_ptr->context_id);
+    MPII_SENDQ_REMEMBER(request_ptr, dest, tag, comm_ptr->context_id);
 
     /* return the handle of the request to the user */
     MPIR_OBJ_PUBLISH_HANDLE(*request, request_ptr->handle);
@@ -130,7 +130,7 @@ int MPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_BSEND_INIT);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_BSEND_INIT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/bsendutil.c b/src/mpi/pt2pt/bsendutil.c
index 04ab5a2..32e71c6 100644
--- a/src/mpi/pt2pt/bsendutil.c
+++ b/src/mpi/pt2pt/bsendutil.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpir_bsend.h"
+#include "mpii_bsend.h"
 #include "bsendutil.h"
 
 /*
@@ -13,7 +13,7 @@
  * By storing total_size along with "size available for messages", we
  * avoid any complexities associated with alignment, since we must
  * ensure that each KPIR_Bsend_data_t structure is properly aligned
- * (i.e., we can't simply do (sizeof(MPIR_Bsend_data_t) + size) to get
+ * (i.e., we can't simply do (sizeof(MPII_Bsend_data_t) + size) to get
  * total_size).
  *
  * Function Summary
@@ -21,7 +21,7 @@
  *   MPIR_Bsend_detach - Performs the work of MPI_Buffer_detach
  *   MPIR_Bsend_isend  - Essentially performs an MPI_Ibsend.  Returns
  *                an MPIR_Request that is also stored internally in the
- *                corresponding MPIR_Bsend_data_t entry
+ *                corresponding MPII_Bsend_data_t entry
  *   MPIR_Bsend_free_segment - Free a buffer that is no longer needed,
  *                merging with adjacent segments
  *   MPIR_Bsend_check_active - Check for completion of any pending sends
@@ -36,14 +36,14 @@
  *   MPIR_Bsend_take_buffer - Find and acquire a buffer for a message
  *   MPIR_Bsend_finalize - Finalize handler when Bsend routines are used 
  *   MPIR_Bsend_dump - Debugging routine to print the contents of the control
- *                information in the bsend buffer (the MPIR_Bsend_data_t entries)
+ *                information in the bsend buffer (the MPII_Bsend_data_t entries)
  */
 
 #ifdef MPL_USE_DBG_LOGGING
 static void MPIR_Bsend_dump( void );
 #endif
 
-#define BSENDDATA_HEADER_TRUE_SIZE (sizeof(MPIR_Bsend_data_t) - sizeof(double))
+#define BSENDDATA_HEADER_TRUE_SIZE (sizeof(MPII_Bsend_data_t) - sizeof(double))
 
 /* BsendBuffer is the structure that describes the overall Bsend buffer */
 /* 
@@ -62,13 +62,13 @@ static struct BsendBuffer {
 					  the user */
     size_t             origbuffer_size;/* Size of the buffer as provided 
 					    by the user */
-    MPIR_Bsend_data_t  *avail;         /* Pointer to the first available block
+    MPII_Bsend_data_t  *avail;         /* Pointer to the first available block
 					  of space */
-    MPIR_Bsend_data_t  *pending;       /* Pointer to the first message that
+    MPII_Bsend_data_t  *pending;       /* Pointer to the first message that
 					  could not be sent because of a 
 					  resource limit (e.g., no requests
 					  available) */
-    MPIR_Bsend_data_t  *active;        /* Pointer to the first active (sending)
+    MPII_Bsend_data_t  *active;        /* Pointer to the first active (sending)
 					  message */
 } BsendBuffer = { 0, 0, 0, 0, 0, 0, 0 };
 
@@ -78,10 +78,10 @@ static int initialized = 0;   /* keep track of the first call to any
 /* Forward references */
 static void MPIR_Bsend_retry_pending( void );
 static int MPIR_Bsend_check_active ( void );
-static MPIR_Bsend_data_t *MPIR_Bsend_find_buffer( int );
-static void MPIR_Bsend_take_buffer( MPIR_Bsend_data_t *, int );
+static MPII_Bsend_data_t *MPIR_Bsend_find_buffer( int );
+static void MPIR_Bsend_take_buffer( MPII_Bsend_data_t *, int );
 static int MPIR_Bsend_finalize( void * );
-static void MPIR_Bsend_free_segment( MPIR_Bsend_data_t * );
+static void MPIR_Bsend_free_segment( MPII_Bsend_data_t * );
 
 /*
  * Attach a buffer.  This checks for the error conditions and then
@@ -93,7 +93,7 @@ static void MPIR_Bsend_free_segment( MPIR_Bsend_data_t * );
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Bsend_attach( void *buffer, int buffer_size )
 {
-    MPIR_Bsend_data_t *p;
+    MPII_Bsend_data_t *p;
     size_t offset, align_sz;
 
 #   ifdef HAVE_ERROR_CHECKING
@@ -147,7 +147,7 @@ int MPIR_Bsend_attach( void *buffer, int buffer_size )
     BsendBuffer.active		= 0;
 
     /* Set the first block */
-    p		  = (MPIR_Bsend_data_t *)buffer;
+    p		  = (MPII_Bsend_data_t *)buffer;
     p->size	  = buffer_size - BSENDDATA_HEADER_TRUE_SIZE;
     p->total_size = buffer_size;
     p->next	  = p->prev = NULL;
@@ -175,7 +175,7 @@ int MPIR_Bsend_detach( void *bufferp, int *size )
     }
     if (BsendBuffer.active) {
 	/* Loop through each active element and wait on it */
-	MPIR_Bsend_data_t *p = BsendBuffer.active;
+	MPII_Bsend_data_t *p = BsendBuffer.active;
 
 	while (p) {
 	    MPI_Request r = p->request->handle;
@@ -209,11 +209,11 @@ int MPIR_Bsend_detach( void *bufferp, int *size )
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIR_Bsend_isend(const void *buf, int count, MPI_Datatype dtype,
                      int dest, int tag, MPIR_Comm *comm_ptr,
-                     MPIR_Bsend_kind_t kind, MPIR_Request **request )
+                     MPII_Bsend_kind_t kind, MPIR_Request **request )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIR_Bsend_data_t *p;
-    MPIR_Bsend_msg_t *msg;
+    MPII_Bsend_data_t *p;
+    MPII_Bsend_msg_t *msg;
     MPI_Aint packsize;
     int pass;
 
@@ -222,7 +222,7 @@ int MPIR_Bsend_isend(const void *buf, int count, MPI_Datatype dtype,
        no copying.
 
        We may want to decide here whether we need to pack at all 
-       or if we can just use (a MPIU_Memcpy) of the buffer.
+       or if we can just use (a MPIR_Memcpy) of the buffer.
     */
 
 
@@ -256,7 +256,7 @@ int MPIR_Bsend_isend(const void *buf, int count, MPI_Datatype dtype,
 	    /* Pack the data into the buffer */
 	    /* We may want to optimize for the special case of
 	       either primative or contiguous types, and just
-	       use MPIU_Memcpy and the provided datatype */
+	       use MPIR_Memcpy and the provided datatype */
 	    msg->count = 0;
             if (dtype != MPI_PACKED)
             {
@@ -265,7 +265,7 @@ int MPIR_Bsend_isend(const void *buf, int count, MPI_Datatype dtype,
             }
             else
             {
-                MPIU_Memcpy(p->msg.msgbuf, buf, count);
+                MPIR_Memcpy(p->msg.msgbuf, buf, count);
                 p->msg.count = count;
             }
 	    /* Try to send the message.  We must use MPID_Isend
@@ -329,7 +329,7 @@ int MPIR_Bsend_isend(const void *buf, int count, MPI_Datatype dtype,
 int MPIR_Bsend_free_req_seg( MPIR_Request* req )
 {
     int mpi_errno = MPI_ERR_INTERN;
-    MPIR_Bsend_data_t *active = BsendBuffer.active;
+    MPII_Bsend_data_t *active = BsendBuffer.active;
 
     MPL_DBG_MSG_P(MPIR_DBG_BSEND,TYPICAL,"Checking active starting at %p", active);
     while (active) {
@@ -364,9 +364,9 @@ int MPIR_Bsend_free_req_seg( MPIR_Request* req )
 #define FUNCNAME MPIR_Bsend_free_segment
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-static void MPIR_Bsend_free_segment( MPIR_Bsend_data_t *p )
+static void MPIR_Bsend_free_segment( MPII_Bsend_data_t *p )
 {
-    MPIR_Bsend_data_t *prev = p->prev, *avail = BsendBuffer.avail, *avail_prev;
+    MPII_Bsend_data_t *prev = p->prev, *avail = BsendBuffer.avail, *avail_prev;
 
     MPL_DBG_MSG_FMT(MPIR_DBG_BSEND,TYPICAL,(MPL_DBG_FDEST,
                  "Freeing bsend segment at %p of size %llu, next at %p",
@@ -461,7 +461,7 @@ static void MPIR_Bsend_free_segment( MPIR_Bsend_data_t *p )
 static int MPIR_Bsend_check_active( void )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIR_Bsend_data_t *active = BsendBuffer.active, *next_active;
+    MPII_Bsend_data_t *active = BsendBuffer.active, *next_active;
 
     MPL_DBG_MSG_P(MPIR_DBG_BSEND,TYPICAL,"Checking active starting at %p", active);
     while (active) {
@@ -479,7 +479,7 @@ static int MPIR_Bsend_check_active( void )
 	       the request (it is a grequest, the free call will do it) */
 	    flag = 0;
             /* XXX DJG FIXME-MT should we be checking this? */
-	    if (MPIU_Object_get_ref(active->request) == 1) {
+	    if (MPIR_Object_get_ref(active->request) == 1) {
 		mpi_errno = MPIR_Test_impl(&r, &flag, MPI_STATUS_IGNORE );
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 	    } else {
@@ -516,7 +516,7 @@ static int MPIR_Bsend_check_active( void )
  */
 static void MPIR_Bsend_retry_pending( void )
 {
-    MPIR_Bsend_data_t *pending = BsendBuffer.pending, *next_pending;
+    MPII_Bsend_data_t *pending = BsendBuffer.pending, *next_pending;
 
     while (pending) {
 	next_pending = pending->next;
@@ -530,9 +530,9 @@ static void MPIR_Bsend_retry_pending( void )
  * Find a slot in the avail buffer that can hold size bytes.  Does *not*
  * remove the slot from the avail buffer (see MPIR_Bsend_take_buffer) 
  */
-static MPIR_Bsend_data_t *MPIR_Bsend_find_buffer( int size )
+static MPII_Bsend_data_t *MPIR_Bsend_find_buffer( int size )
 {
-    MPIR_Bsend_data_t *p = BsendBuffer.avail;
+    MPII_Bsend_data_t *p = BsendBuffer.avail;
 
     while (p) {
 	if (p->size >= size) { 
@@ -552,9 +552,9 @@ static MPIR_Bsend_data_t *MPIR_Bsend_find_buffer( int size )
  * If there isn't enough left of p, remove the entire segment from
  * the avail list.
  */
-static void MPIR_Bsend_take_buffer( MPIR_Bsend_data_t *p, int size  )
+static void MPIR_Bsend_take_buffer( MPII_Bsend_data_t *p, int size  )
 {
-    MPIR_Bsend_data_t *prev;
+    MPII_Bsend_data_t *prev;
     int         alloc_size;
 
     /* Compute the remaining size.  This must include any padding 
@@ -573,10 +573,10 @@ static void MPIR_Bsend_take_buffer( MPIR_Bsend_data_t *p, int size  )
     if (alloc_size + (int)BSENDDATA_HEADER_TRUE_SIZE + MIN_BUFFER_BLOCK <= p->size) {
 	/* Yes, the available space (p->size) is large enough to 
 	   carve out a new block */
-	MPIR_Bsend_data_t *newp;
+	MPII_Bsend_data_t *newp;
 	
 	MPL_DBG_MSG_P(MPIR_DBG_BSEND,TYPICAL,"Breaking block into used and allocated at %p", p );
-	newp = (MPIR_Bsend_data_t *)( (char *)p + BSENDDATA_HEADER_TRUE_SIZE + 
+	newp = (MPII_Bsend_data_t *)( (char *)p + BSENDDATA_HEADER_TRUE_SIZE +
 				alloc_size );
 	newp->total_size = p->total_size - alloc_size - 
 	    BSENDDATA_HEADER_TRUE_SIZE;
@@ -645,7 +645,7 @@ static int MPIR_Bsend_finalize( void *p ATTRIBUTE((unused)) )
 #ifdef MPL_USE_DBG_LOGGING
 static void MPIR_Bsend_dump( void )
 {
-    MPIR_Bsend_data_t *a = BsendBuffer.avail;
+    MPII_Bsend_data_t *a = BsendBuffer.avail;
 
     MPL_DBG_MSG_D(MPIR_DBG_BSEND, TYPICAL, "Total size is %llu",
                    (unsigned long long) BsendBuffer.buffer_size);
diff --git a/src/mpi/pt2pt/bsendutil.h b/src/mpi/pt2pt/bsendutil.h
index 50fff28..124cc79 100644
--- a/src/mpi/pt2pt/bsendutil.h
+++ b/src/mpi/pt2pt/bsendutil.h
@@ -5,12 +5,12 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-#include "mpir_bsend.h"
+#include "mpii_bsend.h"
 
 /* Function Prototypes for the bsend utility functions */
 int MPIR_Bsend_attach( void *, int );
 int MPIR_Bsend_detach( void *, int * );
 int MPIR_Bsend_isend(const void *, int, MPI_Datatype, int, int, MPIR_Comm *,
-                     MPIR_Bsend_kind_t, MPIR_Request ** );
+                     MPII_Bsend_kind_t, MPIR_Request ** );
 int MPIR_Bsend_free_req_seg(MPIR_Request * );
 
diff --git a/src/mpi/pt2pt/bufattach.c b/src/mpi/pt2pt/bufattach.c
index 37529c0..e32ac68 100644
--- a/src/mpi/pt2pt/bufattach.c
+++ b/src/mpi/pt2pt/bufattach.c
@@ -78,12 +78,12 @@ int MPI_Buffer_attach(void *buffer, int size)
 {
     static const char FCNAME[] = "MPI_Buffer_attach";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_BUFFER_ATTACH);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_BUFFER_ATTACH);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_BUFFER_ATTACH);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_BUFFER_ATTACH);
     
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -103,7 +103,7 @@ int MPI_Buffer_attach(void *buffer, int size)
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_BUFFER_ATTACH);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_BUFFER_ATTACH);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/buffree.c b/src/mpi/pt2pt/buffree.c
index 7332b25..08650bb 100644
--- a/src/mpi/pt2pt/buffree.c
+++ b/src/mpi/pt2pt/buffree.c
@@ -95,12 +95,12 @@ int MPI_Buffer_detach(void *buffer_addr, int *size)
 {
     static const char FCNAME[] = "MPI_Buffer_detach";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_BUFFER_DETACH);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_BUFFER_DETACH);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_BUFFER_DETACH);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_BUFFER_DETACH);
     
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -120,7 +120,7 @@ int MPI_Buffer_detach(void *buffer_addr, int *size)
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_BUFFER_DETACH);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_BUFFER_DETACH);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/cancel.c b/src/mpi/pt2pt/cancel.c
index 8ceb998..0a1058a 100644
--- a/src/mpi/pt2pt/cancel.c
+++ b/src/mpi/pt2pt/cancel.c
@@ -160,12 +160,12 @@ int MPI_Cancel(MPI_Request *request)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request * request_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CANCEL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CANCEL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_CANCEL);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_CANCEL);
     
     /* Convert MPI object handles to object pointers */
     MPIR_Request_get_ptr( *request, request_ptr );
@@ -191,7 +191,7 @@ int MPI_Cancel(MPI_Request *request)
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_CANCEL);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_CANCEL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/greq_complete.c b/src/mpi/pt2pt/greq_complete.c
index 19749c7..d6e018e 100644
--- a/src/mpi/pt2pt/greq_complete.c
+++ b/src/mpi/pt2pt/greq_complete.c
@@ -68,12 +68,12 @@ int MPI_Grequest_complete( MPI_Request request )
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *request_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GREQUEST_COMPLETE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GREQUEST_COMPLETE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GREQUEST_COMPLETE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GREQUEST_COMPLETE);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -114,7 +114,7 @@ int MPI_Grequest_complete( MPI_Request request )
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GREQUEST_COMPLETE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GREQUEST_COMPLETE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/pt2pt/greq_start.c b/src/mpi/pt2pt/greq_start.c
index ed9a159..e7a1d95 100644
--- a/src/mpi/pt2pt/greq_start.c
+++ b/src/mpi/pt2pt/greq_start.c
@@ -36,7 +36,7 @@ PMPI_LOCAL int MPIR_Grequest_free_classes_on_finalize(void *extra_data);
 
 MPIR_Grequest_class MPIR_Grequest_class_direct[MPIR_GREQ_CLASS_PREALLOC] =
                                               { {0} };
-MPIU_Object_alloc_t MPIR_Grequest_class_mem = {0, 0, 0, 0, MPIR_GREQ_CLASS,
+MPIR_Object_alloc_t MPIR_Grequest_class_mem = {0, 0, 0, 0, MPIR_GREQ_CLASS,
 	                                       sizeof(MPIR_Grequest_class),
 					       MPIR_Grequest_class_direct,
 					       MPIR_GREQ_CLASS_PREALLOC, };
@@ -64,7 +64,7 @@ PMPI_LOCAL int MPIR_Grequest_free_classes_on_finalize(void *extra_data ATTRIBUTE
     while (cur) {
         last = cur;
         cur = last->next;
-        MPIU_Handle_obj_free(&MPIR_Grequest_class_mem, last);
+        MPIR_Handle_obj_free(&MPIR_Grequest_class_mem, last);
     }
 
     return mpi_errno;
@@ -80,18 +80,18 @@ int MPIR_Grequest_start_impl(MPI_Grequest_query_function *query_fn,
                              void *extra_state, MPIR_Request **request_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
     /* MT FIXME this routine is not thread-safe in the non-global case */
     
     *request_ptr = MPIR_Request_create(MPIR_REQUEST_KIND__GREQUEST);
     MPIR_ERR_CHKANDJUMP1(request_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "generalized request");
     
-    MPIU_Object_set_ref( *request_ptr, 1 );
+    MPIR_Object_set_ref( *request_ptr, 1 );
     (*request_ptr)->cc_ptr               = &(*request_ptr)->cc;
     MPIR_cc_set((*request_ptr)->cc_ptr, 1);
     (*request_ptr)->comm                 = NULL;
-    MPIU_CHKPMEM_MALLOC((*request_ptr)->u.ureq.greq_fns, struct MPIR_Grequest_fns *, sizeof(struct MPIR_Grequest_fns), mpi_errno, "greq_fns");
+    MPIR_CHKPMEM_MALLOC((*request_ptr)->u.ureq.greq_fns, struct MPIR_Grequest_fns *, sizeof(struct MPIR_Grequest_fns), mpi_errno, "greq_fns");
     (*request_ptr)->u.ureq.greq_fns->cancel_fn            = cancel_fn;
     (*request_ptr)->u.ureq.greq_fns->free_fn              = free_fn;
     (*request_ptr)->u.ureq.greq_fns->query_fn             = query_fn;
@@ -105,18 +105,18 @@ int MPIR_Grequest_start_impl(MPI_Grequest_query_function *query_fn,
      * we test or wait on it. */
     MPIR_Request_add_ref((*request_ptr));
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
 
 #else
 extern MPIR_Grequest_class MPIR_Grequest_class_direct[];
-extern MPIU_Object_alloc_t MPIR_Grequest_class_mem;
+extern MPIR_Object_alloc_t MPIR_Grequest_class_mem;
 extern int MPIR_Grequest_registered_finalizer;
 extern MPIR_Grequest_class *MPIR_Grequest_class_list;
 #endif
@@ -178,12 +178,12 @@ int MPI_Grequest_start( MPI_Grequest_query_function *query_fn,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *request_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GREQUEST_START);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GREQUEST_START);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GREQUEST_START);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GREQUEST_START);
 
     /* Validate parameters if error checking is enabled */
 #   ifdef HAVE_ERROR_CHECKING
@@ -206,7 +206,7 @@ int MPI_Grequest_start( MPI_Grequest_query_function *query_fn,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GREQUEST_START);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GREQUEST_START);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
@@ -266,7 +266,7 @@ int MPIX_Grequest_class_create(MPI_Grequest_query_function *query_fn,
 	int mpi_errno = MPI_SUCCESS;
 
 	class_ptr = (MPIR_Grequest_class *)
-		MPIU_Handle_obj_alloc(&MPIR_Grequest_class_mem);
+		MPIR_Handle_obj_alloc(&MPIR_Grequest_class_mem);
         /* --BEGIN ERROR HANDLING-- */
 	if (!class_ptr)
 	{
@@ -284,7 +284,7 @@ int MPIX_Grequest_class_create(MPI_Grequest_query_function *query_fn,
 	class_ptr->poll_fn = poll_fn;
 	class_ptr->wait_fn = wait_fn;
 
-	MPIU_Object_set_ref(class_ptr, 1);
+	MPIR_Object_set_ref(class_ptr, 1);
 
         if (MPIR_Grequest_class_list == NULL) {
             class_ptr->next = NULL;
diff --git a/src/mpi/pt2pt/ibsend.c b/src/mpi/pt2pt/ibsend.c
index 77cbfde..803716d 100644
--- a/src/mpi/pt2pt/ibsend.c
+++ b/src/mpi/pt2pt/ibsend.c
@@ -74,7 +74,7 @@ PMPI_LOCAL int MPIR_Ibsend_cancel( void *extra, int complete )
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     mpi_errno = MPIR_Wait_impl( &req_hdl, &status );
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIR_Test_cancelled_impl( &status, &ibsend_info->cancelled );
+    ibsend_info->cancelled = MPIR_STATUS_GET_CANCEL_BIT(status);
 
     /* If the cancelation is successful, free the memory in the
        attached buffer used by the request */
@@ -105,7 +105,7 @@ int MPIR_Ibsend_impl(const void *buf, int count, MPI_Datatype datatype, int dest
     mpi_errno = MPIR_Bsend_isend( buf, count, datatype, dest, tag, comm_ptr,
 				  IBSEND, &request_ptr );
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-    MPIR_SENDQ_REMEMBER(request_ptr, dest, tag, comm_ptr->context_id);
+    MPII_SENDQ_REMEMBER(request_ptr, dest, tag, comm_ptr->context_id);
 
     /* FIXME: use the memory management macros */
     ibinfo = (ibsend_req_info *)MPL_malloc( sizeof(ibsend_req_info) );
@@ -165,12 +165,12 @@ int MPI_Ibsend(const void *buf, int count, MPI_Datatype datatype, int dest, int
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IBSEND);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IBSEND);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPI_IBSEND);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(MPID_STATE_MPI_IBSEND);
 
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -231,7 +231,7 @@ int MPI_Ibsend(const void *buf, int count, MPI_Datatype datatype, int dest, int
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_IBSEND);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_IBSEND);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/pt2pt/improbe.c b/src/mpi/pt2pt/improbe.c
index 5d0320d..51012ed 100644
--- a/src/mpi/pt2pt/improbe.c
+++ b/src/mpi/pt2pt/improbe.c
@@ -57,10 +57,10 @@ int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag, MPI_Message *mess
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *msgp = NULL;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IMPROBE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IMPROBE);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IMPROBE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IMPROBE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -101,7 +101,7 @@ int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag, MPI_Message *mess
 
     if (*flag) {
 	if (msgp == NULL) {
-	    MPIU_Assert(source == MPI_PROC_NULL);
+	    MPIR_Assert(source == MPI_PROC_NULL);
 	    *message = MPI_MESSAGE_NO_PROC;
 	}
 	else {
@@ -112,7 +112,7 @@ int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag, MPI_Message *mess
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IMPROBE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IMPROBE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/imrecv.c b/src/mpi/pt2pt/imrecv.c
index e5955e5..2ae5cc7 100644
--- a/src/mpi/pt2pt/imrecv.c
+++ b/src/mpi/pt2pt/imrecv.c
@@ -58,10 +58,10 @@ int MPI_Imrecv(void *buf, int count, MPI_Datatype datatype, MPI_Message *message
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *rreq = NULL;
     MPIR_Request *msgp = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IMRECV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IMRECV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_IMRECV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IMRECV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -113,14 +113,14 @@ int MPI_Imrecv(void *buf, int count, MPI_Datatype datatype, MPI_Message *message
     mpi_errno = MPID_Imrecv(buf, count, datatype, msgp, &rreq);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(rreq != NULL);
+    MPIR_Assert(rreq != NULL);
     *request = rreq->handle;
     *message = MPI_MESSAGE_NULL;
 
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_IMRECV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IMRECV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/iprobe.c b/src/mpi/pt2pt/iprobe.c
index 53a2641..94a6029 100644
--- a/src/mpi/pt2pt/iprobe.c
+++ b/src/mpi/pt2pt/iprobe.c
@@ -60,12 +60,12 @@ int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag,
     static const char FCNAME[] = "MPI_Iprobe";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IPROBE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IPROBE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_IPROBE);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_IPROBE);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -110,7 +110,7 @@ int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_IPROBE);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_IPROBE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/irecv.c b/src/mpi/pt2pt/irecv.c
index 72d9a6f..1980d23 100644
--- a/src/mpi/pt2pt/irecv.c
+++ b/src/mpi/pt2pt/irecv.c
@@ -65,12 +65,12 @@ int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IRECV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IRECV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_BACK(MPID_STATE_MPI_IRECV);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_BACK(MPID_STATE_MPI_IRECV);
 
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -141,7 +141,7 @@ int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT_BACK(MPID_STATE_MPI_IRECV);
+    MPIR_FUNC_TERSE_PT2PT_EXIT_BACK(MPID_STATE_MPI_IRECV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/irsend.c b/src/mpi/pt2pt/irsend.c
index ce19356..6e9ded7 100644
--- a/src/mpi/pt2pt/irsend.c
+++ b/src/mpi/pt2pt/irsend.c
@@ -66,12 +66,12 @@ int MPI_Irsend(const void *buf, int count, MPI_Datatype datatype, int dest, int
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_IRSEND);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IRSEND);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPI_IRSEND);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(MPID_STATE_MPI_IRSEND);
 
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -127,7 +127,7 @@ int MPI_Irsend(const void *buf, int count, MPI_Datatype datatype, int dest, int
     mpi_errno = MPID_Irsend(buf, count, datatype, dest, tag, comm_ptr,
 			    MPIR_CONTEXT_INTRA_PT2PT, &request_ptr);
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-    MPIR_SENDQ_REMEMBER(request_ptr,dest,tag,comm_ptr->context_id);
+    MPII_SENDQ_REMEMBER(request_ptr,dest,tag,comm_ptr->context_id);
 	
     /* return the handle of the request to the user */
     /* MPIU_OBJ_HANDLE_PUBLISH is unnecessary for irsend, lower-level access is
@@ -138,7 +138,7 @@ int MPI_Irsend(const void *buf, int count, MPI_Datatype datatype, int dest, int
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_IRSEND);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_IRSEND);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/pt2pt/isend.c b/src/mpi/pt2pt/isend.c
index e41adca..0ca0608 100644
--- a/src/mpi/pt2pt/isend.c
+++ b/src/mpi/pt2pt/isend.c
@@ -64,12 +64,12 @@ int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int t
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ISEND);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ISEND);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPI_ISEND);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(MPID_STATE_MPI_ISEND);
 
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -126,7 +126,7 @@ int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int t
 			   MPIR_CONTEXT_INTRA_PT2PT, &request_ptr);
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
 
-    MPIR_SENDQ_REMEMBER(request_ptr,dest,tag,comm_ptr->context_id);
+    MPII_SENDQ_REMEMBER(request_ptr,dest,tag,comm_ptr->context_id);
 
     /* return the handle of the request to the user */
     /* MPIU_OBJ_HANDLE_PUBLISH is unnecessary for isend, lower-level access is
@@ -137,7 +137,7 @@ int MPI_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int t
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_ISEND);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_ISEND);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/pt2pt/issend.c b/src/mpi/pt2pt/issend.c
index f22b216..9a5630c 100644
--- a/src/mpi/pt2pt/issend.c
+++ b/src/mpi/pt2pt/issend.c
@@ -65,12 +65,12 @@ int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ISSEND);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ISSEND);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPI_ISSEND);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(MPID_STATE_MPI_ISSEND);
 
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -126,7 +126,7 @@ int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int
     mpi_errno = MPID_Issend(buf, count, datatype, dest, tag, comm_ptr,
 			    MPIR_CONTEXT_INTRA_PT2PT, &request_ptr);
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-    MPIR_SENDQ_REMEMBER(request_ptr,dest,tag,comm_ptr->context_id);
+    MPII_SENDQ_REMEMBER(request_ptr,dest,tag,comm_ptr->context_id);
 
     /* return the handle of the request to the user */
     /* MPIU_OBJ_HANDLE_PUBLISH is unnecessary for issend, lower-level access is
@@ -137,7 +137,7 @@ int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_ISSEND);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_ISSEND);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/mpir_request.c b/src/mpi/pt2pt/mpir_request.c
index 511f14a..d61cd1c 100644
--- a/src/mpi/pt2pt/mpir_request.c
+++ b/src/mpi/pt2pt/mpir_request.c
@@ -10,7 +10,7 @@
 /* style:PMPIuse:PMPI_Status_f2c:2 sig:0 */
 
 MPIR_Request MPIR_Request_direct[MPIR_REQUEST_PREALLOC] = {{0}};
-MPIU_Object_alloc_t MPIR_Request_mem = {
+MPIR_Object_alloc_t MPIR_Request_mem = {
     0, 0, 0, 0, MPIR_REQUEST, sizeof(MPIR_Request), MPIR_Request_direct,
     MPIR_REQUEST_PREALLOC };
 
@@ -61,8 +61,8 @@ fn_exit:
    was inactive and did not require any extra completion operation.
 
    If debugger information is being provided for pending (user-initiated) 
-   send operations, the macros MPIR_SENDQ_FORGET will be defined to 
-   call the routine MPIR_Sendq_forget; otherwise that macro will be a no-op.
+   send operations, the macros MPII_SENDQ_FORGET will be defined to
+   call the routine MPII_Sendq_forget; otherwise that macro will be a no-op.
    The implementation of the MPIR_Sendq_xxx is in src/mpi/debugger/dbginit.c .
 */
 int MPIR_Request_complete(MPI_Request * request, MPIR_Request * request_ptr,
@@ -80,7 +80,7 @@ int MPIR_Request_complete(MPI_Request * request, MPIR_Request * request_ptr,
 		MPIR_STATUS_SET_CANCEL_BIT(*status, MPIR_STATUS_GET_CANCEL_BIT(request_ptr->status));
 	    }
 	    mpi_errno = request_ptr->status.MPI_ERROR;
-	    MPIR_SENDQ_FORGET(request_ptr);
+	    MPII_SENDQ_FORGET(request_ptr);
 	    MPIR_Request_free(request_ptr);
             if (NULL != request) *request = MPI_REQUEST_NULL;
 	    break;
@@ -372,7 +372,7 @@ int MPIR_Request_get_error(MPIR_Request * request_ptr)
 
 #ifdef HAVE_FORTRAN_BINDING
 /* Set the language type to Fortran for this (generalized) request */
-void MPIR_Grequest_set_lang_f77( MPI_Request greq )
+void MPII_Grequest_set_lang_f77( MPI_Request greq )
 {
     MPIR_Request *greq_ptr;
 
@@ -549,9 +549,9 @@ int MPIR_Grequest_progress_poke(int count,
     void ** state_ptrs;
     int i, j, n_classes, n_native, n_greq;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
-    MPIU_CHKLMEM_MALLOC(state_ptrs, void **, sizeof(void*) * count, mpi_errno, "state_ptrs");
+    MPIR_CHKLMEM_MALLOC(state_ptrs, void **, sizeof(void*) * count, mpi_errno, "state_ptrs");
 
     /* This somewhat messy for-loop computes how many requests are native
      * requests and how many are generalized requests, and how many generalized
@@ -592,7 +592,7 @@ int MPIR_Grequest_progress_poke(int count,
 	}
     }
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -611,9 +611,9 @@ int MPIR_Grequest_waitall(int count, MPIR_Request * const * request_ptrs)
     int i;
     int mpi_error = MPI_SUCCESS;
     MPID_Progress_state progress_state;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
-    MPIU_CHKLMEM_MALLOC(state_ptrs, void *, sizeof(void*)*count, mpi_error, "state_ptrs");
+    MPIR_CHKLMEM_MALLOC(state_ptrs, void *, sizeof(void*)*count, mpi_error, "state_ptrs");
     
         /* DISABLED CODE: The greq wait_fn function returns when ANY
            of the requests completes, rather than all.  Also, once a
@@ -682,7 +682,7 @@ int MPIR_Grequest_waitall(int count, MPIR_Request * const * request_ptrs)
 
         mpi_error = (request_ptrs[i]->u.ureq.greq_fns->wait_fn)(1, &request_ptrs[i]->u.ureq.greq_fns->grequest_extra_state, 0, NULL);
         if (mpi_error) MPIR_ERR_POP(mpi_error);
-        MPIU_Assert(MPIR_Request_is_complete(request_ptrs[i]));
+        MPIR_Assert(MPIR_Request_is_complete(request_ptrs[i]));
     }
 
     MPID_Progress_start(&progress_state);
@@ -716,7 +716,7 @@ int MPIR_Grequest_waitall(int count, MPIR_Request * const * request_ptrs)
 #endif
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_error;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpi/pt2pt/mprobe.c b/src/mpi/pt2pt/mprobe.c
index 6765dc5..526030f 100644
--- a/src/mpi/pt2pt/mprobe.c
+++ b/src/mpi/pt2pt/mprobe.c
@@ -55,10 +55,10 @@ int MPI_Mprobe(int source, int tag, MPI_Comm comm, MPI_Message *message, MPI_Sta
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *msgp = NULL;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_MPROBE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_MPROBE);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_MPROBE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_MPROBE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -96,7 +96,7 @@ int MPI_Mprobe(int source, int tag, MPI_Comm comm, MPI_Message *message, MPI_Sta
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     if (msgp == NULL) {
-	MPIU_Assert(source == MPI_PROC_NULL);
+	MPIR_Assert(source == MPI_PROC_NULL);
 	*message = MPI_MESSAGE_NO_PROC;
     }
     else {
@@ -106,7 +106,7 @@ int MPI_Mprobe(int source, int tag, MPI_Comm comm, MPI_Message *message, MPI_Sta
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_MPROBE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_MPROBE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/mrecv.c b/src/mpi/pt2pt/mrecv.c
index 64ca570..4bd8b3e 100644
--- a/src/mpi/pt2pt/mrecv.c
+++ b/src/mpi/pt2pt/mrecv.c
@@ -57,10 +57,10 @@ int MPI_Mrecv(void *buf, int count, MPI_Datatype datatype, MPI_Message *message,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *msgp = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_MRECV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_MRECV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_MRECV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_MRECV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -116,7 +116,7 @@ int MPI_Mrecv(void *buf, int count, MPI_Datatype datatype, MPI_Message *message,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_MRECV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_MRECV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/probe.c b/src/mpi/pt2pt/probe.c
index 114d40f..f0a9df4 100644
--- a/src/mpi/pt2pt/probe.c
+++ b/src/mpi/pt2pt/probe.c
@@ -56,12 +56,12 @@ int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status)
     static const char FCNAME[] = "MPI_Probe";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_PROBE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_PROBE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_PROBE);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_PROBE);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -103,7 +103,7 @@ int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_PROBE);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_PROBE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/recv.c b/src/mpi/pt2pt/recv.c
index 3c4bbde..dd774f3 100644
--- a/src/mpi/pt2pt/recv.c
+++ b/src/mpi/pt2pt/recv.c
@@ -71,12 +71,12 @@ int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request * request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_RECV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_RECV);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_BACK(MPID_STATE_MPI_RECV);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_BACK(MPID_STATE_MPI_RECV);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -188,7 +188,7 @@ int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT_BACK(MPID_STATE_MPI_RECV);
+    MPIR_FUNC_TERSE_PT2PT_EXIT_BACK(MPID_STATE_MPI_RECV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/recv_init.c b/src/mpi/pt2pt/recv_init.c
index 14ac702..fe3b402 100644
--- a/src/mpi/pt2pt/recv_init.c
+++ b/src/mpi/pt2pt/recv_init.c
@@ -67,12 +67,12 @@ int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_RECV_INIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_RECV_INIT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_RECV_INIT);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_RECV_INIT);
 
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -135,7 +135,7 @@ int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_RECV_INIT);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_RECV_INIT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/request_free.c b/src/mpi/pt2pt/request_free.c
index 00a119a..5b3a770 100644
--- a/src/mpi/pt2pt/request_free.c
+++ b/src/mpi/pt2pt/request_free.c
@@ -68,12 +68,12 @@ int MPI_Request_free(MPI_Request *request)
     static const char FCNAME[] = "MPI_Request_free";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_REQUEST_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_REQUEST_FREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_REQUEST_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_REQUEST_FREE);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -111,7 +111,7 @@ int MPI_Request_free(MPI_Request *request)
     {
 	case MPIR_REQUEST_KIND__SEND:
 	{
-	    MPIR_SENDQ_FORGET(request_ptr);
+	    MPII_SENDQ_FORGET(request_ptr);
 	    break;
 	}
 	case MPIR_REQUEST_KIND__RECV:
@@ -173,7 +173,7 @@ int MPI_Request_free(MPI_Request *request)
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_REQUEST_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_REQUEST_FREE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/request_get_status.c b/src/mpi/pt2pt/request_get_status.c
index 224cf64..89fd97e 100644
--- a/src/mpi/pt2pt/request_get_status.c
+++ b/src/mpi/pt2pt/request_get_status.c
@@ -58,12 +58,12 @@ int MPI_Request_get_status(MPI_Request request, int *flag, MPI_Status *status)
     static const char FCNAME[] = "MPI_Request_get_status";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_REQUEST_GET_STATUS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_REQUEST_GET_STATUS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_REQUEST_GET_STATUS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_REQUEST_GET_STATUS);
 
     /* Check the arguments */
 #   ifdef HAVE_ERROR_CHECKING
@@ -245,7 +245,7 @@ int MPI_Request_get_status(MPI_Request request, int *flag, MPI_Status *status)
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_REQUEST_GET_STATUS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_REQUEST_GET_STATUS);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/rsend.c b/src/mpi/pt2pt/rsend.c
index d958ae9..daaabdd 100644
--- a/src/mpi/pt2pt/rsend.c
+++ b/src/mpi/pt2pt/rsend.c
@@ -62,12 +62,12 @@ int MPI_Rsend(const void *buf, int count, MPI_Datatype datatype, int dest, int t
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request * request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_RSEND);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_RSEND);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPI_RSEND);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(MPID_STATE_MPI_RSEND);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -157,7 +157,7 @@ int MPI_Rsend(const void *buf, int count, MPI_Datatype datatype, int dest, int t
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_RSEND);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_RSEND);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/pt2pt/rsend_init.c b/src/mpi/pt2pt/rsend_init.c
index 9ca7bcf..0cf2a13 100644
--- a/src/mpi/pt2pt/rsend_init.c
+++ b/src/mpi/pt2pt/rsend_init.c
@@ -67,12 +67,12 @@ int MPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype, int dest,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_RSEND_INIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_RSEND_INIT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_RSEND_INIT);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_RSEND_INIT);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -135,7 +135,7 @@ int MPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype, int dest,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_RSEND_INIT);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_RSEND_INIT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/send.c b/src/mpi/pt2pt/send.c
index edb2a81..7b75c47 100644
--- a/src/mpi/pt2pt/send.c
+++ b/src/mpi/pt2pt/send.c
@@ -67,12 +67,12 @@ int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int ta
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request * request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SEND);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_SEND);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPI_SEND);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(MPID_STATE_MPI_SEND);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -162,7 +162,7 @@ int MPI_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int ta
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_SEND);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_SEND);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/send_init.c b/src/mpi/pt2pt/send_init.c
index a718f14..31c34ed 100644
--- a/src/mpi/pt2pt/send_init.c
+++ b/src/mpi/pt2pt/send_init.c
@@ -67,12 +67,12 @@ int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SEND_INIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_SEND_INIT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_SEND_INIT);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_SEND_INIT);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -128,7 +128,7 @@ int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest,
     mpi_errno = MPID_Send_init(buf, count, datatype, dest, tag, comm_ptr,
 			       MPIR_CONTEXT_INTRA_PT2PT, &request_ptr);
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;
-    MPIR_SENDQ_REMEMBER(request_ptr, dest, tag, comm_ptr->context_id);
+    MPII_SENDQ_REMEMBER(request_ptr, dest, tag, comm_ptr->context_id);
     
     /* return the handle of the request to the user */
     MPIR_OBJ_PUBLISH_HANDLE(*request, request_ptr->handle);
@@ -136,7 +136,7 @@ int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_SEND_INIT);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_SEND_INIT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/sendrecv.c b/src/mpi/pt2pt/sendrecv.c
index bfa03df..2f4eb0e 100644
--- a/src/mpi/pt2pt/sendrecv.c
+++ b/src/mpi/pt2pt/sendrecv.c
@@ -78,12 +78,12 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request * sreq;
     MPIR_Request * rreq;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SENDRECV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_SENDRECV);
     
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_BOTH(MPID_STATE_MPI_SENDRECV);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_BOTH(MPID_STATE_MPI_SENDRECV);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -225,7 +225,7 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT_BOTH(MPID_STATE_MPI_SENDRECV);
+    MPIR_FUNC_TERSE_PT2PT_EXIT_BOTH(MPID_STATE_MPI_SENDRECV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/sendrecv_rep.c b/src/mpi/pt2pt/sendrecv_rep.c
index 70ce78b..ac2dcbc 100644
--- a/src/mpi/pt2pt/sendrecv_rep.c
+++ b/src/mpi/pt2pt/sendrecv_rep.c
@@ -72,13 +72,13 @@ int MPI_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
     static const char FCNAME[] = "MPI_Sendrecv_replace";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SENDRECV_REPLACE);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_SENDRECV_REPLACE);
     
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_BOTH(MPID_STATE_MPI_SENDRECV_REPLACE);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_BOTH(MPID_STATE_MPI_SENDRECV_REPLACE);
 
     /* Convert handles to MPI objects. */
     MPIR_Comm_get_ptr(comm, comm_ptr);
@@ -147,7 +147,7 @@ int MPI_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
 	{
 	    MPIR_Pack_size_impl(count, datatype, &tmpbuf_size);
 
-	    MPIU_CHKLMEM_MALLOC_ORJUMP(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");
+	    MPIR_CHKLMEM_MALLOC_ORJUMP(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");
 
 	    mpi_errno = MPIR_Pack_impl(buf, count, datatype, tmpbuf, tmpbuf_size, &tmpbuf_count);
 	    if (mpi_errno != MPI_SUCCESS) goto fn_fail;
@@ -214,8 +214,8 @@ int MPI_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPID_MPI_PT2PT_FUNC_EXIT_BOTH(MPID_STATE_MPI_SENDRECV_REPLACE);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_TERSE_PT2PT_EXIT_BOTH(MPID_STATE_MPI_SENDRECV_REPLACE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/pt2pt/ssend.c b/src/mpi/pt2pt/ssend.c
index 38880a2..c6d5449 100644
--- a/src/mpi/pt2pt/ssend.c
+++ b/src/mpi/pt2pt/ssend.c
@@ -61,12 +61,12 @@ int MPI_Ssend(const void *buf, int count, MPI_Datatype datatype, int dest, int t
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request * request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SSEND);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_SSEND);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPI_SSEND);
+    MPIR_FUNC_TERSE_PT2PT_ENTER_FRONT(MPID_STATE_MPI_SSEND);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -140,7 +140,7 @@ int MPI_Ssend(const void *buf, int count, MPI_Datatype datatype, int dest, int t
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_SSEND);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_SSEND);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/pt2pt/ssend_init.c b/src/mpi/pt2pt/ssend_init.c
index 4478117..b13260c 100644
--- a/src/mpi/pt2pt/ssend_init.c
+++ b/src/mpi/pt2pt/ssend_init.c
@@ -64,12 +64,12 @@ int MPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_SSEND_INIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_SSEND_INIT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_SSEND_INIT);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_SSEND_INIT);
     
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -132,7 +132,7 @@ int MPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest,
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_SSEND_INIT);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_SSEND_INIT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/start.c b/src/mpi/pt2pt/start.c
index 0ffb5c8..094afdf 100644
--- a/src/mpi/pt2pt/start.c
+++ b/src/mpi/pt2pt/start.c
@@ -50,12 +50,12 @@ int MPI_Start(MPI_Request *request)
     static const char FCNAME[] = "MPI_Start";
     MPIR_Request * request_ptr = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_START);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_START);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_START);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_START);
 
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -94,7 +94,7 @@ int MPI_Start(MPI_Request *request)
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_START);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_START);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/startall.c b/src/mpi/pt2pt/startall.c
index f970885..f8017c4 100644
--- a/src/mpi/pt2pt/startall.c
+++ b/src/mpi/pt2pt/startall.c
@@ -65,13 +65,13 @@ int MPI_Startall(int count, MPI_Request array_of_requests[])
     MPIR_Request ** request_ptrs = request_ptr_array;
     int i;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_STARTALL);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_STARTALL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_STARTALL);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_STARTALL);
 
     /* Validate handle parameters needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -92,7 +92,7 @@ int MPI_Startall(int count, MPI_Request array_of_requests[])
     /* Convert MPI request handles to a request object pointers */
     if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, count * sizeof(MPIR_Request *), mpi_errno, "request pointers");
+	MPIR_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, count * sizeof(MPIR_Request *), mpi_errno, "request pointers");
     }
 
     for (i = 0; i < count; i++)
@@ -128,10 +128,10 @@ int MPI_Startall(int count, MPI_Request array_of_requests[])
   fn_exit:
     if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_FREEALL();
+	MPIR_CHKLMEM_FREEALL();
     }
 
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_STARTALL);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_STARTALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/status_set_cancelled.c b/src/mpi/pt2pt/status_set_cancelled.c
index 05ff76c..473b9d2 100644
--- a/src/mpi/pt2pt/status_set_cancelled.c
+++ b/src/mpi/pt2pt/status_set_cancelled.c
@@ -52,11 +52,11 @@ int MPI_Status_set_cancelled(MPI_Status *status, int flag)
     static const char FCNAME[] = "MPI_Status_set_cancelled";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_STATUS_SET_CANCELLED);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_STATUS_SET_CANCELLED);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_STATUS_SET_CANCELLED);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_STATUS_SET_CANCELLED);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -77,7 +77,7 @@ int MPI_Status_set_cancelled(MPI_Status *status, int flag)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_STATUS_SET_CANCELLED);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_STATUS_SET_CANCELLED);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/pt2pt/test.c b/src/mpi/pt2pt/test.c
index 25ca804..5efd125 100644
--- a/src/mpi/pt2pt/test.c
+++ b/src/mpi/pt2pt/test.c
@@ -113,12 +113,12 @@ int MPI_Test(MPI_Request *request, int *flag, MPI_Status *status)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *request_ptr = NULL;
-   MPID_MPI_STATE_DECL(MPID_STATE_MPI_TEST);
+   MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TEST);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_TEST);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_TEST);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -162,7 +162,7 @@ int MPI_Test(MPI_Request *request, int *flag, MPI_Status *status)
     /* ... end of body of routine ... */
     
   fn_exit:
-	MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_TEST);
+	MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_TEST);
 	MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 	return mpi_errno;
     
diff --git a/src/mpi/pt2pt/test_cancelled.c b/src/mpi/pt2pt/test_cancelled.c
index de502b8..3696928 100644
--- a/src/mpi/pt2pt/test_cancelled.c
+++ b/src/mpi/pt2pt/test_cancelled.c
@@ -51,11 +51,11 @@ Output Parameters:
 int MPI_Test_cancelled(const MPI_Status *status, int *flag)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TEST_CANCELLED);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TEST_CANCELLED);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_TEST_CANCELLED);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_TEST_CANCELLED);
     
     /* Validate parameters if error checking is enabled */
 #   ifdef HAVE_ERROR_CHECKING
@@ -70,15 +70,14 @@ int MPI_Test_cancelled(const MPI_Status *status, int *flag)
 #   endif /* HAVE_ERROR_CHECKING */
 
     /* ... body of routine ...  */
-    
-    MPIR_Test_cancelled_impl(status, flag);
+    *flag = MPIR_STATUS_GET_CANCEL_BIT(*status);
     
     /* ... end of body of routine ... */
     
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_TEST_CANCELLED);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_TEST_CANCELLED);
     return mpi_errno;
     
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/pt2pt/testall.c b/src/mpi/pt2pt/testall.c
index a8c144c..9079523 100644
--- a/src/mpi/pt2pt/testall.c
+++ b/src/mpi/pt2pt/testall.c
@@ -47,12 +47,12 @@ int MPIR_Testall_impl(int count, MPI_Request array_of_requests[], int *flag,
     int rc;
     int proc_failure = FALSE;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     /* Convert MPI request handles to a request object pointers */
     if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-        MPIU_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **,
+        MPIR_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **,
                 count * sizeof(MPIR_Request *), mpi_errno, "request pointers");
     }
 
@@ -174,7 +174,7 @@ int MPIR_Testall_impl(int count, MPI_Request array_of_requests[], int *flag,
  fn_exit:
     if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-        MPIU_CHKLMEM_FREEALL();
+        MPIR_CHKLMEM_FREEALL();
     }
 
     return mpi_errno;
@@ -234,12 +234,12 @@ int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TESTALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TESTALL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_TESTALL);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_TESTALL);
 
     /* Check the arguments */
 #   ifdef HAVE_ERROR_CHECKING
@@ -272,7 +272,7 @@ int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
     
   fn_exit:
     
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_TESTALL);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_TESTALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/testany.c b/src/mpi/pt2pt/testany.c
index 013aad4..26d86ed 100644
--- a/src/mpi/pt2pt/testany.c
+++ b/src/mpi/pt2pt/testany.c
@@ -75,13 +75,13 @@ int MPI_Testany(int count, MPI_Request array_of_requests[], int *indx,
     int active_flag;
     int last_disabled_anysource = -1;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TESTANY);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TESTANY);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_TESTANY);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_TESTANY);
 
     /* Check the arguments */
 #   ifdef HAVE_ERROR_CHECKING
@@ -111,7 +111,7 @@ int MPI_Testany(int count, MPI_Request array_of_requests[], int *indx,
     /* Convert MPI request handles to a request object pointers */
     if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, count * sizeof(MPIR_Request *), mpi_errno, "request pointers");
+	MPIR_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, count * sizeof(MPIR_Request *), mpi_errno, "request pointers");
     }
 
     n_inactive = 0;
@@ -217,10 +217,10 @@ int MPI_Testany(int count, MPI_Request array_of_requests[], int *indx,
   fn_exit:
     if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_FREEALL();
+	MPIR_CHKLMEM_FREEALL();
     }
 
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_TESTANY);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_TESTANY);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/testsome.c b/src/mpi/pt2pt/testsome.c
index 3a4112a..30cec08 100644
--- a/src/mpi/pt2pt/testsome.c
+++ b/src/mpi/pt2pt/testsome.c
@@ -79,13 +79,13 @@ int MPI_Testsome(int incount, MPI_Request array_of_requests[], int *outcount,
     int active_flag;
     int rc;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TESTSOME);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TESTSOME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_TESTSOME);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_TESTSOME);
 
     /* Check the arguments */
 #   ifdef HAVE_ERROR_CHECKING
@@ -117,7 +117,7 @@ int MPI_Testsome(int incount, MPI_Request array_of_requests[], int *outcount,
     /* Convert MPI request handles to a request object pointers */
     if (incount > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, incount * sizeof(MPIR_Request *), mpi_errno, "request pointers");
+	MPIR_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, incount * sizeof(MPIR_Request *), mpi_errno, "request pointers");
     }
 
     n_inactive = 0;
@@ -240,10 +240,10 @@ int MPI_Testsome(int incount, MPI_Request array_of_requests[], int *outcount,
   fn_exit:
     if (incount > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_FREEALL();
+	MPIR_CHKLMEM_FREEALL();
     }
 
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_TESTSOME);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_TESTSOME);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/wait.c b/src/mpi/pt2pt/wait.c
index ea92615..696ce0a 100644
--- a/src/mpi/pt2pt/wait.c
+++ b/src/mpi/pt2pt/wait.c
@@ -138,12 +138,12 @@ int MPI_Wait(MPI_Request *request, MPI_Status *status)
     MPIR_Request * request_ptr = NULL;
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm * comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WAIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WAIT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_WAIT);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_WAIT);
 
     /* Check the arguments */
 #   ifdef HAVE_ERROR_CHECKING
@@ -192,7 +192,7 @@ int MPI_Wait(MPI_Request *request, MPI_Status *status)
     /* ... end of body of routine ... */
     
   fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_WAIT);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_WAIT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 	
diff --git a/src/mpi/pt2pt/waitall.c b/src/mpi/pt2pt/waitall.c
index 6983246..f3327bb 100644
--- a/src/mpi/pt2pt/waitall.c
+++ b/src/mpi/pt2pt/waitall.c
@@ -46,11 +46,11 @@ static inline int request_complete_fastpath(MPI_Request *request, MPIR_Request *
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(request_ptr->kind == MPIR_REQUEST_KIND__SEND || request_ptr->kind == MPIR_REQUEST_KIND__RECV);
+    MPIR_Assert(request_ptr->kind == MPIR_REQUEST_KIND__SEND || request_ptr->kind == MPIR_REQUEST_KIND__RECV);
 
     if (request_ptr->kind == MPIR_REQUEST_KIND__SEND) {
         /* FIXME: are Ibsend requests added to the send queue? */
-        MPIR_SENDQ_FORGET(request_ptr);
+        MPII_SENDQ_FORGET(request_ptr);
     }
 
     /* the completion path for SEND and RECV is the same at this time, modulo
@@ -84,12 +84,12 @@ int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
     int disabled_anysource = FALSE;
     const int ignoring_statuses = (array_of_statuses == MPI_STATUSES_IGNORE);
     int optimize = ignoring_statuses; /* see NOTE-O1 */
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     /* Convert MPI request handles to a request object pointers */
     if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_MALLOC(request_ptrs, MPIR_Request **, count * sizeof(MPIR_Request *), mpi_errno, "request pointers");
+	MPIR_CHKLMEM_MALLOC(request_ptrs, MPIR_Request **, count * sizeof(MPIR_Request *), mpi_errno, "request pointers");
     }
 
     n_greqs = 0;
@@ -212,7 +212,7 @@ int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
         while (!MPIR_Request_is_complete(request_ptrs[i]))
         {
             /* generalized requests should already be finished */
-            MPIU_Assert(request_ptrs[i]->kind != MPIR_REQUEST_KIND__GREQUEST);
+            MPIR_Assert(request_ptrs[i]->kind != MPIR_REQUEST_KIND__GREQUEST);
             
             mpi_errno = MPID_Progress_wait(&progress_state);
             if (mpi_errno != MPI_SUCCESS) {
@@ -289,7 +289,7 @@ int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
  fn_exit:
      if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_FREEALL();
+	MPIR_CHKLMEM_FREEALL();
     }
 
    return mpi_errno;
@@ -343,12 +343,12 @@ int MPI_Waitall(int count, MPI_Request array_of_requests[],
 		MPI_Status array_of_statuses[])
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WAITALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WAITALL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_WAITALL);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_WAITALL);
 
     /* Check the arguments */
 #   ifdef HAVE_ERROR_CHECKING
@@ -381,7 +381,7 @@ int MPI_Waitall(int count, MPI_Request array_of_requests[],
     /* ... end of body of routine ... */
     
  fn_exit:
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_WAITALL);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_WAITALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/waitany.c b/src/mpi/pt2pt/waitany.c
index f74fa0a..95b0bb0 100644
--- a/src/mpi/pt2pt/waitany.c
+++ b/src/mpi/pt2pt/waitany.c
@@ -79,13 +79,13 @@ int MPI_Waitany(int count, MPI_Request array_of_requests[], int *indx,
     int found_nonnull_req;
     int last_disabled_anysource = -1;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WAITANY);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WAITANY);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_WAITANY);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_WAITANY);
 
     /* Check the arguments */
 #   ifdef HAVE_ERROR_CHECKING
@@ -110,7 +110,7 @@ int MPI_Waitany(int count, MPI_Request array_of_requests[], int *indx,
     /* Convert MPI request handles to a request object pointers */
     if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, count * sizeof(MPIR_Request *), mpi_errno, "request pointers");
+	MPIR_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, count * sizeof(MPIR_Request *), mpi_errno, "request pointers");
     }
 
     n_inactive = 0;
@@ -222,10 +222,10 @@ int MPI_Waitany(int count, MPI_Request array_of_requests[], int *indx,
   fn_exit:
     if (count > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_FREEALL();
+	MPIR_CHKLMEM_FREEALL();
     }
 
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_WAITANY);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_WAITANY);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/pt2pt/waitsome.c b/src/mpi/pt2pt/waitsome.c
index 01d58bf..efbb6ab 100644
--- a/src/mpi/pt2pt/waitsome.c
+++ b/src/mpi/pt2pt/waitsome.c
@@ -98,13 +98,13 @@ int MPI_Waitsome(int incount, MPI_Request array_of_requests[],
     int rc;
     int disabled_anysource = FALSE;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WAITSOME);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WAITSOME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_PT2PT_FUNC_ENTER(MPID_STATE_MPI_WAITSOME);
+    MPIR_FUNC_TERSE_PT2PT_ENTER(MPID_STATE_MPI_WAITSOME);
 
     /* Check the arguments */
 #   ifdef HAVE_ERROR_CHECKING
@@ -136,7 +136,7 @@ int MPI_Waitsome(int incount, MPI_Request array_of_requests[],
     /* Convert MPI request handles to a request object pointers */
     if (incount > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, incount * sizeof(MPIR_Request *), mpi_errno, "request pointers");
+	MPIR_CHKLMEM_MALLOC_ORJUMP(request_ptrs, MPIR_Request **, incount * sizeof(MPIR_Request *), mpi_errno, "request pointers");
     }
     
     n_inactive = 0;
@@ -293,10 +293,10 @@ int MPI_Waitsome(int incount, MPI_Request array_of_requests[],
   fn_exit:
     if (incount > MPIR_REQUEST_PTR_ARRAY_SIZE)
     {
-	MPIU_CHKLMEM_FREEALL();
+	MPIR_CHKLMEM_FREEALL();
     }
 
-    MPID_MPI_PT2PT_FUNC_EXIT(MPID_STATE_MPI_WAITSOME);
+    MPIR_FUNC_TERSE_PT2PT_EXIT(MPID_STATE_MPI_WAITSOME);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/accumulate.c b/src/mpi/rma/accumulate.c
index 01b684c..74c9825 100644
--- a/src/mpi/rma/accumulate.c
+++ b/src/mpi/rma/accumulate.c
@@ -73,12 +73,12 @@ int MPI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
     static const char FCNAME[] = "MPI_Accumulate";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ACCUMULATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ACCUMULATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_ACCUMULATE);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_ACCUMULATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -154,7 +154,7 @@ int MPI_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_ACCUMULATE);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_ACCUMULATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/alloc_mem.c b/src/mpi/rma/alloc_mem.c
index 52f17b6..f837516 100644
--- a/src/mpi/rma/alloc_mem.c
+++ b/src/mpi/rma/alloc_mem.c
@@ -66,12 +66,12 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
     int mpi_errno = MPI_SUCCESS;
     void *ap;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLOC_MEM);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_ALLOC_MEM);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_ALLOC_MEM);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_ALLOC_MEM);
     
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -89,7 +89,7 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
 
     /* ... body of routine ...  */
 
-    MPIU_Ensure_Aint_fits_in_pointer(size);
+    MPIR_Ensure_Aint_fits_in_pointer(size);
     ap = MPID_Alloc_mem(size, info_ptr);
 
     /* --BEGIN ERROR HANDLING-- */
@@ -106,7 +106,7 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_ALLOC_MEM);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_ALLOC_MEM);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/compare_and_swap.c b/src/mpi/rma/compare_and_swap.c
index a259e76..961b88c 100644
--- a/src/mpi/rma/compare_and_swap.c
+++ b/src/mpi/rma/compare_and_swap.c
@@ -79,12 +79,12 @@ int MPI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
     static const char FCNAME[] = "MPI_Compare_and_swap";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMPARE_AND_SWAP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMPARE_AND_SWAP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_COMPARE_AND_SWAP);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_COMPARE_AND_SWAP);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -142,7 +142,7 @@ int MPI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_COMPARE_AND_SWAP);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_COMPARE_AND_SWAP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/fetch_and_op.c b/src/mpi/rma/fetch_and_op.c
index 29e6b63..df81331 100644
--- a/src/mpi/rma/fetch_and_op.c
+++ b/src/mpi/rma/fetch_and_op.c
@@ -84,12 +84,12 @@ int MPI_Fetch_and_op(const void *origin_addr, void *result_addr,
     static const char FCNAME[] = "MPI_Fetch_and_op";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_FETCH_AND_OP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_FETCH_AND_OP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_FETCH_AND_OP);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_FETCH_AND_OP);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -158,7 +158,7 @@ int MPI_Fetch_and_op(const void *origin_addr, void *result_addr,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_FETCH_AND_OP);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_FETCH_AND_OP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/free_mem.c b/src/mpi/rma/free_mem.c
index db0b6b2..57ba8c1 100644
--- a/src/mpi/rma/free_mem.c
+++ b/src/mpi/rma/free_mem.c
@@ -48,12 +48,12 @@ int MPI_Free_mem(void *base)
 {
     static const char FCNAME[] = "MPI_Free_mem";
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_FREE_MEM);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_FREE_MEM);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_FREE_MEM);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_FREE_MEM);
 
     /* ... body of routine ...  */
 
@@ -65,7 +65,7 @@ int MPI_Free_mem(void *base)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_FREE_MEM);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_FREE_MEM);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/get.c b/src/mpi/rma/get.c
index bf5788f..b7f3aed 100644
--- a/src/mpi/rma/get.c
+++ b/src/mpi/rma/get.c
@@ -68,12 +68,12 @@ int MPI_Get(void *origin_addr, int origin_count, MPI_Datatype
     static const char FCNAME[] = "MPI_Get";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_GET);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_GET);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -147,7 +147,7 @@ int MPI_Get(void *origin_addr, int origin_count, MPI_Datatype
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_GET);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_GET);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/get_accumulate.c b/src/mpi/rma/get_accumulate.c
index afb78b1..24c7911 100644
--- a/src/mpi/rma/get_accumulate.c
+++ b/src/mpi/rma/get_accumulate.c
@@ -104,12 +104,12 @@ int MPI_Get_accumulate(const void *origin_addr, int origin_count,
     static const char FCNAME[] = "MPI_Get_accumulate";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GET_ACCUMULATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GET_ACCUMULATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_GET_ACCUMULATE);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_GET_ACCUMULATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -208,7 +208,7 @@ int MPI_Get_accumulate(const void *origin_addr, int origin_count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_GET_ACCUMULATE);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_GET_ACCUMULATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/put.c b/src/mpi/rma/put.c
index e7ecf7e..e7d8781 100644
--- a/src/mpi/rma/put.c
+++ b/src/mpi/rma/put.c
@@ -68,12 +68,12 @@ int MPI_Put(const void *origin_addr, int origin_count, MPI_Datatype
     static const char FCNAME[] = "MPI_Put";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_PUT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_PUT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_PUT);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_PUT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -147,7 +147,7 @@ int MPI_Put(const void *origin_addr, int origin_count, MPI_Datatype
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_PUT);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_PUT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/raccumulate.c b/src/mpi/rma/raccumulate.c
index fbeca51..75e7e2d 100644
--- a/src/mpi/rma/raccumulate.c
+++ b/src/mpi/rma/raccumulate.c
@@ -87,12 +87,12 @@ int MPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_RACCUMULATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_RACCUMULATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_RACCUMULATE);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_RACCUMULATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -171,7 +171,7 @@ int MPI_Raccumulate(const void *origin_addr, int origin_count, MPI_Datatype
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_RACCUMULATE);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_RACCUMULATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/rget.c b/src/mpi/rma/rget.c
index b1a8879..e030fe1 100644
--- a/src/mpi/rma/rget.c
+++ b/src/mpi/rma/rget.c
@@ -82,12 +82,12 @@ int MPI_Rget(void *origin_addr, int origin_count, MPI_Datatype
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_RGET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_RGET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_RGET);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_RGET);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -164,7 +164,7 @@ int MPI_Rget(void *origin_addr, int origin_count, MPI_Datatype
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_RGET);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_RGET);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/rget_accumulate.c b/src/mpi/rma/rget_accumulate.c
index 23f5500..4d5ca12 100644
--- a/src/mpi/rma/rget_accumulate.c
+++ b/src/mpi/rma/rget_accumulate.c
@@ -97,12 +97,12 @@ int MPI_Rget_accumulate(const void *origin_addr, int origin_count,
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_RGET_ACCUMULATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_RGET_ACCUMULATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_RGET_ACCUMULATE);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_RGET_ACCUMULATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -200,7 +200,7 @@ int MPI_Rget_accumulate(const void *origin_addr, int origin_count,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_RGET_ACCUMULATE);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_RGET_ACCUMULATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/rput.c b/src/mpi/rma/rput.c
index f99d0dc..1219dc7 100644
--- a/src/mpi/rma/rput.c
+++ b/src/mpi/rma/rput.c
@@ -84,12 +84,12 @@ int MPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     MPIR_Request *request_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_RPUT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_RPUT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_RPUT);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_RPUT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -166,7 +166,7 @@ int MPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_RPUT);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_RPUT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_allocate.c b/src/mpi/rma/win_allocate.c
index 39c0cee..79a3b85 100644
--- a/src/mpi/rma/win_allocate.c
+++ b/src/mpi/rma/win_allocate.c
@@ -76,12 +76,12 @@ int MPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info,
     MPIR_Win *win_ptr = NULL;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_ALLOCATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_ALLOCATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_ALLOCATE);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_ALLOCATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -152,7 +152,7 @@ int MPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_ALLOCATE);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_ALLOCATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_allocate_shared.c b/src/mpi/rma/win_allocate_shared.c
index 44835a5..324f857 100644
--- a/src/mpi/rma/win_allocate_shared.c
+++ b/src/mpi/rma/win_allocate_shared.c
@@ -86,12 +86,12 @@ int MPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info, MPI_Com
     MPIR_Win *win_ptr = NULL;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_ALLOCATE_SHARED);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_ALLOCATE_SHARED);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_ALLOCATE_SHARED);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_ALLOCATE_SHARED);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -150,7 +150,7 @@ int MPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info, MPI_Com
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_ALLOCATE_SHARED);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_ALLOCATE_SHARED);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_attach.c b/src/mpi/rma/win_attach.c
index f7f200e..9f39a44 100644
--- a/src/mpi/rma/win_attach.c
+++ b/src/mpi/rma/win_attach.c
@@ -65,12 +65,12 @@ int MPI_Win_attach(MPI_Win win, void *base, MPI_Aint size)
     static const char FCNAME[] = "MPI_Win_attach";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_ATTACH);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_ATTACH);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_ATTACH);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_ATTACH);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -126,7 +126,7 @@ int MPI_Win_attach(MPI_Win win, void *base, MPI_Aint size)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_ATTACH);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_ATTACH);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_complete.c b/src/mpi/rma/win_complete.c
index 5f53db2..e3fc53b 100644
--- a/src/mpi/rma/win_complete.c
+++ b/src/mpi/rma/win_complete.c
@@ -50,12 +50,12 @@ int MPI_Win_complete(MPI_Win win)
     static const char FCNAME[] = "MPI_Win_complete";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_COMPLETE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_COMPLETE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_COMPLETE);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_COMPLETE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -93,7 +93,7 @@ int MPI_Win_complete(MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_COMPLETE);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_COMPLETE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_create.c b/src/mpi/rma/win_create.c
index e226401..ffb134f 100644
--- a/src/mpi/rma/win_create.c
+++ b/src/mpi/rma/win_create.c
@@ -104,12 +104,12 @@ int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, MPI_Info info,
     MPIR_Win *win_ptr = NULL;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_CREATE);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_CREATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -180,7 +180,7 @@ int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, MPI_Info info,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_CREATE);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_create_dynamic.c b/src/mpi/rma/win_create_dynamic.c
index 8b4f6ca..76fa606 100644
--- a/src/mpi/rma/win_create_dynamic.c
+++ b/src/mpi/rma/win_create_dynamic.c
@@ -92,12 +92,12 @@ int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win)
     MPIR_Win *win_ptr = NULL;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_CREATE_DYNAMIC);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_CREATE_DYNAMIC);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_CREATE_DYNAMIC);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_CREATE_DYNAMIC);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -144,7 +144,7 @@ int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_CREATE_DYNAMIC);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_CREATE_DYNAMIC);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_detach.c b/src/mpi/rma/win_detach.c
index 03e0b93..b1c601c 100644
--- a/src/mpi/rma/win_detach.c
+++ b/src/mpi/rma/win_detach.c
@@ -64,12 +64,12 @@ int MPI_Win_detach(MPI_Win win, const void *base)
     static const char FCNAME[] = "MPI_Win_detach";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_DETACH);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_DETACH);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_DETACH);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_DETACH);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -108,7 +108,7 @@ int MPI_Win_detach(MPI_Win win, const void *base)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_DETACH);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_DETACH);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_fence.c b/src/mpi/rma/win_fence.c
index e07631d..4ba5a1a 100644
--- a/src/mpi/rma/win_fence.c
+++ b/src/mpi/rma/win_fence.c
@@ -68,12 +68,12 @@ int MPI_Win_fence(int assert, MPI_Win win)
     static const char FCNAME[] = "MPI_Win_fence";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_FENCE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_FENCE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_FENCE);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_FENCE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -114,7 +114,7 @@ int MPI_Win_fence(int assert, MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_FENCE);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_FENCE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_flush.c b/src/mpi/rma/win_flush.c
index 5322127..8a3370c 100644
--- a/src/mpi/rma/win_flush.c
+++ b/src/mpi/rma/win_flush.c
@@ -59,12 +59,12 @@ int MPI_Win_flush(int rank, MPI_Win win)
     static const char FCNAME[] = "MPI_Win_flush";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_FLUSH);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_FLUSH);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_FLUSH);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_FLUSH);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -109,7 +109,7 @@ int MPI_Win_flush(int rank, MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_FLUSH);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_FLUSH);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_flush_all.c b/src/mpi/rma/win_flush_all.c
index 0869355..fdea8c3 100644
--- a/src/mpi/rma/win_flush_all.c
+++ b/src/mpi/rma/win_flush_all.c
@@ -58,12 +58,12 @@ int MPI_Win_flush_all(MPI_Win win)
     static const char FCNAME[] = "MPI_Win_flush_all";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_FLUSH_ALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_FLUSH_ALL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_FLUSH_ALL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_FLUSH_ALL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -103,7 +103,7 @@ int MPI_Win_flush_all(MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_FLUSH_ALL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_FLUSH_ALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_flush_local.c b/src/mpi/rma/win_flush_local.c
index dc019a4..138b5ac 100644
--- a/src/mpi/rma/win_flush_local.c
+++ b/src/mpi/rma/win_flush_local.c
@@ -61,12 +61,12 @@ int MPI_Win_flush_local(int rank, MPI_Win win)
     static const char FCNAME[] = "MPI_Win_flush_local";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_FLUSH_LOCAL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_FLUSH_LOCAL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_FLUSH_LOCAL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_FLUSH_LOCAL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -111,7 +111,7 @@ int MPI_Win_flush_local(int rank, MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_FLUSH_LOCAL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_FLUSH_LOCAL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_flush_local_all.c b/src/mpi/rma/win_flush_local_all.c
index db90884..d99311b 100644
--- a/src/mpi/rma/win_flush_local_all.c
+++ b/src/mpi/rma/win_flush_local_all.c
@@ -57,12 +57,12 @@ int MPI_Win_flush_local_all(MPI_Win win)
     static const char FCNAME[] = "MPI_Win_flush_local_all";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_FLUSH_LOCAL_ALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_FLUSH_LOCAL_ALL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_FLUSH_LOCAL_ALL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_FLUSH_LOCAL_ALL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -101,7 +101,7 @@ int MPI_Win_flush_local_all(MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_FLUSH_LOCAL_ALL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_FLUSH_LOCAL_ALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_free.c b/src/mpi/rma/win_free.c
index 317d314..e614a21 100644
--- a/src/mpi/rma/win_free.c
+++ b/src/mpi/rma/win_free.c
@@ -53,12 +53,12 @@ int MPI_Win_free(MPI_Win *win)
     static const char FCNAME[] = "MPI_Win_free";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_FREE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_FREE);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_FREE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -112,7 +112,7 @@ int MPI_Win_free(MPI_Win *win)
 	int in_use;
 	MPIR_Errhandler_release_ref( win_ptr->errhandler,&in_use);
 	if (!in_use) {
-	    MPIU_Handle_obj_free( &MPIR_Errhandler_mem, win_ptr->errhandler );
+	    MPIR_Handle_obj_free( &MPIR_Errhandler_mem, win_ptr->errhandler );
 	}
     }
     
@@ -123,7 +123,7 @@ int MPI_Win_free(MPI_Win *win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_FREE);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_FREE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_get_group.c b/src/mpi/rma/win_get_group.c
index c832278..333c014 100644
--- a/src/mpi/rma/win_get_group.c
+++ b/src/mpi/rma/win_get_group.c
@@ -62,12 +62,12 @@ int MPI_Win_get_group(MPI_Win win, MPI_Group *group)
     MPIR_Win *win_ptr = NULL;
     MPIR_Comm *win_comm_ptr;
     MPIR_Group *group_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_GET_GROUP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_GET_GROUP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_GET_GROUP);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_GET_GROUP);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -109,7 +109,7 @@ int MPI_Win_get_group(MPI_Win win, MPI_Group *group)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_GET_GROUP);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_GET_GROUP);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_get_info.c b/src/mpi/rma/win_get_info.c
index ad1c726..bb46e06 100644
--- a/src/mpi/rma/win_get_info.c
+++ b/src/mpi/rma/win_get_info.c
@@ -72,12 +72,12 @@ int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_GET_INFO);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_GET_INFO);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_GET_INFO);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_GET_INFO);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -117,7 +117,7 @@ int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_GET_INFO);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_GET_INFO);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_get_name.c b/src/mpi/rma/win_get_name.c
index 63d23b4..1b13ff1 100644
--- a/src/mpi/rma/win_get_name.c
+++ b/src/mpi/rma/win_get_name.c
@@ -57,11 +57,11 @@ int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_GET_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_GET_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_GET_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_GET_NAME);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -102,7 +102,7 @@ int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_GET_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_GET_NAME);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/rma/win_lock.c b/src/mpi/rma/win_lock.c
index 64fbd85..908be33 100644
--- a/src/mpi/rma/win_lock.c
+++ b/src/mpi/rma/win_lock.c
@@ -78,12 +78,12 @@ int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win)
     static const char FCNAME[] = "MPI_Win_lock";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_LOCK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_LOCK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_LOCK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_LOCK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -142,7 +142,7 @@ int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_LOCK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_LOCK);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_lock_all.c b/src/mpi/rma/win_lock_all.c
index 488e950..90e6a0b 100644
--- a/src/mpi/rma/win_lock_all.c
+++ b/src/mpi/rma/win_lock_all.c
@@ -82,12 +82,12 @@ int MPI_Win_lock_all(int assert, MPI_Win win)
     static const char FCNAME[] = "MPI_Win_lock_all";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_LOCK_ALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_LOCK_ALL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_LOCK_ALL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_LOCK_ALL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -136,7 +136,7 @@ int MPI_Win_lock_all(int assert, MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_LOCK_ALL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_LOCK_ALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_post.c b/src/mpi/rma/win_post.c
index 9a847fa..3ddc506 100644
--- a/src/mpi/rma/win_post.c
+++ b/src/mpi/rma/win_post.c
@@ -67,12 +67,12 @@ int MPI_Win_post(MPI_Group group, int assert, MPI_Win win)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     MPIR_Group *group_ptr=NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_POST);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_POST);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_POST);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_POST);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -117,7 +117,7 @@ int MPI_Win_post(MPI_Group group, int assert, MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_POST);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_POST);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_set_info.c b/src/mpi/rma/win_set_info.c
index 1d2d7cc..00ace6e 100644
--- a/src/mpi/rma/win_set_info.c
+++ b/src/mpi/rma/win_set_info.c
@@ -69,12 +69,12 @@ int MPI_Win_set_info(MPI_Win win, MPI_Info info)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_SET_INFO);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_SET_INFO);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_SET_INFO);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_SET_INFO);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -116,7 +116,7 @@ int MPI_Win_set_info(MPI_Win win, MPI_Info info)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_SET_INFO);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_SET_INFO);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_set_name.c b/src/mpi/rma/win_set_name.c
index 54ac8eb..ac7c311 100644
--- a/src/mpi/rma/win_set_name.c
+++ b/src/mpi/rma/win_set_name.c
@@ -52,11 +52,11 @@ int MPI_Win_set_name(MPI_Win win, const char *win_name)
     static const char FCNAME[] = "MPI_Win_set_name";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_SET_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_SET_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_SET_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_SET_NAME);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -95,7 +95,7 @@ int MPI_Win_set_name(MPI_Win win, const char *win_name)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_SET_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_SET_NAME);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/rma/win_shared_query.c b/src/mpi/rma/win_shared_query.c
index 860c60f..a9c7bf9 100644
--- a/src/mpi/rma/win_shared_query.c
+++ b/src/mpi/rma/win_shared_query.c
@@ -76,12 +76,12 @@ int MPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit,
     static const char FCNAME[] = "MPI_Win_shared_query";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_SHARED_QUERY);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_SHARED_QUERY);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_SHARED_QUERY);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_SHARED_QUERY);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -128,7 +128,7 @@ int MPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_SHARED_QUERY);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_SHARED_QUERY);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_start.c b/src/mpi/rma/win_start.c
index ae1258e..1af67de 100644
--- a/src/mpi/rma/win_start.c
+++ b/src/mpi/rma/win_start.c
@@ -68,12 +68,12 @@ int MPI_Win_start(MPI_Group group, int assert, MPI_Win win)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
     MPIR_Group *group_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_START);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_START);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_START);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_START);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -118,7 +118,7 @@ int MPI_Win_start(MPI_Group group, int assert, MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_START);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_START);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_sync.c b/src/mpi/rma/win_sync.c
index 1a7cac7..418ce95 100644
--- a/src/mpi/rma/win_sync.c
+++ b/src/mpi/rma/win_sync.c
@@ -60,12 +60,12 @@ int MPI_Win_sync(MPI_Win win)
     static const char FCNAME[] = "MPI_Win_sync";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_SYNC);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_SYNC);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_SYNC);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_SYNC);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -103,7 +103,7 @@ int MPI_Win_sync(MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_SYNC);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_SYNC);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_test.c b/src/mpi/rma/win_test.c
index 5ee2b3a..b24ec51 100644
--- a/src/mpi/rma/win_test.c
+++ b/src/mpi/rma/win_test.c
@@ -59,12 +59,12 @@ int MPI_Win_test(MPI_Win win, int *flag)
     static const char FCNAME[] = "MPI_Win_test";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_TEST);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_TEST);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_TEST);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_TEST);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -104,7 +104,7 @@ int MPI_Win_test(MPI_Win win, int *flag)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_TEST);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_TEST);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_unlock.c b/src/mpi/rma/win_unlock.c
index bd761a4..f37f7c7 100644
--- a/src/mpi/rma/win_unlock.c
+++ b/src/mpi/rma/win_unlock.c
@@ -54,12 +54,12 @@ int MPI_Win_unlock(int rank, MPI_Win win)
     static const char FCNAME[] = "MPI_Win_unlock";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_UNLOCK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_UNLOCK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_UNLOCK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_UNLOCK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -104,7 +104,7 @@ int MPI_Win_unlock(int rank, MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_UNLOCK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_UNLOCK);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_unlock_all.c b/src/mpi/rma/win_unlock_all.c
index 730e488..8e2616c 100644
--- a/src/mpi/rma/win_unlock_all.c
+++ b/src/mpi/rma/win_unlock_all.c
@@ -61,12 +61,12 @@ int MPI_Win_unlock_all(MPI_Win win)
     static const char FCNAME[] = "MPI_Win_unlock_all";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_UNLOCK_ALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_UNLOCK_ALL);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WIN_UNLOCK_ALL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WIN_UNLOCK_ALL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -106,7 +106,7 @@ int MPI_Win_unlock_all(MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WIN_UNLOCK_ALL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WIN_UNLOCK_ALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/win_wait.c b/src/mpi/rma/win_wait.c
index e0c0054..0ffb88f 100644
--- a/src/mpi/rma/win_wait.c
+++ b/src/mpi/rma/win_wait.c
@@ -50,12 +50,12 @@ int MPI_Win_wait(MPI_Win win)
     static const char FCNAME[] = "MPI_Win_wait";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WIN_WAIT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WIN_WAIT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_RMA_FUNC_ENTER(MPID_STATE_MPI_WIN_WAIT);
+    MPIR_FUNC_TERSE_RMA_ENTER(MPID_STATE_MPI_WIN_WAIT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -94,7 +94,7 @@ int MPI_Win_wait(MPI_Win win)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_RMA_FUNC_EXIT(MPID_STATE_MPI_WIN_WAIT);
+    MPIR_FUNC_TERSE_RMA_EXIT(MPID_STATE_MPI_WIN_WAIT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/rma/winutil.c b/src/mpi/rma/winutil.c
index 9b8ae90..326c3cf 100644
--- a/src/mpi/rma/winutil.c
+++ b/src/mpi/rma/winutil.c
@@ -14,6 +14,6 @@
 
 /* Preallocated window objects */
 MPIR_Win MPIR_Win_direct[MPIR_WIN_PREALLOC] = { {0} };
-MPIU_Object_alloc_t MPIR_Win_mem = { 0, 0, 0, 0, MPIR_WIN,
+MPIR_Object_alloc_t MPIR_Win_mem = { 0, 0, 0, 0, MPIR_WIN,
 				      sizeof(MPIR_Win), MPIR_Win_direct,
                                       MPIR_WIN_PREALLOC};
diff --git a/src/mpi/romio/adio/include/adioi.h b/src/mpi/romio/adio/include/adioi.h
index 42b82ff..e6a80b6 100644
--- a/src/mpi/romio/adio/include/adioi.h
+++ b/src/mpi/romio/adio/include/adioi.h
@@ -987,8 +987,8 @@ int  ADIOI_MPE_iwrite_b;
    (no loss of (meaningful) high order bytes in 8 byte MPI_Aint 
       to (possible) 4 byte ptr cast)                              */
 /* Should work even on 64bit or old 32bit configs                 */
-  /* Use MPIU_Ensure_Aint_fits_in_pointer and
-         MPIU_AINT_CAST_TO_VOID_PTR from configure (mpi.h) */
+  /* Use MPIR_Ensure_Aint_fits_in_pointer and
+         MPIR_AINT_CAST_TO_VOID_PTR from configure (mpi.h) */
   #include "mpir_ext.h"
 
   #define ADIOI_AINT_CAST_TO_VOID_PTR (void*)(intptr_t)
diff --git a/src/mpi/spawn/close_port.c b/src/mpi/spawn/close_port.c
index f7f27fe..35af9f1 100644
--- a/src/mpi/spawn/close_port.c
+++ b/src/mpi/spawn/close_port.c
@@ -53,12 +53,12 @@ Input Parameters:
 int MPI_Close_port(const char *port_name)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CLOSE_PORT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CLOSE_PORT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_CLOSE_PORT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_CLOSE_PORT);
 
     /* ... body of routine ...  */
     
@@ -68,7 +68,7 @@ int MPI_Close_port(const char *port_name)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_CLOSE_PORT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_CLOSE_PORT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/spawn/comm_accept.c b/src/mpi/spawn/comm_accept.c
index be8aa1a..b3606b1 100644
--- a/src/mpi/spawn/comm_accept.c
+++ b/src/mpi/spawn/comm_accept.c
@@ -71,12 +71,12 @@ int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, MPI_Comm com
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Comm *newcomm_ptr = NULL;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_ACCEPT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_ACCEPT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_ACCEPT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_ACCEPT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -117,7 +117,7 @@ int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, MPI_Comm com
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_ACCEPT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_ACCEPT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/spawn/comm_connect.c b/src/mpi/spawn/comm_connect.c
index 1a98038..394c1ea 100644
--- a/src/mpi/spawn/comm_connect.c
+++ b/src/mpi/spawn/comm_connect.c
@@ -70,12 +70,12 @@ int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, MPI_Comm co
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Comm *newcomm_ptr = NULL;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_CONNECT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_CONNECT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_CONNECT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_CONNECT);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -117,7 +117,7 @@ int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, MPI_Comm co
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_CONNECT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_CONNECT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/spawn/comm_disconnect.c b/src/mpi/spawn/comm_disconnect.c
index a56a3f6..6f627b8 100644
--- a/src/mpi/spawn/comm_disconnect.c
+++ b/src/mpi/spawn/comm_disconnect.c
@@ -55,12 +55,12 @@ int MPI_Comm_disconnect(MPI_Comm * comm)
     static const char FCNAME[] = "MPI_Comm_disconnect";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_DISCONNECT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_DISCONNECT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_DISCONNECT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_DISCONNECT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -104,12 +104,12 @@ int MPI_Comm_disconnect(MPI_Comm * comm)
      * those complete).
      */
     /* FIXME-MT should we be checking this? */
-    if (MPIU_Object_get_ref(comm_ptr) > 1)
+    if (MPIR_Object_get_ref(comm_ptr) > 1)
     {
 	MPID_Progress_state progress_state;
 	
 	MPID_Progress_start(&progress_state);
-	while (MPIU_Object_get_ref(comm_ptr) > 1)
+	while (MPIR_Object_get_ref(comm_ptr) > 1)
 	{
 	    mpi_errno = MPID_Progress_wait(&progress_state);
 	    /* --BEGIN ERROR HANDLING-- */
@@ -131,7 +131,7 @@ int MPI_Comm_disconnect(MPI_Comm * comm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_DISCONNECT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_DISCONNECT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/spawn/comm_get_parent.c b/src/mpi/spawn/comm_get_parent.c
index 23805a4..8b4cba2 100644
--- a/src/mpi/spawn/comm_get_parent.c
+++ b/src/mpi/spawn/comm_get_parent.c
@@ -64,11 +64,11 @@ int MPI_Comm_get_parent(MPI_Comm *parent)
     static const char FCNAME[] = "MPI_Comm_get_parent";
 #endif
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_GET_PARENT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_GET_PARENT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_GET_PARENT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_GET_PARENT);
 
 #   ifdef HAVE_ERROR_CHECKING
     {
@@ -90,7 +90,7 @@ int MPI_Comm_get_parent(MPI_Comm *parent)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_GET_PARENT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_GET_PARENT);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/spawn/comm_join.c b/src/mpi/spawn/comm_join.c
index ee5837d..12ca669 100644
--- a/src/mpi/spawn/comm_join.c
+++ b/src/mpi/spawn/comm_join.c
@@ -139,18 +139,18 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm)
     int mpi_errno = MPI_SUCCESS, err;
     MPIR_Comm *intercomm_ptr;
     char *local_port, *remote_port;
-    MPIU_CHKLMEM_DECL(2);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_JOIN);
+    MPIR_CHKLMEM_DECL(2);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_JOIN);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_JOIN);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_JOIN);
 
     /* ... body of routine ...  */
     
-    MPIU_CHKLMEM_MALLOC(local_port, char *, MPI_MAX_PORT_NAME, mpi_errno, "local port name");
-    MPIU_CHKLMEM_MALLOC(remote_port, char *, MPI_MAX_PORT_NAME, mpi_errno, "remote port name");
+    MPIR_CHKLMEM_MALLOC(local_port, char *, MPI_MAX_PORT_NAME, mpi_errno, "local port name");
+    MPIR_CHKLMEM_MALLOC(remote_port, char *, MPI_MAX_PORT_NAME, mpi_errno, "remote port name");
 
     MPL_VG_MEM_INIT(local_port, MPI_MAX_PORT_NAME * sizeof(char));
     
@@ -186,8 +186,8 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_JOIN);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_JOIN);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/spawn/comm_spawn.c b/src/mpi/spawn/comm_spawn.c
index b2e4ff8..deff61f 100644
--- a/src/mpi/spawn/comm_spawn.c
+++ b/src/mpi/spawn/comm_spawn.c
@@ -68,12 +68,12 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL, *intercomm_ptr;
     MPIR_Info *info_ptr=NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_SPAWN);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_SPAWN);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_SPAWN);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_SPAWN);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -129,7 +129,7 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_SPAWN);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_SPAWN);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/spawn/comm_spawn_multiple.c b/src/mpi/spawn/comm_spawn_multiple.c
index 074637d..2dadbf8 100644
--- a/src/mpi/spawn/comm_spawn_multiple.c
+++ b/src/mpi/spawn/comm_spawn_multiple.c
@@ -73,13 +73,13 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[],
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Comm *intercomm_ptr = NULL;
     MPIR_Info **array_of_info_ptrs = NULL;
-    MPIU_CHKLMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_SPAWN_MULTIPLE);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_COMM_SPAWN_MULTIPLE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_SPAWN_MULTIPLE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_COMM_SPAWN_MULTIPLE);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -127,7 +127,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[],
     /* ... body of routine ...  */
     
     if (comm_ptr->rank == root) {
-	MPIU_CHKLMEM_MALLOC(array_of_info_ptrs, MPIR_Info **, count * sizeof(MPIR_Info*), mpi_errno, "array of info pointers");
+	MPIR_CHKLMEM_MALLOC(array_of_info_ptrs, MPIR_Info **, count * sizeof(MPIR_Info*), mpi_errno, "array of info pointers");
 	for (i=0; i<count; i++)
 	{
 	    MPIR_Info_get_ptr(array_of_info[i], array_of_info_ptrs[i]);
@@ -149,8 +149,8 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[],
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_SPAWN_MULTIPLE);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_COMM_SPAWN_MULTIPLE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/spawn/lookup_name.c b/src/mpi/spawn/lookup_name.c
index 82e735c..03a604e 100644
--- a/src/mpi/spawn/lookup_name.c
+++ b/src/mpi/spawn/lookup_name.c
@@ -68,11 +68,11 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
     static const char FCNAME[] = "MPI_Lookup_name";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_LOOKUP_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_LOOKUP_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_LOOKUP_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_LOOKUP_NAME);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -138,7 +138,7 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_LOOKUP_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_LOOKUP_NAME);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/spawn/open_port.c b/src/mpi/spawn/open_port.c
index f7d212a..b47554e 100644
--- a/src/mpi/spawn/open_port.c
+++ b/src/mpi/spawn/open_port.c
@@ -69,12 +69,12 @@ int MPI_Open_port(MPI_Info info, char *port_name)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_OPEN_PORT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_OPEN_PORT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_OPEN_PORT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_OPEN_PORT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -111,7 +111,7 @@ int MPI_Open_port(MPI_Info info, char *port_name)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_OPEN_PORT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_OPEN_PORT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/spawn/publish_name.c b/src/mpi/spawn/publish_name.c
index 03105a0..db646f3 100644
--- a/src/mpi/spawn/publish_name.c
+++ b/src/mpi/spawn/publish_name.c
@@ -58,12 +58,12 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, const char *port_n
     static const char FCNAME[] = "MPI_Publish_name";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_PUBLISH_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_PUBLISH_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_PUBLISH_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_PUBLISH_NAME);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -121,7 +121,7 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, const char *port_n
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_PUBLISH_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_PUBLISH_NAME);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
     
diff --git a/src/mpi/spawn/unpublish_name.c b/src/mpi/spawn/unpublish_name.c
index 14f4927..5094f56 100644
--- a/src/mpi/spawn/unpublish_name.c
+++ b/src/mpi/spawn/unpublish_name.c
@@ -55,12 +55,12 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info, const char *port
     static const char FCNAME[] = "MPI_Unpublish_name";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Info *info_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_UNPUBLISH_NAME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_UNPUBLISH_NAME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);  
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_UNPUBLISH_NAME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_UNPUBLISH_NAME);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -121,7 +121,7 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info, const char *port
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_UNPUBLISH_NAME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_UNPUBLISH_NAME);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/timer/wtick.c b/src/mpi/timer/wtick.c
index b3f056c..de2a497 100644
--- a/src/mpi/timer/wtick.c
+++ b/src/mpi/timer/wtick.c
@@ -44,13 +44,13 @@ double MPI_Wtick(void) __attribute__((weak,alias("PMPI_Wtick")));
 double MPI_Wtick( void )
 {
     double tick;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WTICK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WTICK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WTICK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WTICK);
     MPID_Wtick(&tick);
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WTICK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WTICK);
 
     return tick;
 }
diff --git a/src/mpi/timer/wtime.c b/src/mpi/timer/wtime.c
index a0338f0..f8dc7d1 100644
--- a/src/mpi/timer/wtime.c
+++ b/src/mpi/timer/wtime.c
@@ -48,14 +48,14 @@ double MPI_Wtime( void )
 {
     double d;
     MPID_Time_t t;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_WTIME);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_WTIME);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_WTIME);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_WTIME);
     MPID_Wtime( &t );
     MPID_Wtime_todouble( &t, &d );
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_WTIME);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_WTIME);
 
     return d;
 }
diff --git a/src/mpi/topo/cart_coords.c b/src/mpi/topo/cart_coords.c
index d54747c..32c1176 100644
--- a/src/mpi/topo/cart_coords.c
+++ b/src/mpi/topo/cart_coords.c
@@ -61,11 +61,11 @@ int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[])
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Topology *cart_ptr;
     int i, nnodes;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CART_COORDS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CART_COORDS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_CART_COORDS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_CART_COORDS);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -127,7 +127,7 @@ int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[])
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_CART_COORDS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_CART_COORDS);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/cart_create.c b/src/mpi/topo/cart_create.c
index 99f39a0..ab59b95 100644
--- a/src/mpi/topo/cart_create.c
+++ b/src/mpi/topo/cart_create.c
@@ -41,7 +41,7 @@ int MPIR_Cart_create( MPIR_Comm *comm_ptr, int ndims, const int dims[],
     int i, newsize, rank, nranks, mpi_errno = MPI_SUCCESS;
     MPIR_Comm *newcomm_ptr = NULL;
     MPIR_Topology *cart_ptr = NULL;
-    MPIU_CHKPMEM_DECL(4);
+    MPIR_CHKPMEM_DECL(4);
     
     /* Set this as null incase we exit with an error */
     *comm_cart = MPI_COMM_NULL;
@@ -70,7 +70,7 @@ int MPIR_Cart_create( MPIR_Comm *comm_ptr, int ndims, const int dims[],
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 	    
 	    /* Create the topology structure */
-	    MPIU_CHKPMEM_MALLOC(cart_ptr,MPIR_Topology*,sizeof(MPIR_Topology),
+	    MPIR_CHKPMEM_MALLOC(cart_ptr,MPIR_Topology*,sizeof(MPIR_Topology),
 				mpi_errno, "cart_ptr" );
 	    
 	    cart_ptr->kind               = MPI_CART;
@@ -80,11 +80,11 @@ int MPIR_Cart_create( MPIR_Comm *comm_ptr, int ndims, const int dims[],
 	    /* make mallocs of size 1 int so that they get freed as part of the 
 	       normal free mechanism */
 	    
-	    MPIU_CHKPMEM_MALLOC(cart_ptr->topo.cart.dims,int*,sizeof(int),
+	    MPIR_CHKPMEM_MALLOC(cart_ptr->topo.cart.dims,int*,sizeof(int),
 				mpi_errno, "cart.dims");
-	    MPIU_CHKPMEM_MALLOC(cart_ptr->topo.cart.periodic,int*,sizeof(int),
+	    MPIR_CHKPMEM_MALLOC(cart_ptr->topo.cart.periodic,int*,sizeof(int),
 				mpi_errno, "cart.periodic");
-	    MPIU_CHKPMEM_MALLOC(cart_ptr->topo.cart.position,int*,sizeof(int),
+	    MPIR_CHKPMEM_MALLOC(cart_ptr->topo.cart.position,int*,sizeof(int),
 				mpi_errno, "cart.position");
 	}
 	else {
@@ -112,7 +112,7 @@ int MPIR_Cart_create( MPIR_Comm *comm_ptr, int ndims, const int dims[],
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
         } else {
-	    mpi_errno = MPIR_Comm_copy( (MPIR_Comm *)comm_ptr, newsize,
+	    mpi_errno = MPII_Comm_copy( (MPIR_Comm *)comm_ptr, newsize,
 					&newcomm_ptr );
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 	    rank   = comm_ptr->rank;
@@ -126,17 +126,17 @@ int MPIR_Cart_create( MPIR_Comm *comm_ptr, int ndims, const int dims[],
 	}
 	
 	/* Create the topololgy structure */
-	MPIU_CHKPMEM_MALLOC(cart_ptr,MPIR_Topology*,sizeof(MPIR_Topology),
+	MPIR_CHKPMEM_MALLOC(cart_ptr,MPIR_Topology*,sizeof(MPIR_Topology),
 			    mpi_errno, "cart_ptr" );
 	
 	cart_ptr->kind               = MPI_CART;
 	cart_ptr->topo.cart.nnodes   = newsize;
 	cart_ptr->topo.cart.ndims    = ndims;
-	MPIU_CHKPMEM_MALLOC(cart_ptr->topo.cart.dims,int*,ndims*sizeof(int),
+	MPIR_CHKPMEM_MALLOC(cart_ptr->topo.cart.dims,int*,ndims*sizeof(int),
 			    mpi_errno, "cart.dims");
-	MPIU_CHKPMEM_MALLOC(cart_ptr->topo.cart.periodic,int*,ndims*sizeof(int),
+	MPIR_CHKPMEM_MALLOC(cart_ptr->topo.cart.periodic,int*,ndims*sizeof(int),
 			    mpi_errno, "cart.periodic");
-	MPIU_CHKPMEM_MALLOC(cart_ptr->topo.cart.position,int*,ndims*sizeof(int),
+	MPIR_CHKPMEM_MALLOC(cart_ptr->topo.cart.position,int*,ndims*sizeof(int),
 			    mpi_errno, "cart.position");
 	nranks = newsize;
 	for (i=0; i<ndims; i++)
@@ -162,7 +162,7 @@ int MPIR_Cart_create( MPIR_Comm *comm_ptr, int ndims, const int dims[],
 
   fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     /* --END ERROR HANDLING-- */
     goto fn_exit;
 }
@@ -240,12 +240,12 @@ int MPI_Cart_create(MPI_Comm comm_old, int ndims, const int dims[],
 {
     int       mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CART_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CART_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_CART_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_CART_CREATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -305,7 +305,7 @@ int MPI_Cart_create(MPI_Comm comm_old, int ndims, const int dims[],
     /* ... end of body of routine ... */
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_CART_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_CART_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/cart_get.c b/src/mpi/topo/cart_get.c
index f1ec93b..c92d5f7 100644
--- a/src/mpi/topo/cart_get.c
+++ b/src/mpi/topo/cart_get.c
@@ -65,11 +65,11 @@ int MPI_Cart_get(MPI_Comm comm, int maxdims, int dims[], int periods[],
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Topology *cart_ptr;
     int i, n, *vals;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CART_GET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CART_GET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_CART_GET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_CART_GET);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -140,7 +140,7 @@ int MPI_Cart_get(MPI_Comm comm, int maxdims, int dims[], int periods[],
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_CART_GET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_CART_GET);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/cart_map.c b/src/mpi/topo/cart_map.c
index 4ef7039..3689ed9 100644
--- a/src/mpi/topo/cart_map.c
+++ b/src/mpi/topo/cart_map.c
@@ -131,11 +131,11 @@ int MPI_Cart_map(MPI_Comm comm, int ndims, const int dims[], const int periods[]
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CART_MAP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CART_MAP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_CART_MAP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_CART_MAP);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -183,7 +183,7 @@ int MPI_Cart_map(MPI_Comm comm, int ndims, const int dims[], const int periods[]
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_CART_MAP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_CART_MAP);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/topo/cart_rank.c b/src/mpi/topo/cart_rank.c
index 5942542..56c97fd 100644
--- a/src/mpi/topo/cart_rank.c
+++ b/src/mpi/topo/cart_rank.c
@@ -92,11 +92,11 @@ int MPI_Cart_rank(MPI_Comm comm, const int coords[], int *rank)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Topology *cart_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CART_RANK);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CART_RANK);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_CART_RANK);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_CART_RANK);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -156,7 +156,7 @@ int MPI_Cart_rank(MPI_Comm comm, const int coords[], int *rank)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_CART_RANK);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_CART_RANK);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/cart_shift.c b/src/mpi/topo/cart_shift.c
index 0c4985d..6ea8a1e 100644
--- a/src/mpi/topo/cart_shift.c
+++ b/src/mpi/topo/cart_shift.c
@@ -123,11 +123,11 @@ int MPI_Cart_shift(MPI_Comm comm, int direction, int disp, int *rank_source,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CART_SHIFT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CART_SHIFT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_CART_SHIFT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_CART_SHIFT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -171,7 +171,7 @@ int MPI_Cart_shift(MPI_Comm comm, int direction, int disp, int *rank_source,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_CART_SHIFT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_CART_SHIFT);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/cart_sub.c b/src/mpi/topo/cart_sub.c
index 014f20b..318b510 100644
--- a/src/mpi/topo/cart_sub.c
+++ b/src/mpi/topo/cart_sub.c
@@ -62,13 +62,13 @@ int MPI_Cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm)
     int ndims, key, color, ndims_in_subcomm, nnodes_in_subcomm, i, j, rank;
     MPIR_Comm *comm_ptr = NULL, *newcomm_ptr;
     MPIR_Topology *topo_ptr, *toponew_ptr;
-    MPIU_CHKPMEM_DECL(4);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CART_SUB);
+    MPIR_CHKPMEM_DECL(4);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CART_SUB);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_CART_SUB);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_CART_SUB);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -154,18 +154,18 @@ int MPI_Cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm)
         *newcomm = newcomm_ptr->handle;
 	
 	/* Save the topology of this new communicator */
-	MPIU_CHKPMEM_MALLOC(toponew_ptr,MPIR_Topology*,sizeof(MPIR_Topology),
+	MPIR_CHKPMEM_MALLOC(toponew_ptr,MPIR_Topology*,sizeof(MPIR_Topology),
 			    mpi_errno,"toponew_ptr");
 	
 	toponew_ptr->kind		  = MPI_CART;
 	toponew_ptr->topo.cart.ndims  = ndims_in_subcomm;
 	toponew_ptr->topo.cart.nnodes = nnodes_in_subcomm;
 	if (ndims_in_subcomm) {
-	    MPIU_CHKPMEM_MALLOC(toponew_ptr->topo.cart.dims,int*,
+	    MPIR_CHKPMEM_MALLOC(toponew_ptr->topo.cart.dims,int*,
 				ndims_in_subcomm*sizeof(int),mpi_errno,"cart.dims");
-	    MPIU_CHKPMEM_MALLOC(toponew_ptr->topo.cart.periodic,int*,
+	    MPIR_CHKPMEM_MALLOC(toponew_ptr->topo.cart.periodic,int*,
 				ndims_in_subcomm*sizeof(int),mpi_errno,"cart.periodic");
-	    MPIU_CHKPMEM_MALLOC(toponew_ptr->topo.cart.position,int*,
+	    MPIR_CHKPMEM_MALLOC(toponew_ptr->topo.cart.position,int*,
 				ndims_in_subcomm*sizeof(int),mpi_errno,"cart.position");
 	}
 	else {
@@ -197,13 +197,13 @@ int MPI_Cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_CART_SUB);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_CART_SUB);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
   fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
 #   ifdef HAVE_ERROR_CHECKING
     {
 	mpi_errno = MPIR_Err_create_code(
diff --git a/src/mpi/topo/cartdim_get.c b/src/mpi/topo/cartdim_get.c
index a6b7a9e..489cf8b 100644
--- a/src/mpi/topo/cartdim_get.c
+++ b/src/mpi/topo/cartdim_get.c
@@ -56,11 +56,11 @@ int MPI_Cartdim_get(MPI_Comm comm, int *ndims)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Topology *cart_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_CARTDIM_GET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_CARTDIM_GET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_CARTDIM_GET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_CARTDIM_GET);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -102,7 +102,7 @@ int MPI_Cartdim_get(MPI_Comm comm, int *ndims)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_CARTDIM_GET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_CARTDIM_GET);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/dims_create.c b/src/mpi/topo/dims_create.c
index f3320c2..4b6e1e5 100644
--- a/src/mpi/topo/dims_create.c
+++ b/src/mpi/topo/dims_create.c
@@ -350,11 +350,11 @@ Input/Output Parameters:
 int MPI_Dims_create(int nnodes, int ndims, int dims[])
 {
     int mpi_errno = MPI_SUCCESS;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIMS_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_DIMS_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_DIMS_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_DIMS_CREATE);
 
     if (ndims == 0) goto fn_exit;
     
@@ -382,7 +382,7 @@ int MPI_Dims_create(int nnodes, int ndims, int dims[])
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_DIMS_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_DIMS_CREATE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/topo/dist_gr_create.c b/src/mpi/topo/dist_gr_create.c
index 02032ca..1d60956 100644
--- a/src/mpi/topo/dist_gr_create.c
+++ b/src/mpi/topo/dist_gr_create.c
@@ -76,7 +76,7 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
     MPIR_Comm *comm_dist_graph_ptr = NULL;
     MPIR_Request **reqs = NULL;
     MPIR_Topology *topo_ptr = NULL;
-    MPIR_Dist_graph_topology *dist_graph_ptr = NULL;
+    MPII_Dist_graph_topology *dist_graph_ptr = NULL;
     int i;
     int j;
     int idx;
@@ -92,14 +92,14 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
     int *rs;
     int in_out_peers[2] = {-1, -1};
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIU_CHKLMEM_DECL(9);
-    MPIU_CHKPMEM_DECL(1);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_CREATE);
+    MPIR_CHKLMEM_DECL(9);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_DIST_GRAPH_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_DIST_GRAPH_CREATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -163,9 +163,9 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
 
     /* following the spirit of the old topo interface, attributes do not
      * propagate to the new communicator (see MPI-2.1 pp. 243 line 11) */
-    mpi_errno = MPIR_Comm_copy(comm_ptr, comm_size, &comm_dist_graph_ptr);
+    mpi_errno = MPII_Comm_copy(comm_ptr, comm_size, &comm_dist_graph_ptr);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(comm_dist_graph_ptr != NULL);
+    MPIR_Assert(comm_dist_graph_ptr != NULL);
 
     /* rin is an array of size comm_size containing pointers to arrays of
      * rin_sizes[x].  rin[x] is locally known number of edges into this process
@@ -174,12 +174,12 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
      * rout is an array of comm_size containing pointers to arrays of
      * rout_sizes[x].  rout[x] is the locally known number of edges out of this
      * process to rank x. */
-    MPIU_CHKLMEM_MALLOC(rout,       int **, comm_size*sizeof(int*), mpi_errno, "rout");
-    MPIU_CHKLMEM_MALLOC(rin,        int **, comm_size*sizeof(int*), mpi_errno, "rin");
-    MPIU_CHKLMEM_MALLOC(rin_sizes,  int *, comm_size*sizeof(int), mpi_errno, "rin_sizes");
-    MPIU_CHKLMEM_MALLOC(rout_sizes, int *, comm_size*sizeof(int), mpi_errno, "rout_sizes");
-    MPIU_CHKLMEM_MALLOC(rin_idx,    int *, comm_size*sizeof(int), mpi_errno, "rin_idx");
-    MPIU_CHKLMEM_MALLOC(rout_idx,   int *, comm_size*sizeof(int), mpi_errno, "rout_idx");
+    MPIR_CHKLMEM_MALLOC(rout,       int **, comm_size*sizeof(int*), mpi_errno, "rout");
+    MPIR_CHKLMEM_MALLOC(rin,        int **, comm_size*sizeof(int*), mpi_errno, "rin");
+    MPIR_CHKLMEM_MALLOC(rin_sizes,  int *, comm_size*sizeof(int), mpi_errno, "rin_sizes");
+    MPIR_CHKLMEM_MALLOC(rout_sizes, int *, comm_size*sizeof(int), mpi_errno, "rout_sizes");
+    MPIR_CHKLMEM_MALLOC(rin_idx,    int *, comm_size*sizeof(int), mpi_errno, "rin_idx");
+    MPIR_CHKLMEM_MALLOC(rout_idx,   int *, comm_size*sizeof(int), mpi_errno, "rout_idx");
 
     memset(rout,       0, comm_size*sizeof(int*));
     memset(rin,        0, comm_size*sizeof(int*));
@@ -191,9 +191,9 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
     /* compute array sizes */
     idx = 0;
     for (i = 0; i < n; ++i) {
-        MPIU_Assert(sources[i] < comm_size);
+        MPIR_Assert(sources[i] < comm_size);
         for (j = 0; j < degrees[i]; ++j) {
-            MPIU_Assert(destinations[idx] < comm_size);
+            MPIR_Assert(destinations[idx] < comm_size);
             /* rout_sizes[i] is twice as long as the number of edges to be
              * sent to rank i by this process */
             rout_sizes[sources[i]] += 2;
@@ -218,15 +218,15 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
     for (i = 0; i < n; ++i) {
         /* TODO add this assert as proper error checking above */
         int s_rank = sources[i];
-        MPIU_Assert(s_rank < comm_size);
-        MPIU_Assert(s_rank >= 0);
+        MPIR_Assert(s_rank < comm_size);
+        MPIR_Assert(s_rank >= 0);
 
         for (j = 0; j < degrees[i]; ++j) {
             int d_rank = destinations[idx];
             int weight = (weights == MPI_UNWEIGHTED ? 0 : weights[idx]);
             /* TODO add this assert as proper error checking above */
-            MPIU_Assert(d_rank < comm_size);
-            MPIU_Assert(d_rank >= 0);
+            MPIR_Assert(d_rank < comm_size);
+            MPIR_Assert(d_rank >= 0);
 
             /* XXX DJG what about self-edges? do we need to drop one of these
              * cases when there is a self-edge to avoid double-counting? */
@@ -249,11 +249,11 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
 
     for (i = 0; i < comm_size; ++i) {
         /* sanity check that all arrays are fully populated*/
-        MPIU_Assert(rin_idx[i] == rin_sizes[i]);
-        MPIU_Assert(rout_idx[i] == rout_sizes[i]);
+        MPIR_Assert(rin_idx[i] == rin_sizes[i]);
+        MPIR_Assert(rout_idx[i] == rout_sizes[i]);
     }
 
-    MPIU_CHKLMEM_MALLOC(rs, int *, 2*comm_size*sizeof(int), mpi_errno, "red-scat source buffer");
+    MPIR_CHKLMEM_MALLOC(rs, int *, 2*comm_size*sizeof(int), mpi_errno, "red-scat source buffer");
     for (i = 0; i < comm_size; ++i) {
         rs[2*i]   = (rin_sizes[i]  ? 1 : 0);
         rs[2*i+1] = (rout_sizes[i] ? 1 : 0);
@@ -264,13 +264,13 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
 
-    MPIU_Assert(in_out_peers[0] <= comm_size && in_out_peers[0] >= 0);
-    MPIU_Assert(in_out_peers[1] <= comm_size && in_out_peers[1] >= 0);
+    MPIR_Assert(in_out_peers[0] <= comm_size && in_out_peers[0] >= 0);
+    MPIR_Assert(in_out_peers[1] <= comm_size && in_out_peers[1] >= 0);
 
     idx = 0;
     /* must be 2*comm_size requests because we will possibly send inbound and
      * outbound edges to everyone in our communicator */
-    MPIU_CHKLMEM_MALLOC(reqs, MPIR_Request **, 2*comm_size*sizeof(MPIR_Request *), mpi_errno, "temp request array");
+    MPIR_CHKLMEM_MALLOC(reqs, MPIR_Request **, 2*comm_size*sizeof(MPIR_Request *), mpi_errno, "temp request array");
     for (i = 0; i < comm_size; ++i) {
         if (rin_sizes[i]) {
             /* send edges where i is a destination to process i */
@@ -283,10 +283,10 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
     }
-    MPIU_Assert(idx <= (2 * comm_size));
+    MPIR_Assert(idx <= (2 * comm_size));
 
     /* Create the topology structure */
-    MPIU_CHKPMEM_MALLOC(topo_ptr, MPIR_Topology *, sizeof(MPIR_Topology), mpi_errno, "topo_ptr");
+    MPIR_CHKPMEM_MALLOC(topo_ptr, MPIR_Topology *, sizeof(MPIR_Topology), mpi_errno, "topo_ptr");
     topo_ptr->kind = MPI_DIST_GRAPH;
     dist_graph_ptr = &topo_ptr->topo.dist_graph;
     dist_graph_ptr->indegree = 0;
@@ -330,10 +330,10 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
             if (deg >= in_capacity) {
                 in_capacity *= 2;
                 /* FIXME: buf is never freed on error! */
-                MPIU_REALLOC_ORJUMP(dist_graph_ptr->in, in_capacity*sizeof(int), mpi_errno);
+                MPIR_REALLOC_ORJUMP(dist_graph_ptr->in, in_capacity*sizeof(int), mpi_errno);
                 if (dist_graph_ptr->is_weighted)
                     /* FIXME: buf is never freed on error! */
-                    MPIU_REALLOC_ORJUMP(dist_graph_ptr->in_weights, in_capacity*sizeof(int), mpi_errno);
+                    MPIR_REALLOC_ORJUMP(dist_graph_ptr->in_weights, in_capacity*sizeof(int), mpi_errno);
             }
             dist_graph_ptr->in[deg] = buf[2*j];
             if (dist_graph_ptr->is_weighted)
@@ -364,10 +364,10 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
             if (deg >= out_capacity) {
                 out_capacity *= 2;
                 /* FIXME: buf is never freed on error! */
-                MPIU_REALLOC_ORJUMP(dist_graph_ptr->out, out_capacity*sizeof(int), mpi_errno);
+                MPIR_REALLOC_ORJUMP(dist_graph_ptr->out, out_capacity*sizeof(int), mpi_errno);
                 if (dist_graph_ptr->is_weighted)
                     /* FIXME: buf is never freed on error! */
-                    MPIU_REALLOC_ORJUMP(dist_graph_ptr->out_weights, out_capacity*sizeof(int), mpi_errno);
+                    MPIR_REALLOC_ORJUMP(dist_graph_ptr->out_weights, out_capacity*sizeof(int), mpi_errno);
             }
             dist_graph_ptr->out[deg] = buf[2*j];
             if (dist_graph_ptr->is_weighted)
@@ -380,17 +380,17 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     /* remove any excess memory allocation */
-    MPIU_REALLOC_ORJUMP(dist_graph_ptr->in, dist_graph_ptr->indegree*sizeof(int), mpi_errno);
-    MPIU_REALLOC_ORJUMP(dist_graph_ptr->out, dist_graph_ptr->outdegree*sizeof(int), mpi_errno);
+    MPIR_REALLOC_ORJUMP(dist_graph_ptr->in, dist_graph_ptr->indegree*sizeof(int), mpi_errno);
+    MPIR_REALLOC_ORJUMP(dist_graph_ptr->out, dist_graph_ptr->outdegree*sizeof(int), mpi_errno);
     if (dist_graph_ptr->is_weighted) {
-        MPIU_REALLOC_ORJUMP(dist_graph_ptr->in_weights, dist_graph_ptr->indegree*sizeof(int), mpi_errno);
-        MPIU_REALLOC_ORJUMP(dist_graph_ptr->out_weights, dist_graph_ptr->outdegree*sizeof(int), mpi_errno);
+        MPIR_REALLOC_ORJUMP(dist_graph_ptr->in_weights, dist_graph_ptr->indegree*sizeof(int), mpi_errno);
+        MPIR_REALLOC_ORJUMP(dist_graph_ptr->out_weights, dist_graph_ptr->outdegree*sizeof(int), mpi_errno);
     }
 
     mpi_errno = MPIR_Topology_put(comm_dist_graph_ptr, topo_ptr);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 
     MPIR_OBJ_PUBLISH_HANDLE(*comm_dist_graph, comm_dist_graph_ptr->handle);
 
@@ -404,9 +404,9 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
             MPL_free(rout[i]);
     }
 
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
 
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_DIST_GRAPH_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_DIST_GRAPH_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
@@ -420,7 +420,7 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[],
         MPL_free(dist_graph_ptr->out);
     if (dist_graph_ptr && dist_graph_ptr->out_weights)
         MPL_free(dist_graph_ptr->out_weights);
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
 #ifdef HAVE_ERROR_CHECKING
     mpi_errno = MPIR_Err_create_code(
         mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
diff --git a/src/mpi/topo/dist_gr_create_adj.c b/src/mpi/topo/dist_gr_create_adj.c
index 454e6a8..1c5b4e1 100644
--- a/src/mpi/topo/dist_gr_create_adj.c
+++ b/src/mpi/topo/dist_gr_create_adj.c
@@ -77,14 +77,14 @@ int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old,
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Comm *comm_dist_graph_ptr = NULL;
     MPIR_Topology *topo_ptr = NULL;
-    MPIR_Dist_graph_topology *dist_graph_ptr = NULL;
-    MPIU_CHKPMEM_DECL(5);
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_CREATE_ADJACENT);
+    MPII_Dist_graph_topology *dist_graph_ptr = NULL;
+    MPIR_CHKPMEM_DECL(5);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_CREATE_ADJACENT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_DIST_GRAPH_CREATE_ADJACENT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_DIST_GRAPH_CREATE_ADJACENT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -146,11 +146,11 @@ int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old,
 
     /* following the spirit of the old topo interface, attributes do not
      * propagate to the new communicator (see MPI-2.1 pp. 243 line 11) */
-    mpi_errno = MPIR_Comm_copy(comm_ptr, comm_ptr->local_size, &comm_dist_graph_ptr);
+    mpi_errno = MPII_Comm_copy(comm_ptr, comm_ptr->local_size, &comm_dist_graph_ptr);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     /* Create the topology structure */
-    MPIU_CHKPMEM_MALLOC(topo_ptr, MPIR_Topology *, sizeof(MPIR_Topology), mpi_errno, "topo_ptr");
+    MPIR_CHKPMEM_MALLOC(topo_ptr, MPIR_Topology *, sizeof(MPIR_Topology), mpi_errno, "topo_ptr");
     topo_ptr->kind = MPI_DIST_GRAPH;
     dist_graph_ptr = &topo_ptr->topo.dist_graph;
     dist_graph_ptr->indegree = indegree;
@@ -161,32 +161,32 @@ int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old,
     dist_graph_ptr->out_weights = NULL;
     dist_graph_ptr->is_weighted = (sourceweights != MPI_UNWEIGHTED);
 
-    MPIU_CHKPMEM_MALLOC(dist_graph_ptr->in, int *, indegree*sizeof(int), mpi_errno, "dist_graph_ptr->in");
-    MPIU_CHKPMEM_MALLOC(dist_graph_ptr->out, int *, outdegree*sizeof(int), mpi_errno, "dist_graph_ptr->out");
-    MPIU_Memcpy(dist_graph_ptr->in, sources, indegree*sizeof(int));
-    MPIU_Memcpy(dist_graph_ptr->out, destinations, outdegree*sizeof(int));
+    MPIR_CHKPMEM_MALLOC(dist_graph_ptr->in, int *, indegree*sizeof(int), mpi_errno, "dist_graph_ptr->in");
+    MPIR_CHKPMEM_MALLOC(dist_graph_ptr->out, int *, outdegree*sizeof(int), mpi_errno, "dist_graph_ptr->out");
+    MPIR_Memcpy(dist_graph_ptr->in, sources, indegree*sizeof(int));
+    MPIR_Memcpy(dist_graph_ptr->out, destinations, outdegree*sizeof(int));
 
     if (dist_graph_ptr->is_weighted) {
-        MPIU_CHKPMEM_MALLOC(dist_graph_ptr->in_weights, int *, indegree*sizeof(int), mpi_errno, "dist_graph_ptr->in_weights");
-        MPIU_CHKPMEM_MALLOC(dist_graph_ptr->out_weights, int *, outdegree*sizeof(int), mpi_errno, "dist_graph_ptr->out_weights");
-        MPIU_Memcpy(dist_graph_ptr->in_weights, sourceweights, indegree*sizeof(int));
-        MPIU_Memcpy(dist_graph_ptr->out_weights, destweights, outdegree*sizeof(int));
+        MPIR_CHKPMEM_MALLOC(dist_graph_ptr->in_weights, int *, indegree*sizeof(int), mpi_errno, "dist_graph_ptr->in_weights");
+        MPIR_CHKPMEM_MALLOC(dist_graph_ptr->out_weights, int *, outdegree*sizeof(int), mpi_errno, "dist_graph_ptr->out_weights");
+        MPIR_Memcpy(dist_graph_ptr->in_weights, sourceweights, indegree*sizeof(int));
+        MPIR_Memcpy(dist_graph_ptr->out_weights, destweights, outdegree*sizeof(int));
     }
 
     mpi_errno = MPIR_Topology_put(comm_dist_graph_ptr, topo_ptr);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     MPIR_OBJ_PUBLISH_HANDLE(*comm_dist_graph, comm_dist_graph_ptr->handle);
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
     /* ... end of body of routine ... */
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_DIST_GRAPH_CREATE_ADJACENT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_DIST_GRAPH_CREATE_ADJACENT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
 #ifdef HAVE_ERROR_CHECKING
     mpi_errno = MPIR_Err_create_code(
         mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
diff --git a/src/mpi/topo/dist_gr_neighb.c b/src/mpi/topo/dist_gr_neighb.c
index dd7a128..fa7811e 100644
--- a/src/mpi/topo/dist_gr_neighb.c
+++ b/src/mpi/topo/dist_gr_neighb.c
@@ -42,14 +42,14 @@ int MPIR_Dist_graph_neighbors_impl(MPIR_Comm *comm_ptr,
     topo_ptr = MPIR_Topology_get(comm_ptr);
     MPIR_ERR_CHKANDJUMP(!topo_ptr || topo_ptr->kind != MPI_DIST_GRAPH, mpi_errno, MPI_ERR_TOPOLOGY, "**notdistgraphtopo");
 
-    MPIU_Memcpy(sources, topo_ptr->topo.dist_graph.in, maxindegree*sizeof(int));
-    MPIU_Memcpy(destinations, topo_ptr->topo.dist_graph.out, maxoutdegree*sizeof(int));
+    MPIR_Memcpy(sources, topo_ptr->topo.dist_graph.in, maxindegree*sizeof(int));
+    MPIR_Memcpy(destinations, topo_ptr->topo.dist_graph.out, maxoutdegree*sizeof(int));
 
     if (sourceweights != MPI_UNWEIGHTED && topo_ptr->topo.dist_graph.is_weighted) {
-        MPIU_Memcpy(sourceweights, topo_ptr->topo.dist_graph.in_weights, maxindegree*sizeof(int));
+        MPIR_Memcpy(sourceweights, topo_ptr->topo.dist_graph.in_weights, maxindegree*sizeof(int));
     }
     if (destweights != MPI_UNWEIGHTED && topo_ptr->topo.dist_graph.is_weighted) {
-        MPIU_Memcpy(destweights, topo_ptr->topo.dist_graph.out_weights, maxoutdegree*sizeof(int));
+        MPIR_Memcpy(destweights, topo_ptr->topo.dist_graph.out_weights, maxoutdegree*sizeof(int));
     }
 
 fn_exit:
@@ -91,13 +91,13 @@ int MPI_Dist_graph_neighbors(MPI_Comm comm,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     /* FIXME: Why does this routine need a CS */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -142,7 +142,7 @@ int MPI_Dist_graph_neighbors(MPI_Comm comm,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/dist_gr_neighb_count.c b/src/mpi/topo/dist_gr_neighb_count.c
index 37ad547..247b33a 100644
--- a/src/mpi/topo/dist_gr_neighb_count.c
+++ b/src/mpi/topo/dist_gr_neighb_count.c
@@ -76,13 +76,13 @@ int MPI_Dist_graph_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS_COUNT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS_COUNT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
 
     /* FIXME: Why does this routine require a CS? */
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS_COUNT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS_COUNT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -122,7 +122,7 @@ int MPI_Dist_graph_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS_COUNT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_DIST_GRAPH_NEIGHBORS_COUNT);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/graph_get.c b/src/mpi/topo/graph_get.c
index ac34259..f0b8646 100644
--- a/src/mpi/topo/graph_get.c
+++ b/src/mpi/topo/graph_get.c
@@ -62,11 +62,11 @@ int MPI_Graph_get(MPI_Comm comm, int maxindex, int maxedges,
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Topology *topo_ptr;
     int i, n, *vals;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GRAPH_GET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GRAPH_GET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GRAPH_GET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GRAPH_GET);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -124,7 +124,7 @@ int MPI_Graph_get(MPI_Comm comm, int maxindex, int maxedges,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GRAPH_GET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GRAPH_GET);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/graph_map.c b/src/mpi/topo/graph_map.c
index e6e41a0..863490b 100644
--- a/src/mpi/topo/graph_map.c
+++ b/src/mpi/topo/graph_map.c
@@ -105,11 +105,11 @@ int MPI_Graph_map(MPI_Comm comm, int nnodes, const int indx[], const int edges[]
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GRAPH_MAP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GRAPH_MAP);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GRAPH_MAP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GRAPH_MAP);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -154,7 +154,7 @@ int MPI_Graph_map(MPI_Comm comm, int nnodes, const int indx[], const int edges[]
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GRAPH_MAP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GRAPH_MAP);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/graph_nbr.c b/src/mpi/topo/graph_nbr.c
index 86b87ef..4fc5a99 100644
--- a/src/mpi/topo/graph_nbr.c
+++ b/src/mpi/topo/graph_nbr.c
@@ -93,11 +93,11 @@ int MPI_Graph_neighbors(MPI_Comm comm, int rank, int maxneighbors,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GRAPH_NEIGHBORS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GRAPH_NEIGHBORS);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GRAPH_NEIGHBORS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GRAPH_NEIGHBORS);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -137,7 +137,7 @@ int MPI_Graph_neighbors(MPI_Comm comm, int rank, int maxneighbors,
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GRAPH_NEIGHBORS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GRAPH_NEIGHBORS);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/graphcreate.c b/src/mpi/topo/graphcreate.c
index 4a11a89..9f830e6 100644
--- a/src/mpi/topo/graphcreate.c
+++ b/src/mpi/topo/graphcreate.c
@@ -49,7 +49,7 @@ int MPIR_Graph_create( MPIR_Comm *comm_ptr, int nnodes,
     int i, nedges;
     MPIR_Comm *newcomm_ptr = NULL;
     MPIR_Topology *graph_ptr = NULL;
-    MPIU_CHKPMEM_DECL(3);
+    MPIR_CHKPMEM_DECL(3);
 
     /* Set this to null in case there is an error */
     *comm_graph = MPI_COMM_NULL;
@@ -71,7 +71,7 @@ int MPIR_Graph_create( MPIR_Comm *comm_ptr, int nnodes,
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     } else {
 	/* Just use the first nnodes processes in the communicator */
-	mpi_errno = MPIR_Comm_copy( (MPIR_Comm *)comm_ptr, nnodes,
+	mpi_errno = MPII_Comm_copy( (MPIR_Comm *)comm_ptr, nnodes,
 				    &newcomm_ptr );
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
@@ -85,15 +85,15 @@ int MPIR_Graph_create( MPIR_Comm *comm_ptr, int nnodes,
     }
 
     nedges = indx[nnodes-1];
-    MPIU_CHKPMEM_MALLOC(graph_ptr,MPIR_Topology*,sizeof(MPIR_Topology),
+    MPIR_CHKPMEM_MALLOC(graph_ptr,MPIR_Topology*,sizeof(MPIR_Topology),
 			mpi_errno,"graph_ptr");
     
     graph_ptr->kind = MPI_GRAPH;
     graph_ptr->topo.graph.nnodes = nnodes;
     graph_ptr->topo.graph.nedges = nedges;
-    MPIU_CHKPMEM_MALLOC(graph_ptr->topo.graph.index,int*,
+    MPIR_CHKPMEM_MALLOC(graph_ptr->topo.graph.index,int*,
 			nnodes*sizeof(int),mpi_errno,"graph.index");
-    MPIU_CHKPMEM_MALLOC(graph_ptr->topo.graph.edges,int*,
+    MPIR_CHKPMEM_MALLOC(graph_ptr->topo.graph.edges,int*,
 			nedges*sizeof(int),mpi_errno,"graph.edges");
     for (i=0; i<nnodes; i++) 
 	graph_ptr->topo.graph.index[i] = indx[i];
@@ -114,7 +114,7 @@ int MPIR_Graph_create( MPIR_Comm *comm_ptr, int nnodes,
 
   fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
 #   ifdef HAVE_ERROR_CHECKING
     {
 	mpi_errno = MPIR_Err_create_code(
@@ -172,12 +172,12 @@ int MPI_Graph_create(MPI_Comm comm_old, int nnodes, const int indx[],
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GRAPH_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GRAPH_CREATE);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GRAPH_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GRAPH_CREATE);
     
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -308,7 +308,7 @@ int MPI_Graph_create(MPI_Comm comm_old, int nnodes, const int indx[],
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GRAPH_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GRAPH_CREATE);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/graphdimsget.c b/src/mpi/topo/graphdimsget.c
index d78d49e..b04e53b 100644
--- a/src/mpi/topo/graphdimsget.c
+++ b/src/mpi/topo/graphdimsget.c
@@ -58,11 +58,11 @@ int MPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Topology *topo_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GRAPHDIMS_GET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GRAPHDIMS_GET);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GRAPHDIMS_GET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GRAPHDIMS_GET);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -109,7 +109,7 @@ int MPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GRAPHDIMS_GET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GRAPHDIMS_GET);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/graphnbrcnt.c b/src/mpi/topo/graphnbrcnt.c
index 003556a..7e844f0 100644
--- a/src/mpi/topo/graphnbrcnt.c
+++ b/src/mpi/topo/graphnbrcnt.c
@@ -85,11 +85,11 @@ int MPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_GRAPH_NEIGHBORS_COUNT);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GRAPH_NEIGHBORS_COUNT);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     /* Note that this routine does not require a CS_ENTER/EXIT */
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_GRAPH_NEIGHBORS_COUNT);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GRAPH_NEIGHBORS_COUNT);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -128,7 +128,7 @@ int MPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors)
     /* ... end of body of routine ... */
 
   fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_GRAPH_NEIGHBORS_COUNT);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GRAPH_NEIGHBORS_COUNT);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpi/topo/inhb_allgather.c b/src/mpi/topo/inhb_allgather.c
index f6ae155..53de7bc 100644
--- a/src/mpi/topo/inhb_allgather.c
+++ b/src/mpi/topo/inhb_allgather.c
@@ -33,45 +33,45 @@ int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sen
 #define FUNCNAME MPIR_Ineighbor_allgather_default
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ineighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ineighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int indegree, outdegree, weighted;
     int k,l;
     int *srcs, *dsts;
     MPI_Aint recvtype_extent;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
 
     /* This is the largest offset we add to recvbuf */
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                      (comm_ptr->local_size * recvcount * recvtype_extent));
 
     mpi_errno = MPIR_Topo_canon_nhb_count(comm_ptr, &indegree, &outdegree, &weighted);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
-    MPIU_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
+    MPIR_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
+    MPIR_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
     mpi_errno = MPIR_Topo_canon_nhb(comm_ptr,
                                     indegree, srcs, MPI_UNWEIGHTED,
                                     outdegree, dsts, MPI_UNWEIGHTED);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     for (k = 0; k < outdegree; ++k) {
-        mpi_errno = MPID_Sched_send(sendbuf, sendcount, sendtype, dsts[k], comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(sendbuf, sendcount, sendtype, dsts[k], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     for (l = 0; l < indegree; ++l) {
         char *rb = ((char *)recvbuf) + l * recvcount * recvtype_extent;
-        mpi_errno = MPID_Sched_recv(rb, recvcount, recvtype, srcs[l], comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(rb, recvcount, recvtype, srcs[l], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -86,22 +86,22 @@ int MPIR_Ineighbor_allgather_impl(const void *sendbuf, int sendcount, MPI_Dataty
     int mpi_errno = MPI_SUCCESS;
     int tag = -1;
     MPIR_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ineighbor_allgather != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ineighbor_allgather != NULL);
     mpi_errno = comm_ptr->coll_fns->Ineighbor_allgather(sendbuf, sendcount, sendtype,
                                                         recvbuf, recvcount, recvtype,
                                                         comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -143,10 +143,10 @@ int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sen
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLGATHER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLGATHER);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLGATHER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLGATHER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -206,7 +206,7 @@ int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sen
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLGATHER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLGATHER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/inhb_allgatherv.c b/src/mpi/topo/inhb_allgatherv.c
index 13e5964..865f1c1 100644
--- a/src/mpi/topo/inhb_allgatherv.c
+++ b/src/mpi/topo/inhb_allgatherv.c
@@ -33,7 +33,7 @@ int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype se
 #define FUNCNAME MPIR_Ineighbor_allgatherv_impl
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ineighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ineighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int indegree, outdegree, weighted;
@@ -41,41 +41,41 @@ int MPIR_Ineighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Da
     int *srcs, *dsts;
     int comm_size;
     MPI_Aint recvtype_extent;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     comm_size = comm_ptr->local_size;
 
     MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
 
     for (i = 0; i < comm_size; ++i) {
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                          (displs[i] * recvtype_extent));
     }
 
     mpi_errno = MPIR_Topo_canon_nhb_count(comm_ptr, &indegree, &outdegree, &weighted);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
-    MPIU_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
+    MPIR_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
+    MPIR_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
     mpi_errno = MPIR_Topo_canon_nhb(comm_ptr,
                                     indegree, srcs, MPI_UNWEIGHTED,
                                     outdegree, dsts, MPI_UNWEIGHTED);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     for (k = 0; k < outdegree; ++k) {
-        mpi_errno = MPID_Sched_send(sendbuf, sendcount, sendtype, dsts[k], comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(sendbuf, sendcount, sendtype, dsts[k], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     for (l = 0; l < indegree; ++l) {
         char *rb = ((char *)recvbuf) + displs[l] * recvtype_extent;
-        mpi_errno = MPID_Sched_recv(rb, recvcounts[l], recvtype, srcs[l], comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(rb, recvcounts[l], recvtype, srcs[l], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -90,22 +90,22 @@ int MPIR_Ineighbor_allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datat
     int mpi_errno = MPI_SUCCESS;
     int tag = -1;
     MPIR_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ineighbor_allgatherv != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ineighbor_allgatherv != NULL);
     mpi_errno = comm_ptr->coll_fns->Ineighbor_allgatherv(sendbuf, sendcount, sendtype,
                                                          recvbuf, recvcounts, displs, recvtype,
                                                          comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -148,10 +148,10 @@ int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype se
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLGATHERV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLGATHERV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLGATHERV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLGATHERV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -212,7 +212,7 @@ int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype se
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLGATHERV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLGATHERV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/inhb_alltoall.c b/src/mpi/topo/inhb_alltoall.c
index fe7d71b..f06a392 100644
--- a/src/mpi/topo/inhb_alltoall.c
+++ b/src/mpi/topo/inhb_alltoall.c
@@ -33,29 +33,29 @@ int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype send
 #define FUNCNAME MPIR_Ineighbor_alltoall_default
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ineighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ineighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int indegree, outdegree, weighted;
     int k,l;
     int *srcs, *dsts;
     MPI_Aint sendtype_extent, recvtype_extent;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
     MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
 
     /* This is the largest offset we add to sendbuf */
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                      (comm_ptr->local_size * sendcount * sendtype_extent));
     /* This is the largest offset we add to recvbuf */
-    MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+    MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                      (comm_ptr->local_size * recvcount * recvtype_extent));
 
     mpi_errno = MPIR_Topo_canon_nhb_count(comm_ptr, &indegree, &outdegree, &weighted);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
-    MPIU_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
+    MPIR_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
+    MPIR_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
     mpi_errno = MPIR_Topo_canon_nhb(comm_ptr,
                                     indegree, srcs, MPI_UNWEIGHTED,
                                     outdegree, dsts, MPI_UNWEIGHTED);
@@ -63,20 +63,20 @@ int MPIR_Ineighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Data
 
     for (k = 0; k < outdegree; ++k) {
         char *sb = ((char *)sendbuf) + k * sendcount * sendtype_extent;
-        mpi_errno = MPID_Sched_send(sb, sendcount, sendtype, dsts[k], comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(sb, sendcount, sendtype, dsts[k], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     for (l = 0; l < indegree; ++l) {
         char *rb = ((char *)recvbuf) + l * recvcount * recvtype_extent;
-        mpi_errno = MPID_Sched_recv(rb, recvcount, recvtype, srcs[l], comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(rb, recvcount, recvtype, srcs[l], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -91,22 +91,22 @@ int MPIR_Ineighbor_alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatyp
     int mpi_errno = MPI_SUCCESS;
     int tag = -1;
     MPIR_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ineighbor_alltoall != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ineighbor_alltoall != NULL);
     mpi_errno = comm_ptr->coll_fns->Ineighbor_alltoall(sendbuf, sendcount, sendtype,
                                                        recvbuf, recvcount, recvtype,
                                                        comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -149,10 +149,10 @@ int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype send
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLTOALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLTOALL);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLTOALL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLTOALL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -212,7 +212,7 @@ int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype send
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLTOALL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLTOALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/inhb_alltoallv.c b/src/mpi/topo/inhb_alltoallv.c
index 505f2ab..255499b 100644
--- a/src/mpi/topo/inhb_alltoallv.c
+++ b/src/mpi/topo/inhb_alltoallv.c
@@ -34,7 +34,7 @@ int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const i
 #define FUNCNAME MPIR_Ineighbor_alltoallv_default
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int indegree, outdegree, weighted;
@@ -42,7 +42,7 @@ int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[]
     int *srcs, *dsts;
     int comm_size;
     MPI_Aint sendtype_extent, recvtype_extent;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     comm_size = comm_ptr->local_size;
 
@@ -50,16 +50,16 @@ int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[]
     MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
 
     for (i = 0; i < comm_size; ++i) {
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                          (sdispls[i] * sendtype_extent));
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                          (rdispls[i] * recvtype_extent));
     }
 
     mpi_errno = MPIR_Topo_canon_nhb_count(comm_ptr, &indegree, &outdegree, &weighted);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
-    MPIU_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
+    MPIR_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
+    MPIR_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
     mpi_errno = MPIR_Topo_canon_nhb(comm_ptr,
                                     indegree, srcs, MPI_UNWEIGHTED,
                                     outdegree, dsts, MPI_UNWEIGHTED);
@@ -67,20 +67,20 @@ int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[]
 
     for (k = 0; k < outdegree; ++k) {
         char *sb = ((char *)sendbuf) + sdispls[k] * sendtype_extent;
-        mpi_errno = MPID_Sched_send(sb, sendcounts[k], sendtype, dsts[k], comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(sb, sendcounts[k], sendtype, dsts[k], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     for (l = 0; l < indegree; ++l) {
         char *rb = ((char *)recvbuf) + rdispls[l] * recvtype_extent;
-        mpi_errno = MPID_Sched_recv(rb, recvcounts[l], recvtype, srcs[l], comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(rb, recvcounts[l], recvtype, srcs[l], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -95,22 +95,22 @@ int MPIR_Ineighbor_alltoallv_impl(const void *sendbuf, const int sendcounts[], c
     int mpi_errno = MPI_SUCCESS;
     int tag = -1;
     MPIR_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ineighbor_alltoallv != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ineighbor_alltoallv != NULL);
     mpi_errno = comm_ptr->coll_fns->Ineighbor_alltoallv(sendbuf, sendcounts, sdispls, sendtype,
                                                         recvbuf, recvcounts, rdispls, recvtype,
                                                         comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -154,10 +154,10 @@ int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const i
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLTOALLV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLTOALLV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLTOALLV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLTOALLV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -216,7 +216,7 @@ int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const i
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLTOALLV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLTOALLV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/inhb_alltoallw.c b/src/mpi/topo/inhb_alltoallw.c
index 5692b19..e9b2d1d 100644
--- a/src/mpi/topo/inhb_alltoallw.c
+++ b/src/mpi/topo/inhb_alltoallw.c
@@ -33,18 +33,18 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[],
 #define FUNCNAME MPIR_Ineighbor_alltoallw_default
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIR_Ineighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr, MPID_Sched_t s)
+int MPIR_Ineighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     int indegree, outdegree, weighted;
     int k,l;
     int *srcs, *dsts;
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     mpi_errno = MPIR_Topo_canon_nhb_count(comm_ptr, &indegree, &outdegree, &weighted);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
-    MPIU_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
+    MPIR_CHKLMEM_MALLOC(srcs, int *, indegree*sizeof(int), mpi_errno, "srcs");
+    MPIR_CHKLMEM_MALLOC(dsts, int *, outdegree*sizeof(int), mpi_errno, "dsts");
     mpi_errno = MPIR_Topo_canon_nhb(comm_ptr,
                                     indegree, srcs, MPI_UNWEIGHTED,
                                     outdegree, dsts, MPI_UNWEIGHTED);
@@ -52,26 +52,26 @@ int MPIR_Ineighbor_alltoallw_default(const void *sendbuf, const int sendcounts[]
 
     for (k = 0; k < outdegree; ++k) {
         char *sb;
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT sendbuf + sdispls[k]);
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf + sdispls[k]);
 
         sb = ((char *)sendbuf) + sdispls[k];
-        mpi_errno = MPID_Sched_send(sb, sendcounts[k], sendtypes[k], dsts[k], comm_ptr, s);
+        mpi_errno = MPIR_Sched_send(sb, sendcounts[k], sendtypes[k], dsts[k], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
     for (l = 0; l < indegree; ++l) {
         char *rb;
-        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf + rdispls[l]);
+        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf + rdispls[l]);
 
         rb = ((char *)recvbuf) + rdispls[l];
-        mpi_errno = MPID_Sched_recv(rb, recvcounts[l], recvtypes[l], srcs[l], comm_ptr, s);
+        mpi_errno = MPIR_Sched_recv(rb, recvcounts[l], recvtypes[l], srcs[l], comm_ptr, s);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
-    MPID_SCHED_BARRIER(s);
+    MPIR_SCHED_BARRIER(s);
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -86,22 +86,22 @@ int MPIR_Ineighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[], c
     int mpi_errno = MPI_SUCCESS;
     int tag = -1;
     MPIR_Request *reqp = NULL;
-    MPID_Sched_t s = MPID_SCHED_NULL;
+    MPIR_Sched_t s = MPIR_SCHED_NULL;
 
     *request = MPI_REQUEST_NULL;
 
-    mpi_errno = MPID_Sched_next_tag(comm_ptr, &tag);
+    mpi_errno = MPIR_Sched_next_tag(comm_ptr, &tag);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    mpi_errno = MPID_Sched_create(&s);
+    mpi_errno = MPIR_Sched_create(&s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Ineighbor_alltoallw != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Ineighbor_alltoallw != NULL);
     mpi_errno = comm_ptr->coll_fns->Ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes,
                                                         recvbuf, recvcounts, rdispls, recvtypes,
                                                         comm_ptr, s);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    mpi_errno = MPID_Sched_start(&s, comm_ptr, tag, &reqp);
+    mpi_errno = MPIR_Sched_start(&s, comm_ptr, tag, &reqp);
     if (reqp)
         *request = reqp->handle;
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -145,10 +145,10 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const M
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLTOALLW);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_INEIGHBOR_ALLTOALLW);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLTOALLW);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_INEIGHBOR_ALLTOALLW);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -189,7 +189,7 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const M
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLTOALLW);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_INEIGHBOR_ALLTOALLW);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/nhb_allgather.c b/src/mpi/topo/nhb_allgather.c
index f27dd37..d5a1d39 100644
--- a/src/mpi/topo/nhb_allgather.c
+++ b/src/mpi/topo/nhb_allgather.c
@@ -57,8 +57,8 @@ int MPIR_Neighbor_allgather_impl(const void *sendbuf, int sendcount, MPI_Datatyp
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Neighbor_allgather != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Neighbor_allgather != NULL);
     mpi_errno = comm_ptr->coll_fns->Neighbor_allgather(sendbuf, sendcount, sendtype,
                                                        recvbuf, recvcount, recvtype,
                                                        comm_ptr);
@@ -104,10 +104,10 @@ int MPI_Neighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype send
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLGATHER);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLGATHER);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLGATHER);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLGATHER);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -162,7 +162,7 @@ int MPI_Neighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype send
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLGATHER);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLGATHER);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/nhb_allgatherv.c b/src/mpi/topo/nhb_allgatherv.c
index cbba644..fbe762d 100644
--- a/src/mpi/topo/nhb_allgatherv.c
+++ b/src/mpi/topo/nhb_allgatherv.c
@@ -61,8 +61,8 @@ int MPIR_Neighbor_allgatherv_impl(const void *sendbuf, int sendcount, MPI_Dataty
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Neighbor_allgatherv != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Neighbor_allgatherv != NULL);
     mpi_errno = comm_ptr->coll_fns->Neighbor_allgatherv(sendbuf, sendcount, sendtype,
                                                         recvbuf, recvcounts, displs, recvtype,
                                                         comm_ptr);
@@ -105,10 +105,10 @@ int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sen
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLGATHERV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLGATHERV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLGATHERV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLGATHERV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -163,7 +163,7 @@ int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sen
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLGATHERV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLGATHERV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/nhb_alltoall.c b/src/mpi/topo/nhb_alltoall.c
index 64d90c7..ffb6dba 100644
--- a/src/mpi/topo/nhb_alltoall.c
+++ b/src/mpi/topo/nhb_alltoall.c
@@ -59,8 +59,8 @@ int MPIR_Neighbor_alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Neighbor_alltoall != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Neighbor_alltoall != NULL);
     mpi_errno = comm_ptr->coll_fns->Neighbor_alltoall(sendbuf, sendcount, sendtype,
                                                       recvbuf, recvcount, recvtype,
                                                       comm_ptr);
@@ -108,10 +108,10 @@ int MPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendt
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLTOALL);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLTOALL);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLTOALL);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLTOALL);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -166,7 +166,7 @@ int MPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendt
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLTOALL);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLTOALL);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/nhb_alltoallv.c b/src/mpi/topo/nhb_alltoallv.c
index 9c17650..d0a1b9f 100644
--- a/src/mpi/topo/nhb_alltoallv.c
+++ b/src/mpi/topo/nhb_alltoallv.c
@@ -58,8 +58,8 @@ int MPIR_Neighbor_alltoallv_impl(const void *sendbuf, const int sendcounts[], co
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Neighbor_alltoallv != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Neighbor_alltoallv != NULL);
     mpi_errno = comm_ptr->coll_fns->Neighbor_alltoallv(sendbuf, sendcounts, sdispls, sendtype,
                                                        recvbuf, recvcounts, rdispls, recvtype,
                                                        comm_ptr);
@@ -103,10 +103,10 @@ int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const in
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLTOALLV);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLTOALLV);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLTOALLV);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLTOALLV);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -161,7 +161,7 @@ int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const in
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLTOALLV);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLTOALLV);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/nhb_alltoallw.c b/src/mpi/topo/nhb_alltoallw.c
index d8a5614..0e1f2d3 100644
--- a/src/mpi/topo/nhb_alltoallw.c
+++ b/src/mpi/topo/nhb_alltoallw.c
@@ -57,8 +57,8 @@ int MPIR_Neighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[], co
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(comm_ptr->coll_fns != NULL);
-    MPIU_Assert(comm_ptr->coll_fns->Neighbor_alltoallw != NULL);
+    MPIR_Assert(comm_ptr->coll_fns != NULL);
+    MPIR_Assert(comm_ptr->coll_fns->Neighbor_alltoallw != NULL);
     mpi_errno = comm_ptr->coll_fns->Neighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes,
                                                        recvbuf, recvcounts, rdispls, recvtypes,
                                                        comm_ptr);
@@ -103,10 +103,10 @@ int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MP
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLTOALLW);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_NEIGHBOR_ALLTOALLW);
 
     MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLTOALLW);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_NEIGHBOR_ALLTOALLW);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -146,7 +146,7 @@ int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MP
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLTOALLW);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_NEIGHBOR_ALLTOALLW);
     MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     return mpi_errno;
 
diff --git a/src/mpi/topo/topo_test.c b/src/mpi/topo/topo_test.c
index e36fbf2..0775ba3 100644
--- a/src/mpi/topo/topo_test.c
+++ b/src/mpi/topo/topo_test.c
@@ -61,11 +61,11 @@ int MPI_Topo_test(MPI_Comm comm, int *status)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
     MPIR_Topology *topo_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TOPO_TEST);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TOPO_TEST);
 
     MPIR_ERRTEST_INITIALIZED_ORDIE();
     
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TOPO_TEST);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TOPO_TEST);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -113,7 +113,7 @@ int MPI_Topo_test(MPI_Comm comm, int *status)
 #ifdef HAVE_ERROR_CHECKING
   fn_exit:
 #endif
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TOPO_TEST);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TOPO_TEST);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpi/topo/topoutil.c b/src/mpi/topo/topoutil.c
index 5b1225b..c5fa116 100644
--- a/src/mpi/topo/topoutil.c
+++ b/src/mpi/topo/topoutil.c
@@ -35,7 +35,7 @@ MPIR_Topology *MPIR_Topology_get( MPIR_Comm *comm_ptr )
 	return 0;
     }
 
-    mpi_errno = MPIR_CommGetAttr(comm_ptr->handle, MPIR_Topology_keyval,
+    mpi_errno = MPII_Comm_get_attr(comm_ptr->handle, MPIR_Topology_keyval,
                                  &topo_ptr, &flag, MPIR_ATTR_PTR );
     if (mpi_errno) return NULL;
     
@@ -52,7 +52,7 @@ int MPIR_Topology_put( MPIR_Comm *comm_ptr, MPIR_Topology *topo_ptr )
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(comm_ptr != NULL);
+    MPIR_Assert(comm_ptr != NULL);
 
     if (MPIR_Topology_keyval == MPI_KEYVAL_INVALID) {
 	/* Create a new keyval */
@@ -97,7 +97,7 @@ static int *MPIR_Copy_array( int n, const int a[], int *err )
 
     /* the copy of NULL is NULL */
     if (a == NULL) {
-        MPIU_Assert(n == 0);
+        MPIR_Assert(n == 0);
         return NULL;
     }
 
@@ -109,7 +109,7 @@ static int *MPIR_Copy_array( int n, const int a[], int *err )
 	return 0;
     }
     /* --END ERROR HANDLING-- */
-    MPIU_Memcpy(new_p, a, n * sizeof(int));
+    MPIR_Memcpy(new_p, a, n * sizeof(int));
     return new_p;
 }
 
@@ -132,7 +132,7 @@ static int MPIR_Topology_copy_fn ( MPI_Comm comm ATTRIBUTE((unused)),
 {
     MPIR_Topology *old_topology = (MPIR_Topology *)attr_in;
     MPIR_Topology *copy_topology = NULL;
-    MPIU_CHKPMEM_DECL(5);
+    MPIR_CHKPMEM_DECL(5);
     int mpi_errno = 0;
 
     MPL_UNREFERENCED_ARG(comm);
@@ -142,7 +142,7 @@ static int MPIR_Topology_copy_fn ( MPI_Comm comm ATTRIBUTE((unused)),
     *flag = 0;
     *(void **)attr_out = NULL;
 
-    MPIU_CHKPMEM_MALLOC(copy_topology, MPIR_Topology *, sizeof(MPIR_Topology), mpi_errno, "copy_topology");
+    MPIR_CHKPMEM_MALLOC(copy_topology, MPIR_Topology *, sizeof(MPIR_Topology), mpi_errno, "copy_topology");
 
     MPL_VG_MEM_INIT(copy_topology, sizeof(MPIR_Topology));
 
@@ -154,7 +154,7 @@ static int MPIR_Topology_copy_fn ( MPI_Comm comm ATTRIBUTE((unused)),
                                 old_topology->topo.kind_.array_field_, \
                                 &mpi_errno); \
             if (mpi_errno) MPIR_ERR_POP(mpi_errno); \
-            MPIU_CHKPMEM_REGISTER(copy_topology->topo.kind_.array_field_); \
+            MPIR_CHKPMEM_REGISTER(copy_topology->topo.kind_.array_field_); \
         } while (0)
 
     copy_topology->kind = old_topology->kind;
@@ -189,13 +189,13 @@ static int MPIR_Topology_copy_fn ( MPI_Comm comm ATTRIBUTE((unused)),
 
     *(void **)attr_out = (void *)copy_topology;
     *flag = 1;
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 fn_exit:
     /* Return mpi_errno in case one of the copy array functions failed */
     return mpi_errno;
 fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -298,7 +298,7 @@ int MPIR_Topo_canon_nhb_count(MPIR_Comm *comm_ptr, int *indegree, int *outdegree
         *weighted  = FALSE;
     }
     else {
-        MPIU_Assert(FALSE);
+        MPIR_Assert(FALSE);
     }
 
 fn_exit:
@@ -317,9 +317,9 @@ int MPIR_Topo_canon_nhb(MPIR_Comm *comm_ptr,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Topology *topo_ptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_TOPO_CANON_NHB);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_TOPO_CANON_NHB);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_TOPO_CANON_NHB);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_TOPO_CANON_NHB);
 
     topo_ptr = MPIR_Topology_get(comm_ptr);
 
@@ -331,17 +331,17 @@ int MPIR_Topo_canon_nhb(MPIR_Comm *comm_ptr,
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
     else if (topo_ptr->kind == MPI_GRAPH) {
-        MPIU_Assert(indegree == outdegree);
+        MPIR_Assert(indegree == outdegree);
         mpi_errno = MPIR_Graph_neighbors_impl(comm_ptr, comm_ptr->rank, indegree, sources);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPIU_Memcpy(dests, sources, outdegree*sizeof(*dests));
+        MPIR_Memcpy(dests, sources, outdegree*sizeof(*dests));
         /* ignore inweights/outweights */
     }
     else if (topo_ptr->kind == MPI_CART) {
         int d;
 
-        MPIU_Assert(indegree == outdegree);
-        MPIU_Assert(indegree == 2*topo_ptr->topo.cart.ndims);
+        MPIR_Assert(indegree == outdegree);
+        MPIR_Assert(indegree == 2*topo_ptr->topo.cart.ndims);
 
         for (d = 0; d < topo_ptr->topo.cart.ndims; ++d) {
             mpi_errno = MPIR_Cart_shift_impl(comm_ptr, d, 1, &sources[2*d], &sources[2*d+1]);
@@ -353,7 +353,7 @@ int MPIR_Topo_canon_nhb(MPIR_Comm *comm_ptr,
         /* ignore inweights/outweights */
     }
     else {
-        MPIU_Assert(FALSE);
+        MPIR_Assert(FALSE);
     }
 
 #ifdef MPL_USE_DBG_LOGGING
@@ -370,7 +370,7 @@ int MPIR_Topo_canon_nhb(MPIR_Comm *comm_ptr,
 #endif
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_TOPO_CANON_NHB);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_TOPO_CANON_NHB);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
diff --git a/src/mpi_t/cat_changed.c b/src/mpi_t/cat_changed.c
index a77ac52..da60f02 100644
--- a/src/mpi_t/cat_changed.c
+++ b/src/mpi_t/cat_changed.c
@@ -50,10 +50,10 @@ int MPI_T_category_changed(int *stamp)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_CHANGED);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_CHANGED);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CATEGORY_CHANGED);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CATEGORY_CHANGED);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -73,7 +73,7 @@ int MPI_T_category_changed(int *stamp)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CATEGORY_CHANGED);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CATEGORY_CHANGED);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cat_get_categories.c b/src/mpi_t/cat_get_categories.c
index 3f992b7..a17dc89 100644
--- a/src/mpi_t/cat_get_categories.c
+++ b/src/mpi_t/cat_get_categories.c
@@ -77,10 +77,10 @@ int MPI_T_category_get_categories(int cat_index, int len, int indices[])
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_CATEGORIES);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_CATEGORIES);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_CATEGORIES);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_CATEGORIES);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -105,7 +105,7 @@ int MPI_T_category_get_categories(int cat_index, int len, int indices[])
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_CATEGORIES);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_CATEGORIES);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cat_get_cvars.c b/src/mpi_t/cat_get_cvars.c
index a49fccb..3f9ed53 100644
--- a/src/mpi_t/cat_get_cvars.c
+++ b/src/mpi_t/cat_get_cvars.c
@@ -77,10 +77,10 @@ int MPI_T_category_get_cvars(int cat_index, int len, int indices[])
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_CVARS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_CVARS);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_CVARS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_CVARS);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -105,7 +105,7 @@ int MPI_T_category_get_cvars(int cat_index, int len, int indices[])
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_CVARS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_CVARS);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cat_get_index.c b/src/mpi_t/cat_get_index.c
index e66349f..e50ced8 100644
--- a/src/mpi_t/cat_get_index.c
+++ b/src/mpi_t/cat_get_index.c
@@ -49,10 +49,10 @@ int MPI_T_category_get_index(const char *name, int *cat_index)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_INDEX);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_INDEX);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_INDEX);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_INDEX);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -82,7 +82,7 @@ int MPI_T_category_get_index(const char *name, int *cat_index)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_INDEX);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_INDEX);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cat_get_info.c b/src/mpi_t/cat_get_info.c
index fa76a54..85f9d80 100644
--- a/src/mpi_t/cat_get_info.c
+++ b/src/mpi_t/cat_get_info.c
@@ -60,10 +60,10 @@ int MPI_T_category_get_info(int cat_index, char *name, int *name_len, char *desc
     int mpi_errno = MPI_SUCCESS;
     cat_table_entry_t *cat;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_INFO);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_INFO);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_INFO);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_INFO);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -96,7 +96,7 @@ int MPI_T_category_get_info(int cat_index, char *name, int *name_len, char *desc
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_INFO);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_INFO);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cat_get_num.c b/src/mpi_t/cat_get_num.c
index b3a83df..db01146 100644
--- a/src/mpi_t/cat_get_num.c
+++ b/src/mpi_t/cat_get_num.c
@@ -45,10 +45,10 @@ int MPI_T_category_get_num(int *num_cat)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_NUM);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_NUM);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_NUM);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_NUM);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -68,7 +68,7 @@ int MPI_T_category_get_num(int *num_cat)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_NUM);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_NUM);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cat_get_pvars.c b/src/mpi_t/cat_get_pvars.c
index 788b3dd..10c3b1d 100644
--- a/src/mpi_t/cat_get_pvars.c
+++ b/src/mpi_t/cat_get_pvars.c
@@ -77,10 +77,10 @@ int MPI_T_category_get_pvars(int cat_index, int len, int indices[])
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_PVARS);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CATEGORY_GET_PVARS);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_PVARS);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CATEGORY_GET_PVARS);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -105,7 +105,7 @@ int MPI_T_category_get_pvars(int cat_index, int len, int indices[])
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_PVARS);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CATEGORY_GET_PVARS);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cvar_get_index.c b/src/mpi_t/cvar_get_index.c
index f1a5d76..f49cbc4 100644
--- a/src/mpi_t/cvar_get_index.c
+++ b/src/mpi_t/cvar_get_index.c
@@ -49,10 +49,10 @@ int MPI_T_cvar_get_index(const char *name, int *cvar_index)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CVAR_GET_INDEX);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CVAR_GET_INDEX);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CVAR_GET_INDEX);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CVAR_GET_INDEX);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -82,7 +82,7 @@ int MPI_T_cvar_get_index(const char *name, int *cvar_index)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CVAR_GET_INDEX);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CVAR_GET_INDEX);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cvar_get_info.c b/src/mpi_t/cvar_get_info.c
index 354b6b1..d97f0a0 100644
--- a/src/mpi_t/cvar_get_info.c
+++ b/src/mpi_t/cvar_get_info.c
@@ -64,10 +64,10 @@ int MPI_T_cvar_get_info(int cvar_index, char *name, int *name_len,
     int mpi_errno = MPI_SUCCESS;
     const cvar_table_entry_t *cvar;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CVAR_GET_INFO);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CVAR_GET_INFO);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CVAR_GET_INFO);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CVAR_GET_INFO);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -107,7 +107,7 @@ int MPI_T_cvar_get_info(int cvar_index, char *name, int *name_len,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CVAR_GET_INFO);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CVAR_GET_INFO);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cvar_get_num.c b/src/mpi_t/cvar_get_num.c
index eece0d1..13f19be 100644
--- a/src/mpi_t/cvar_get_num.c
+++ b/src/mpi_t/cvar_get_num.c
@@ -45,10 +45,10 @@ int MPI_T_cvar_get_num(int *num_cvar)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CVAR_GET_NUM);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CVAR_GET_NUM);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CVAR_GET_NUM);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CVAR_GET_NUM);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -69,7 +69,7 @@ int MPI_T_cvar_get_num(int *num_cvar)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CVAR_GET_NUM);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CVAR_GET_NUM);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cvar_handle_alloc.c b/src/mpi_t/cvar_handle_alloc.c
index 3b502a8..8e615c9 100644
--- a/src/mpi_t/cvar_handle_alloc.c
+++ b/src/mpi_t/cvar_handle_alloc.c
@@ -36,12 +36,12 @@ int MPIR_T_cvar_handle_alloc_impl(int cvar_index, void *obj_handle, MPI_T_cvar_h
     int mpi_errno = MPI_SUCCESS;
     MPIR_T_cvar_handle_t *hnd;
 
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
     cvar_table_entry_t *cvar = (cvar_table_entry_t *) utarray_eltptr(cvar_table, cvar_index);
 
     /* Allocate handle memory */
-    MPIU_CHKPMEM_MALLOC(hnd, MPIR_T_cvar_handle_t*, sizeof(*hnd), mpi_errno, "control variable handle");
+    MPIR_CHKPMEM_MALLOC(hnd, MPIR_T_cvar_handle_t*, sizeof(*hnd), mpi_errno, "control variable handle");
 #ifdef HAVE_ERROR_CHECKING
     hnd->kind = MPIR_T_CVAR_HANDLE;
 #endif
@@ -65,11 +65,11 @@ int MPIR_T_cvar_handle_alloc_impl(int cvar_index, void *obj_handle, MPI_T_cvar_h
 
     *handle = hnd;
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 fn_exit:
     return mpi_errno;
 fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -103,10 +103,10 @@ int MPI_T_cvar_handle_alloc(int cvar_index, void *obj_handle, MPI_T_cvar_handle
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CVAR_HANDLE_ALLOC);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CVAR_HANDLE_ALLOC);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CVAR_HANDLE_ALLOC);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CVAR_HANDLE_ALLOC);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -131,7 +131,7 @@ int MPI_T_cvar_handle_alloc(int cvar_index, void *obj_handle, MPI_T_cvar_handle
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CVAR_HANDLE_ALLOC);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CVAR_HANDLE_ALLOC);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cvar_handle_free.c b/src/mpi_t/cvar_handle_free.c
index 9d240ed..8a50f0e 100644
--- a/src/mpi_t/cvar_handle_free.c
+++ b/src/mpi_t/cvar_handle_free.c
@@ -47,10 +47,10 @@ int MPI_T_cvar_handle_free(MPI_T_cvar_handle *handle)
     int mpi_errno = MPI_SUCCESS;
     MPIR_T_cvar_handle_t *hnd;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CVAR_HANDLE_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CVAR_HANDLE_FREE);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CVAR_HANDLE_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CVAR_HANDLE_FREE);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -72,7 +72,7 @@ int MPI_T_cvar_handle_free(MPI_T_cvar_handle *handle)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CVAR_HANDLE_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CVAR_HANDLE_FREE);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cvar_read.c b/src/mpi_t/cvar_read.c
index 19ae770..d30a6c4 100644
--- a/src/mpi_t/cvar_read.c
+++ b/src/mpi_t/cvar_read.c
@@ -39,7 +39,7 @@ int MPIR_T_cvar_read_impl(MPI_T_cvar_handle handle, void *buf)
 
     count = hnd->count;
     addr = hnd->addr;
-    MPIU_Assert(addr != NULL);
+    MPIR_Assert(addr != NULL);
 
     switch (hnd->datatype) {
     case MPI_INT:
@@ -103,10 +103,10 @@ int MPI_T_cvar_read(MPI_T_cvar_handle handle, void *buf)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CVAR_READ);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CVAR_READ);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CVAR_READ);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CVAR_READ);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -128,7 +128,7 @@ int MPI_T_cvar_read(MPI_T_cvar_handle handle, void *buf)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CVAR_READ);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CVAR_READ);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/cvar_write.c b/src/mpi_t/cvar_write.c
index 860fa85..291672a 100644
--- a/src/mpi_t/cvar_write.c
+++ b/src/mpi_t/cvar_write.c
@@ -47,7 +47,7 @@ int MPIR_T_cvar_write_impl(MPI_T_cvar_handle handle, const void *buf)
 
     count = hnd->count;
     addr = hnd->addr;
-    MPIU_Assert(addr != NULL);
+    MPIR_Assert(addr != NULL);
 
     switch (hnd->datatype) {
     case MPI_INT:
@@ -71,7 +71,7 @@ int MPIR_T_cvar_write_impl(MPI_T_cvar_handle handle, const void *buf)
             ((double *)addr)[i] = ((double *)buf)[i];
         break;
     case MPI_CHAR:
-        MPIU_Assert(count > strlen(buf)); /* Make sure buf will not overflow this cvar */
+        MPIR_Assert(count > strlen(buf)); /* Make sure buf will not overflow this cvar */
         MPL_strncpy(addr, buf, count);
         break;
     default:
@@ -112,10 +112,10 @@ int MPI_T_cvar_write(MPI_T_cvar_handle handle, const void *buf)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_CVAR_WRITE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_CVAR_WRITE);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_CVAR_WRITE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_CVAR_WRITE);
 
    /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -137,7 +137,7 @@ int MPI_T_cvar_write(MPI_T_cvar_handle handle, const void *buf)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_CVAR_WRITE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_CVAR_WRITE);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/enum_get_info.c b/src/mpi_t/enum_get_info.c
index ff4a8b7..068a107 100644
--- a/src/mpi_t/enum_get_info.c
+++ b/src/mpi_t/enum_get_info.c
@@ -53,10 +53,10 @@ int MPI_T_enum_get_info(MPI_T_enum enumtype, int *num, char *name, int *name_len
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_ENUM_GET_INFO);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_ENUM_GET_INFO);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_ENUM_GET_INFO);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_ENUM_GET_INFO);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -81,7 +81,7 @@ int MPI_T_enum_get_info(MPI_T_enum enumtype, int *num, char *name, int *name_len
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_ENUM_GET_INFO);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_ENUM_GET_INFO);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/enum_get_item.c b/src/mpi_t/enum_get_item.c
index 49574cc..7d9a31b 100644
--- a/src/mpi_t/enum_get_item.c
+++ b/src/mpi_t/enum_get_item.c
@@ -56,10 +56,10 @@ int MPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name,
     int mpi_errno = MPI_SUCCESS;
     enum_item_t *item;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_ENUM_GET_ITEM);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_ENUM_GET_ITEM);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_ENUM_GET_ITEM);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_ENUM_GET_ITEM);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -86,7 +86,7 @@ int MPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_ENUM_GET_ITEM);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_ENUM_GET_ITEM);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/mpit.c b/src/mpi_t/mpit.c
index 0ade54a..5107124 100644
--- a/src/mpi_t/mpit.c
+++ b/src/mpi_t/mpit.c
@@ -34,13 +34,13 @@ void MPIR_T_enum_create(const char *enum_name, MPI_T_enum *handle)
     MPIR_T_enum_t *e;
     static const UT_icd enum_item_icd = {sizeof(enum_item_t), NULL, NULL, NULL};
 
-    MPIU_Assert(enum_name);
-    MPIU_Assert(handle);
+    MPIR_Assert(enum_name);
+    MPIR_Assert(handle);
 
     utarray_extend_back(enum_table);
     e = (MPIR_T_enum_t *)utarray_back(enum_table);
     e->name = MPL_strdup(enum_name);
-    MPIU_Assert(e->name);
+    MPIR_Assert(e->name);
 #ifdef HAVE_ERROR_CHECKING
     e->kind = MPIR_T_ENUM_HANDLE;
 #endif
@@ -57,13 +57,13 @@ void MPIR_T_enum_add_item(MPI_T_enum handle, const char *item_name, int item_val
 {
     enum_item_t *item;
 
-    MPIU_Assert(handle);
-    MPIU_Assert(item_name);
+    MPIR_Assert(handle);
+    MPIR_Assert(item_name);
 
     utarray_extend_back(handle->items);
     item = (enum_item_t *)utarray_back(handle->items);
     item->name = MPL_strdup(item_name);
-    MPIU_Assert(item->name);
+    MPIR_Assert(item->name);
     item->value = item_value;
 }
 
@@ -90,7 +90,7 @@ static cat_table_entry_t *MPIR_T_cat_create(const char *cat_name)
     /* Record <cat_name, cat_idx> in cat_hash */
     cat_idx = utarray_len(cat_table) - 1;
     hash_entry = MPL_malloc(sizeof(name2index_hash_t));
-    MPIU_Assert(hash_entry);
+    MPIR_Assert(hash_entry);
     /* Need not to Strdup cat_name, since cat_table and cat_hash co-exist */
     hash_entry->name = cat_name;
     hash_entry->idx = cat_idx;
@@ -241,8 +241,8 @@ int MPIR_T_cat_add_desc(const char *cat_name, const char *cat_desc)
     cat_table_entry_t *cat;
 
     /* NULL args are not allowed */
-    MPIU_Assert(cat_name);
-    MPIU_Assert(cat_desc);
+    MPIR_Assert(cat_name);
+    MPIR_Assert(cat_desc);
 
     HASH_FIND_STR(cat_hash, cat_name, hash_entry);
 
@@ -250,14 +250,14 @@ int MPIR_T_cat_add_desc(const char *cat_name, const char *cat_desc)
         /* Found it, i.e., category already exists */
         cat_idx = hash_entry->idx;
         cat = (cat_table_entry_t *)utarray_eltptr(cat_table, cat_idx);
-        MPIU_Assert(cat->desc == NULL);
+        MPIR_Assert(cat->desc == NULL);
         cat->desc = MPL_strdup(cat_desc);
-        MPIU_Assert(cat->desc);
+        MPIR_Assert(cat->desc);
     } else {
         /* Not found, so create a new category */
         cat = MPIR_T_cat_create(cat_name);
         cat->desc = MPL_strdup(cat_desc);
-        MPIU_Assert(cat->desc);
+        MPIR_Assert(cat->desc);
         /* Notify categories have been changed */
         cat_stamp++;
     }
@@ -304,7 +304,7 @@ void MPIR_T_CVAR_REGISTER_impl(
         cvar_idx = hash_entry->idx;
         cvar = (cvar_table_entry_t *)utarray_eltptr(cvar_table, cvar_idx);
         /* Should never override an existing & active var */
-        MPIU_Assert(cvar->active != TRUE);
+        MPIR_Assert(cvar->active != TRUE);
         cvar->active = TRUE;
         /* FIXME: Do we need to check consistency between the old and new? */
     } else {
@@ -314,17 +314,17 @@ void MPIR_T_CVAR_REGISTER_impl(
         cvar->active = TRUE;
         cvar->datatype = dtype;
         cvar->name = MPL_strdup(name);
-        MPIU_Assert(cvar->name);
+        MPIR_Assert(cvar->name);
         if (dtype != MPI_CHAR) {
             cvar->addr = (void *)addr;
         } else {
             cvar->addr = MPL_malloc(count);
-            MPIU_Assert(cvar->addr);
+            MPIR_Assert(cvar->addr);
             if (defaultval.str == NULL) {
                 ((char *)(cvar->addr))[0] = '\0';
             } else {
                 /* Use greater (>), since count includes the terminating '\0', but strlen does not */
-                MPIU_Assert(count > strlen(defaultval.str));
+                MPIR_Assert(count > strlen(defaultval.str));
                 strcpy(cvar->addr, defaultval.str);
             }
         }
@@ -336,12 +336,12 @@ void MPIR_T_CVAR_REGISTER_impl(
         cvar->get_count = get_count;
         cvar->defaultval = defaultval;
         cvar->desc = MPL_strdup(desc);
-        MPIU_Assert(cvar->desc);
+        MPIR_Assert(cvar->desc);
 
         /* Record <name, index> in hash table */
         cvar_idx = utarray_len(cvar_table) - 1;
         hash_entry = MPL_malloc(sizeof(name2index_hash_t));
-        MPIU_Assert(hash_entry);
+        MPIR_Assert(hash_entry);
         /* Need not to Strdup name, since cvar_table and cvar_hash co-exist */
         hash_entry->name =name;
         hash_entry->idx = cvar_idx;
@@ -390,7 +390,7 @@ void MPIR_T_PVAR_REGISTER_impl(
         pvar_idx = hash_entry->idx;
         pvar = (pvar_table_entry_t *)utarray_eltptr(pvar_table, pvar_idx);
         /* Should never override an existing & active var */
-        MPIU_Assert(pvar->active != TRUE);
+        MPIR_Assert(pvar->active != TRUE);
         pvar->active = TRUE;
         /* FIXME: Do we need to check consistency between the old and new? */
     } else {
@@ -401,7 +401,7 @@ void MPIR_T_PVAR_REGISTER_impl(
         pvar->varclass = varclass;
         pvar->datatype = dtype;
         pvar->name = MPL_strdup(name);
-        MPIU_Assert(pvar->name);
+        MPIR_Assert(pvar->name);
         pvar->addr = addr;
         pvar->count = count;
         pvar->enumtype = etype;
@@ -411,12 +411,12 @@ void MPIR_T_PVAR_REGISTER_impl(
         pvar->get_value = get_value;
         pvar->get_count = get_count;
         pvar->desc = MPL_strdup(desc);
-        MPIU_Assert(pvar->desc);
+        MPIR_Assert(pvar->desc);
 
         /* Record <name, index> in hash table */
         pvar_idx = utarray_len(pvar_table) - 1;
         hash_entry = MPL_malloc(sizeof(name2index_hash_t));
-        MPIU_Assert(hash_entry);
+        MPIR_Assert(hash_entry);
         /* Need not to Strdup name, since pvar_table and pvar_hashs co-exist */
         hash_entry->name = name;
         hash_entry->idx = pvar_idx;
@@ -464,7 +464,7 @@ void MPIR_T_strncpy(char *dst, const char *src, int *len)
         }
         else {
             /* MPL_strncpy will always terminate the string */
-            MPIU_Assert(*len > 0);
+            MPIR_Assert(*len > 0);
             if (src != NULL) {
                 MPL_strncpy(dst, src, *len);
                 *len = (int)strlen(dst) + 1;
diff --git a/src/mpi_t/mpit_finalize.c b/src/mpi_t/mpit_finalize.c
index dbdfd39..04da543 100644
--- a/src/mpi_t/mpit_finalize.c
+++ b/src/mpi_t/mpit_finalize.c
@@ -202,8 +202,8 @@ int MPI_T_finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_FINALIZE);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_FINALIZE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_FINALIZE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_FINALIZE);
 
     /* ... body of routine ...  */
 
@@ -221,7 +221,7 @@ int MPI_T_finalize(void)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_FINALIZE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_FINALIZE);
     return mpi_errno;
 
 fn_fail:
diff --git a/src/mpi_t/mpit_initthread.c b/src/mpi_t/mpit_initthread.c
index af1469a..d177384 100644
--- a/src/mpi_t/mpit_initthread.c
+++ b/src/mpi_t/mpit_initthread.c
@@ -115,8 +115,8 @@ int MPI_T_init_thread(int required, int *provided)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_INIT_THREAD);
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_INIT_THREAD);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_INIT_THREAD);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_INIT_THREAD);
 
     /* ... body of routine ...  */
 
@@ -140,7 +140,7 @@ int MPI_T_init_thread(int required, int *provided)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_INIT_THREAD);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_INIT_THREAD);
     return mpi_errno;
 
 fn_fail:
diff --git a/src/mpi_t/pvar_get_index.c b/src/mpi_t/pvar_get_index.c
index af5fce4..6ca9e59 100644
--- a/src/mpi_t/pvar_get_index.c
+++ b/src/mpi_t/pvar_get_index.c
@@ -50,10 +50,10 @@ int MPI_T_pvar_get_index(const char *name, int var_class, int *pvar_index)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_GET_INDEX);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_GET_INDEX);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_GET_INDEX);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_GET_INDEX);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -90,7 +90,7 @@ int MPI_T_pvar_get_index(const char *name, int var_class, int *pvar_index)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_GET_INDEX);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_GET_INDEX);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_get_info.c b/src/mpi_t/pvar_get_info.c
index 06dddda..22ea6b5 100644
--- a/src/mpi_t/pvar_get_info.c
+++ b/src/mpi_t/pvar_get_info.c
@@ -68,10 +68,10 @@ int MPI_T_pvar_get_info(int pvar_index, char *name, int *name_len, int *verbosit
     const pvar_table_entry_t *entry;
     const pvar_table_entry_t *info;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_GET_INFO);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_GET_INFO);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_GET_INFO);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_GET_INFO);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -127,7 +127,7 @@ int MPI_T_pvar_get_info(int pvar_index, char *name, int *name_len, int *verbosit
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_GET_INFO);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_GET_INFO);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_get_num.c b/src/mpi_t/pvar_get_num.c
index 462d70c..a74ca56 100644
--- a/src/mpi_t/pvar_get_num.c
+++ b/src/mpi_t/pvar_get_num.c
@@ -45,10 +45,10 @@ int MPI_T_pvar_get_num(int *num_pvar)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_GET_NUM);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_GET_NUM);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_GET_NUM);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_GET_NUM);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -68,7 +68,7 @@ int MPI_T_pvar_get_num(int *num_pvar)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_GET_NUM);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_GET_NUM);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_handle_alloc.c b/src/mpi_t/pvar_handle_alloc.c
index 846ed1e..c66e11e 100644
--- a/src/mpi_t/pvar_handle_alloc.c
+++ b/src/mpi_t/pvar_handle_alloc.c
@@ -51,7 +51,7 @@ int MPIR_T_pvar_handle_alloc_impl(MPI_T_pvar_session session, int pvar_index,
     const pvar_table_entry_t *info;
     MPIR_T_pvar_handle_t *hnd;
 
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
     info = (pvar_table_entry_t *) utarray_eltptr(pvar_table, pvar_index);
 
@@ -80,7 +80,7 @@ int MPIR_T_pvar_handle_alloc_impl(MPI_T_pvar_session session, int pvar_index,
     }
 
     /* Allocate memory and bzero it */
-    MPIU_CHKPMEM_CALLOC(hnd, MPIR_T_pvar_handle_t*, sizeof(*hnd) + extra,
+    MPIR_CHKPMEM_CALLOC(hnd, MPIR_T_pvar_handle_t*, sizeof(*hnd) + extra,
                         mpi_errno, "performance variable handle");
 #ifdef HAVE_ERROR_CHECKING
     hnd->kind = MPIR_T_PVAR_HANDLE;
@@ -120,7 +120,7 @@ int MPIR_T_pvar_handle_alloc_impl(MPI_T_pvar_session session, int pvar_index,
          * accum is zero since we called CALLOC before.
          */
         if (hnd->get_value == NULL)
-            MPIU_Memcpy(hnd->offset, hnd->addr, bytes*cnt);
+            MPIR_Memcpy(hnd->offset, hnd->addr, bytes*cnt);
         else
             hnd->get_value(hnd->addr, hnd->obj_handle, hnd->count, hnd->offset);
     }
@@ -163,11 +163,11 @@ int MPIR_T_pvar_handle_alloc_impl(MPI_T_pvar_session session, int pvar_index,
     *handle = hnd;
     *count = cnt;
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 fn_exit:
     return mpi_errno;
 fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -204,10 +204,10 @@ int MPI_T_pvar_handle_alloc(MPI_T_pvar_session session, int pvar_index,
     int mpi_errno = MPI_SUCCESS;
     pvar_table_entry_t *entry;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_HANDLE_ALLOC);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_HANDLE_ALLOC);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_HANDLE_ALLOC);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_HANDLE_ALLOC);
 
     /* Validate parameters  */
 #   ifdef HAVE_ERROR_CHECKING
@@ -238,7 +238,7 @@ int MPI_T_pvar_handle_alloc(MPI_T_pvar_session session, int pvar_index,
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_HANDLE_ALLOC);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_HANDLE_ALLOC);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_handle_free.c b/src/mpi_t/pvar_handle_free.c
index ced8761..ca571ea 100644
--- a/src/mpi_t/pvar_handle_free.c
+++ b/src/mpi_t/pvar_handle_free.c
@@ -45,7 +45,7 @@ int MPIR_T_pvar_handle_free_impl(MPI_T_pvar_session session, MPI_T_pvar_handle *
             mark->first_used = 0;
             mark->first_started = 0;
         } else {
-            MPIU_Assert(mark->hlist);
+            MPIR_Assert(mark->hlist);
             if (mark->hlist == hnd) {
                 /* hnd happens to be the head */
                 mark->hlist = hnd->next2;
@@ -93,10 +93,10 @@ int MPI_T_pvar_handle_free(MPI_T_pvar_session session, MPI_T_pvar_handle *handle
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_HANDLE_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_HANDLE_FREE);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_HANDLE_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_HANDLE_FREE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -126,7 +126,7 @@ int MPI_T_pvar_handle_free(MPI_T_pvar_session session, MPI_T_pvar_handle *handle
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_HANDLE_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_HANDLE_FREE);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_read.c b/src/mpi_t/pvar_read.c
index 62f3d58..d6ddd7f 100644
--- a/src/mpi_t/pvar_read.c
+++ b/src/mpi_t/pvar_read.c
@@ -134,13 +134,13 @@ int MPIR_T_pvar_read_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle,
     }
     else if (MPIR_T_pvar_is_sum(handle) && !MPIR_T_pvar_is_started(handle)) {
         /* A SUM is stopped. Return accum directly */
-        MPIU_Memcpy(buf, handle->accum, handle->bytes * handle->count);
+        MPIR_Memcpy(buf, handle->accum, handle->bytes * handle->count);
     }
     else if (MPIR_T_pvar_is_watermark(handle)) {
         /* Callback and array are not allowed for watermarks, since they
          * can not gurantee correct semantics of watermarks.
          */
-        MPIU_Assert(handle->get_value == NULL && handle->count == 1);
+        MPIR_Assert(handle->get_value == NULL && handle->count == 1);
 
         if (MPIR_T_pvar_is_first(handle)) {
             /* Current value of the first handle of a watermark is stored at
@@ -192,7 +192,7 @@ int MPIR_T_pvar_read_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle,
     } else {
         /* For STATE, LEVEL, SIZE, PERCENTAGE, no caching is needed */
         if (handle->get_value == NULL)
-            MPIU_Memcpy(buf, handle->addr, handle->bytes * handle->count);
+            MPIR_Memcpy(buf, handle->addr, handle->bytes * handle->count);
         else
             handle->get_value(handle->addr, handle->obj_handle,
                               handle->count, buf);
@@ -243,10 +243,10 @@ int MPI_T_pvar_read(MPI_T_pvar_session session, MPI_T_pvar_handle handle, void *
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_READ);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_READ);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_READ);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_READ);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -275,7 +275,7 @@ int MPI_T_pvar_read(MPI_T_pvar_session session, MPI_T_pvar_handle handle, void *
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_READ);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_READ);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_readreset.c b/src/mpi_t/pvar_readreset.c
index 0c3c514..e668986 100644
--- a/src/mpi_t/pvar_readreset.c
+++ b/src/mpi_t/pvar_readreset.c
@@ -76,10 +76,10 @@ int MPI_T_pvar_readreset(MPI_T_pvar_session session, MPI_T_pvar_handle handle, v
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_READRESET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_READRESET);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_READRESET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_READRESET);
 
     /* Validate parameters */
 #   ifdef HAVE_ERROR_CHECKING
@@ -114,7 +114,7 @@ int MPI_T_pvar_readreset(MPI_T_pvar_session session, MPI_T_pvar_handle handle, v
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_READRESET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_READRESET);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_reset.c b/src/mpi_t/pvar_reset.c
index f50e43f..a96a488 100644
--- a/src/mpi_t/pvar_reset.c
+++ b/src/mpi_t/pvar_reset.c
@@ -43,7 +43,7 @@ int MPIR_T_pvar_reset_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
         /* Record current value as offset when pvar is running (i.e., started) */
         if (MPIR_T_pvar_is_started(handle)) {
             if (handle->get_value == NULL) {
-                MPIU_Memcpy(handle->offset, handle->addr, handle->bytes * handle->count);
+                MPIR_Memcpy(handle->offset, handle->addr, handle->bytes * handle->count);
             } else {
                 handle->get_value(handle->addr, handle->obj_handle,
                                   handle->count, handle->offset);
@@ -54,7 +54,7 @@ int MPIR_T_pvar_reset_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
             /* Use the current value as starting value when pvar is running */
             mark = (MPIR_T_pvar_watermark_t *)handle->addr;
             if (MPIR_T_pvar_is_first(handle)) {
-                MPIU_Assert(mark->first_used);
+                MPIR_Assert(mark->first_used);
                 mark->watermark = mark->current;
             } else {
                 handle->watermark = mark->current;
@@ -110,10 +110,10 @@ int MPI_T_pvar_reset(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
     int mpi_errno = MPI_SUCCESS;
     MPIR_T_pvar_handle_t *hnd;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_RESET);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_RESET);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_RESET);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_RESET);
 
      /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -157,7 +157,7 @@ int MPI_T_pvar_reset(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_RESET);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_RESET);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_session_create.c b/src/mpi_t/pvar_session_create.c
index 8fde7f2..813e0f1 100644
--- a/src/mpi_t/pvar_session_create.c
+++ b/src/mpi_t/pvar_session_create.c
@@ -33,11 +33,11 @@ int MPI_T_pvar_session_create(MPI_T_pvar_session *session) __attribute__((weak,a
 int MPIR_T_pvar_session_create_impl(MPI_T_pvar_session *session)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
     *session = MPI_T_PVAR_SESSION_NULL;
 
-    MPIU_CHKPMEM_MALLOC(*session, MPI_T_pvar_session, sizeof(**session), mpi_errno, "performance var session");
+    MPIR_CHKPMEM_MALLOC(*session, MPI_T_pvar_session, sizeof(**session), mpi_errno, "performance var session");
 
     /* essential for utlist to work */
     (*session)->hlist = NULL;
@@ -46,11 +46,11 @@ int MPIR_T_pvar_session_create_impl(MPI_T_pvar_session *session)
     (*session)->kind = MPIR_T_PVAR_SESSION;
 #endif
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 fn_exit:
     return mpi_errno;
 fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -77,10 +77,10 @@ int MPI_T_pvar_session_create(MPI_T_pvar_session *session)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_SESSION_CREATE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_SESSION_CREATE);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_SESSION_CREATE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_SESSION_CREATE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -101,7 +101,7 @@ int MPI_T_pvar_session_create(MPI_T_pvar_session *session)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_SESSION_CREATE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_SESSION_CREATE);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_session_free.c b/src/mpi_t/pvar_session_free.c
index 51a93c5..edb9468 100644
--- a/src/mpi_t/pvar_session_free.c
+++ b/src/mpi_t/pvar_session_free.c
@@ -83,10 +83,10 @@ int MPI_T_pvar_session_free(MPI_T_pvar_session *session)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_SESSION_FREE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_SESSION_FREE);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_SESSION_FREE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_SESSION_FREE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -107,7 +107,7 @@ int MPI_T_pvar_session_free(MPI_T_pvar_session *session)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_SESSION_FREE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_SESSION_FREE);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_start.c b/src/mpi_t/pvar_start.c
index 1853fef..85995d6 100644
--- a/src/mpi_t/pvar_start.c
+++ b/src/mpi_t/pvar_start.c
@@ -42,7 +42,7 @@ int MPIR_T_pvar_start_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
          * accum is zero since handle allocation.
          */
         if (handle->get_value == NULL) {
-            MPIU_Memcpy(handle->offset, handle->addr, handle->bytes * handle->count);
+            MPIR_Memcpy(handle->offset, handle->addr, handle->bytes * handle->count);
         } else {
             handle->get_value(handle->addr, handle->obj_handle,
                               handle->count, handle->offset);
@@ -54,7 +54,7 @@ int MPIR_T_pvar_start_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
         mark = (MPIR_T_pvar_watermark_t *)handle->addr;
 
         if (MPIR_T_pvar_is_first(handle)) {
-            MPIU_Assert(mark->first_used);
+            MPIR_Assert(mark->first_used);
             mark->first_started = TRUE;
             if (!MPIR_T_pvar_is_oncestarted(handle))
                 mark->watermark = mark->current;
@@ -107,10 +107,10 @@ int MPI_T_pvar_start(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_START);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_START);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_START);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_START);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -151,7 +151,7 @@ int MPI_T_pvar_start(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_START);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_START);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_stop.c b/src/mpi_t/pvar_stop.c
index aa6b451..5719f0f 100644
--- a/src/mpi_t/pvar_stop.c
+++ b/src/mpi_t/pvar_stop.c
@@ -42,7 +42,7 @@ int MPIR_T_pvar_stop_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
     if (MPIR_T_pvar_is_sum(handle)) {
         /* Read the current value first */
         if (handle->get_value == NULL) {
-            MPIU_Memcpy(handle->current, handle->addr, handle->bytes * handle->count);
+            MPIR_Memcpy(handle->current, handle->addr, handle->bytes * handle->count);
         } else {
             handle->get_value(handle->addr, handle->obj_handle,
                               handle->count, handle->current);
@@ -88,7 +88,7 @@ int MPIR_T_pvar_stop_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
         /* When handle is first, clear the flag in pvar too */
         if (MPIR_T_pvar_is_first(handle)) {
             mark = (MPIR_T_pvar_watermark_t *)handle->addr;
-            MPIU_Assert(mark->first_used);
+            MPIR_Assert(mark->first_used);
             mark->first_started = FALSE;
         }
     }
@@ -136,10 +136,10 @@ int MPI_T_pvar_stop(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_STOP);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_STOP);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_STOP);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_STOP);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -185,7 +185,7 @@ int MPI_T_pvar_stop(MPI_T_pvar_session session, MPI_T_pvar_handle handle)
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_STOP);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_STOP);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpi_t/pvar_write.c b/src/mpi_t/pvar_write.c
index 888d018..6662c2d 100644
--- a/src/mpi_t/pvar_write.c
+++ b/src/mpi_t/pvar_write.c
@@ -74,10 +74,10 @@ int MPI_T_pvar_write(MPI_T_pvar_session session, MPI_T_pvar_handle handle, const
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPID_MPI_STATE_DECL(MPID_STATE_MPI_T_PVAR_WRITE);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_T_PVAR_WRITE);
     MPIR_ERRTEST_MPIT_INITIALIZED(mpi_errno);
     MPIR_T_THREAD_CS_ENTER();
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_T_PVAR_WRITE);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_T_PVAR_WRITE);
 
     /* Validate parameters, especially handles needing to be converted */
 #   ifdef HAVE_ERROR_CHECKING
@@ -113,7 +113,7 @@ int MPI_T_pvar_write(MPI_T_pvar_session session, MPI_T_pvar_handle handle, const
     /* ... end of body of routine ... */
 
 fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_T_PVAR_WRITE);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_T_PVAR_WRITE);
     MPIR_T_THREAD_CS_EXIT();
     return mpi_errno;
 
diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_fbox.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_fbox.h
index d6ed5d0..562efa0 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_fbox.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_fbox.h
@@ -39,7 +39,7 @@ static inline int poll_active_fboxes(MPID_nem_cell_ptr_t *cell)
             MPID_nem_fbox_mpich_t *fbox;
 
             fbox = MPID_nem_curr_fboxq_elem->fbox;
-            MPIU_Assert(fbox != NULL);
+            MPIR_Assert(fbox != NULL);
             if (OPA_load_acquire_int(&fbox->flag.value) &&
                 fbox->cell.pkt.header.seqno == MPID_nem_recv_seqno[MPID_nem_curr_fboxq_elem->grank])
             {
diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_generic_queue.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_generic_queue.h
index 5068d4d..c6a01fc 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_generic_queue.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_generic_queue.h
@@ -22,7 +22,7 @@
 #define GENERIC_Q_HEAD(q) ((q).head)
 
 #define GENERIC_Q_ENQUEUE_EMPTY(qp, ep, next_field) do {        \
-        MPIU_Assert (GENERIC_Q_EMPTY (*(qp)));                  \
+        MPIR_Assert (GENERIC_Q_EMPTY (*(qp)));                  \
         (qp)->head = (qp)->tail = ep;                           \
         (ep)->next_field = NULL;                                \
         PRINT_QUEUE (qp, next_field);                           \
@@ -55,7 +55,7 @@
    tail of a linked list of elements.  The list is inserted on the end
    of the queue. */
 #define GENERIC_Q_ENQUEUE_EMPTY_MULTIPLE(qp, ep0, ep1, next_field) do { \
-        MPIU_Assert (GENERIC_Q_EMPTY (*(qp)));                          \
+        MPIR_Assert (GENERIC_Q_EMPTY (*(qp)));                          \
         (qp)->head = ep0;                                               \
         (qp)->tail = ep1;                                               \
         (ep1)->next_field = NULL;                                       \
@@ -74,7 +74,7 @@
 
 
 #define GENERIC_Q_DEQUEUE(qp, epp, next_field) do {     \
-        MPIU_Assert (!GENERIC_Q_EMPTY (*(qp)));         \
+        MPIR_Assert (!GENERIC_Q_EMPTY (*(qp)));         \
         *(epp) = (qp)->head;                            \
         (qp)->head = (*(epp))->next_field;              \
         if ((qp)->head == NULL)                         \
@@ -84,7 +84,7 @@
 
 /* remove the elements from the top of the queue starting with ep0 through ep1 */
 #define GENERIC_Q_REMOVE_ELEMENTS(qp, ep0, ep1, next_field) do {        \
-        MPIU_Assert (GENERIC_Q_HEAD (*(qp)) == (ep0));                  \
+        MPIR_Assert (GENERIC_Q_HEAD (*(qp)) == (ep0));                  \
         (qp)->head = (ep1)->next_field;                                 \
         if ((qp)->head == NULL)                                         \
             (qp)->tail = NULL;                                          \
@@ -158,7 +158,7 @@
 #define GENERICM_Q_HEAD(q) ((q).head)
 
 #define GENERICM_Q_ENQUEUE_EMPTY(qp, ep, next_field_macro, next_field) do {     \
-        MPIU_Assert (GENERICM_Q_EMPTY (*(qp)));                                 \
+        MPIR_Assert (GENERICM_Q_EMPTY (*(qp)));                                 \
         (qp)->head = (qp)->tail = ep;                                           \
         next_field_macro(ep, next_field) = NULL;                                \
         PRINTM_QUEUE (qp, next_field_macro, next_field);                        \
@@ -191,7 +191,7 @@
    tail of a linked list of elements.  The list is inserted on the end
    of the queue. */
 #define GENERICM_Q_ENQUEUE_EMPTY_MULTIPLE(qp, ep0, ep1, next_field_macro, next_field) do {      \
-        MPIU_Assert (GENERICM_Q_EMPTY (*(qp)));                                                 \
+        MPIR_Assert (GENERICM_Q_EMPTY (*(qp)));                                                 \
         (qp)->head = ep0;                                                                       \
         (qp)->tail = ep1;                                                                       \
         next_field_macro(ep1, next_field) = NULL;                                               \
@@ -210,7 +210,7 @@
 
 
 #define GENERICM_Q_DEQUEUE(qp, epp, next_field_macro, next_field) do {  \
-        MPIU_Assert (!GENERICM_Q_EMPTY (*(qp)));                        \
+        MPIR_Assert (!GENERICM_Q_EMPTY (*(qp)));                        \
         *(epp) = (qp)->head;                                            \
         (qp)->head = next_field_macro(*(epp), next_field);              \
         if ((qp)->head == NULL)                                         \
@@ -219,7 +219,7 @@
 
 /* remove the elements from the top of the queue starting with ep0 through ep1 */
 #define GENERICM_Q_REMOVE_ELEMENTS(qp, ep0, ep1, next_field_macro, next_field) do {     \
-        MPIU_Assert (GENERICM_Q_HEAD (*(qp)) == (ep0));                                 \
+        MPIR_Assert (GENERICM_Q_HEAD (*(qp)) == (ep0));                                 \
         (qp)->head = next_field_macro(ep1, next_field);                                 \
         if ((qp)->head == NULL)                                                         \
             (qp)->tail = NULL;                                                          \
@@ -287,7 +287,7 @@
 #define GENERIC_L_HEAD(q) ((q).head)
 
 #define GENERIC_L_ADD_EMPTY(qp, ep, next_field, prev_field) do {        \
-        MPIU_Assert (GENERIC_L_EMPTY (*(qp)));                          \
+        MPIR_Assert (GENERIC_L_EMPTY (*(qp)));                          \
         (qp)->head = ep;                                                \
         (ep)->next_field = (ep)->prev_field = NULL;                     \
     } while (0)
@@ -305,7 +305,7 @@
     } while (0)
 
 #define GENERIC_L_REMOVE(qp, ep, next_field, prev_field) do {   \
-        MPIU_Assert (!GENERIC_L_EMPTY (*(qp)));                 \
+        MPIR_Assert (!GENERIC_L_EMPTY (*(qp)));                 \
         if ((ep)->prev_field)                                   \
             ((ep)->prev_field)->next_field = (ep)->next_field;  \
         else                                                    \
diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_inline.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_inline.h
index cdeb1b2..0beaa7e 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_inline.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_inline.h
@@ -106,8 +106,8 @@ MPID_nem_mpich_send_header (void* buf, int size, MPIDI_VC_t *vc, int *again)
 
     /*DO_PAPI (PAPI_reset (PAPI_EventSet)); */
 
-    MPIU_Assert (size == sizeof(MPIDI_CH3_Pkt_t));
-    MPIU_Assert (vc_ch->is_local);
+    MPIR_Assert (size == sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert (vc_ch->is_local);
 
     my_rank = MPID_nem_mem_region.rank;
 
@@ -125,7 +125,7 @@ MPID_nem_mpich_send_header (void* buf, int size, MPIDI_VC_t *vc, int *again)
         
         MPL_DBG_STMT (MPIDI_CH3_DBG_CHANNEL, VERBOSE, pbox->cell.pkt.header.type = MPID_NEM_PKT_MPICH_HEAD);
         
-        MPIU_Memcpy((void *)pbox->cell.pkt.p.payload, buf, size);
+        MPIR_Memcpy((void *)pbox->cell.pkt.p.payload, buf, size);
 
         OPA_store_release_int(&pbox->flag.value, 1);
 
@@ -171,7 +171,7 @@ MPID_nem_mpich_send_header (void* buf, int size, MPIDI_VC_t *vc, int *again)
     el->pkt.header.seqno   = vc_ch->send_seqno++;
     MPL_DBG_STMT (MPIDI_CH3_DBG_CHANNEL, VERBOSE, el->pkt.header.type = MPID_NEM_PKT_MPICH_HEAD);
     
-    MPIU_Memcpy((void *)el->pkt.p.payload, buf, size);
+    MPIR_Memcpy((void *)el->pkt.p.payload, buf, size);
     DO_PAPI (PAPI_accum_var (PAPI_EventSet, PAPI_vvalues11));
 
     MPL_DBG_MSG (MPIDI_CH3_DBG_CHANNEL, VERBOSE, "--> Sent queue");
@@ -229,12 +229,12 @@ MPID_nem_mpich_sendv (MPL_IOV **iov, int *n_iov, MPIDI_VC_t *vc, int *again)
     intptr_t payload_len;
     int my_rank;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_SENDV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_SENDV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_SENDV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_SENDV);
 
-    MPIU_Assert (*n_iov > 0 && (*iov)->MPL_IOV_LEN > 0);
-    MPIU_Assert(vc_ch->is_local);
+    MPIR_Assert (*n_iov > 0 && (*iov)->MPL_IOV_LEN > 0);
+    MPIR_Assert(vc_ch->is_local);
 
     DO_PAPI (PAPI_reset (PAPI_EventSet));
 
@@ -269,7 +269,7 @@ MPID_nem_mpich_sendv (MPL_IOV **iov, int *n_iov, MPIDI_VC_t *vc, int *again)
     while (*n_iov && payload_len >= (*iov)->MPL_IOV_LEN)
     {
 	size_t _iov_len = (*iov)->MPL_IOV_LEN;
-	MPIU_Memcpy (cell_buf, (*iov)->MPL_IOV_BUF, _iov_len);
+	MPIR_Memcpy (cell_buf, (*iov)->MPL_IOV_BUF, _iov_len);
 	payload_len -= _iov_len;
 	cell_buf += _iov_len;
 	--(*n_iov);
@@ -278,7 +278,7 @@ MPID_nem_mpich_sendv (MPL_IOV **iov, int *n_iov, MPIDI_VC_t *vc, int *again)
     
     if (*n_iov && payload_len > 0)
     {
-	MPIU_Memcpy (cell_buf, (*iov)->MPL_IOV_BUF, payload_len);
+	MPIR_Memcpy (cell_buf, (*iov)->MPL_IOV_BUF, payload_len);
 	(*iov)->MPL_IOV_BUF = (char *)(*iov)->MPL_IOV_BUF + payload_len;
 	(*iov)->MPL_IOV_LEN -= payload_len;
  	payload_len = 0;
@@ -310,7 +310,7 @@ MPID_nem_mpich_sendv (MPL_IOV **iov, int *n_iov, MPIDI_VC_t *vc, int *again)
     *again = 1;
     goto fn_exit;
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_SENDV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_SENDV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -334,14 +334,14 @@ MPID_nem_mpich_sendv_header (MPL_IOV **iov, int *n_iov,
     int my_rank;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
     MPI_Aint buf_offset = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_SENDV_HEADER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_SENDV_HEADER);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_SENDV_HEADER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_SENDV_HEADER);
 
-    MPIU_Assert(vc_ch->is_local);
+    MPIR_Assert(vc_ch->is_local);
 
     DO_PAPI (PAPI_reset (PAPI_EventSet));
-    MPIU_Assert (*n_iov > 0 && (*iov)->MPL_IOV_LEN == sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert (*n_iov > 0 && (*iov)->MPL_IOV_LEN == sizeof(MPIDI_CH3_Pkt_t));
 
     my_rank = MPID_nem_mem_region.rank;
 
@@ -359,8 +359,8 @@ MPID_nem_mpich_sendv_header (MPL_IOV **iov, int *n_iov,
         pbox->cell.pkt.header.seqno   = vc_ch->send_seqno++;
         MPL_DBG_STMT (MPIDI_CH3_DBG_CHANNEL, VERBOSE, pbox->cell.pkt.header.type = MPID_NEM_PKT_MPICH_HEAD);
         
-        MPIU_Memcpy((void *)pbox->cell.pkt.p.payload, (*iov)[0].MPL_IOV_BUF, (*iov)[0].MPL_IOV_LEN);
-        MPIU_Memcpy ((char *)pbox->cell.pkt.p.payload + (*iov)[0].MPL_IOV_LEN, (*iov)[1].MPL_IOV_BUF, (*iov)[1].MPL_IOV_LEN);
+        MPIR_Memcpy((void *)pbox->cell.pkt.p.payload, (*iov)[0].MPL_IOV_BUF, (*iov)[0].MPL_IOV_LEN);
+        MPIR_Memcpy ((char *)pbox->cell.pkt.p.payload + (*iov)[0].MPL_IOV_LEN, (*iov)[1].MPL_IOV_BUF, (*iov)[1].MPL_IOV_LEN);
         
         OPA_store_release_int(&pbox->flag.value, 1);
         *n_iov = 0;
@@ -398,15 +398,15 @@ MPID_nem_mpich_sendv_header (MPL_IOV **iov, int *n_iov,
     MPID_nem_queue_dequeue (MPID_nem_mem_region.my_freeQ, &el);
 #endif /*PREFETCH_CELL */
 
-    MPIU_Memcpy((void *)el->pkt.p.payload, (*iov)->MPL_IOV_BUF, sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Memcpy((void *)el->pkt.p.payload, (*iov)->MPL_IOV_BUF, sizeof(MPIDI_CH3_Pkt_t));
     buf_offset += sizeof(MPIDI_CH3_Pkt_t);
 
     if (ext_hdr_sz > 0) {
         /* ensure extended header fits in this cell. */
-        MPIU_Assert(MPID_NEM_MPICH_DATA_LEN - buf_offset >= ext_hdr_sz);
+        MPIR_Assert(MPID_NEM_MPICH_DATA_LEN - buf_offset >= ext_hdr_sz);
 
         /* when extended packet header exists, copy it */
-        MPIU_Memcpy((void *)((char *)(el->pkt.p.payload) + buf_offset), ext_hdr_ptr, ext_hdr_sz);
+        MPIR_Memcpy((void *)((char *)(el->pkt.p.payload) + buf_offset), ext_hdr_ptr, ext_hdr_sz);
         buf_offset += ext_hdr_sz;
     }
 
@@ -418,7 +418,7 @@ MPID_nem_mpich_sendv_header (MPL_IOV **iov, int *n_iov,
     while (*n_iov && payload_len >= (*iov)->MPL_IOV_LEN)
     {
 	size_t _iov_len = (*iov)->MPL_IOV_LEN;
-	MPIU_Memcpy (cell_buf, (*iov)->MPL_IOV_BUF, _iov_len);
+	MPIR_Memcpy (cell_buf, (*iov)->MPL_IOV_BUF, _iov_len);
 	payload_len -= _iov_len;
 	cell_buf += _iov_len;
 	--(*n_iov);
@@ -427,7 +427,7 @@ MPID_nem_mpich_sendv_header (MPL_IOV **iov, int *n_iov,
     
     if (*n_iov && payload_len > 0)
     {
-	MPIU_Memcpy (cell_buf, (*iov)->MPL_IOV_BUF, payload_len);
+	MPIR_Memcpy (cell_buf, (*iov)->MPL_IOV_BUF, payload_len);
 	(*iov)->MPL_IOV_BUF = (char *)(*iov)->MPL_IOV_BUF + payload_len;
 	(*iov)->MPL_IOV_LEN -= payload_len;
 	payload_len = 0;
@@ -460,7 +460,7 @@ MPID_nem_mpich_sendv_header (MPL_IOV **iov, int *n_iov,
     *again = 1;
     goto fn_exit;
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_SENDV_HEADER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_SENDV_HEADER);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -495,8 +495,8 @@ MPID_nem_mpich_send_seg_header (MPIDU_Segment *segment, intptr_t *segment_first,
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
     MPI_Aint buf_offset = 0;
 
-    MPIU_Assert(vc_ch->is_local); /* netmods will have their own implementation */
-    MPIU_Assert(header_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(vc_ch->is_local); /* netmods will have their own implementation */
+    MPIR_Assert(header_sz <= sizeof(MPIDI_CH3_Pkt_t));
     
     
     DO_PAPI (PAPI_reset (PAPI_EventSet));
@@ -509,14 +509,14 @@ MPID_nem_mpich_send_seg_header (MPIDU_Segment *segment, intptr_t *segment_first,
 	MPID_nem_fbox_mpich_t *pbox = vc_ch->fbox_out;
 
         /* Add a compiler time check on streaming unit size and FASTBOX size */
-        MPIU_Static_assert((MPIDI_CH3U_Acc_stream_size > MPID_NEM_FBOX_DATALEN),
+        MPIR_Static_assert((MPIDI_CH3U_Acc_stream_size > MPID_NEM_FBOX_DATALEN),
                            "RMA ACC Streaming unit size <= FASTBOX size in Nemesis.");
 
         /* NOTE: when FASTBOX is being used, streaming optimization is never triggered,
          * because streaming unit size is larger than FASTBOX size. In such case,
          * first offset (*segment_first) is zero, and last offset (segment_size)
          * is the data size */
-        MPIU_Assert(*segment_first == 0);
+        MPIR_Assert(*segment_first == 0);
 
         if (MPID_nem_fbox_is_full((MPID_nem_fbox_common_ptr_t)pbox))
             goto usequeue_l;
@@ -528,12 +528,12 @@ MPID_nem_mpich_send_seg_header (MPIDU_Segment *segment, intptr_t *segment_first,
             MPL_DBG_STMT (MPIDI_CH3_DBG_CHANNEL, VERBOSE, pbox->cell.pkt.header.type = MPID_NEM_PKT_MPICH_HEAD);
 
             /* copy header */
-            MPIU_Memcpy((void *)pbox->cell.pkt.p.payload, header, header_sz);
+            MPIR_Memcpy((void *)pbox->cell.pkt.p.payload, header, header_sz);
             
             /* copy data */
             last = segment_size;
             MPIDU_Segment_pack(segment, *segment_first, &last, (char *)pbox->cell.pkt.p.payload + sizeof(MPIDI_CH3_Pkt_t));
-            MPIU_Assert(last == segment_size);
+            MPIR_Assert(last == segment_size);
 
             OPA_store_release_int(&pbox->flag.value, 1);
 
@@ -574,13 +574,13 @@ MPID_nem_mpich_send_seg_header (MPIDU_Segment *segment, intptr_t *segment_first,
 #endif /*PREFETCH_CELL */
 
     /* copy header */
-    MPIU_Memcpy((void *)el->pkt.p.payload, header, header_sz);
+    MPIR_Memcpy((void *)el->pkt.p.payload, header, header_sz);
     
     buf_offset += sizeof(MPIDI_CH3_Pkt_t);
 
     if (ext_header_sz > 0) {
         /* when extended packet header exists, copy it */
-        MPIU_Memcpy((void *)((char *)(el->pkt.p.payload) + buf_offset), ext_header, ext_header_sz);
+        MPIR_Memcpy((void *)((char *)(el->pkt.p.payload) + buf_offset), ext_header, ext_header_sz);
         buf_offset += ext_header_sz;
     }
 
@@ -638,7 +638,7 @@ MPID_nem_mpich_send_seg (MPIDU_Segment *segment, intptr_t *segment_first, intptr
     intptr_t last;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
 
-    MPIU_Assert(vc_ch->is_local); /* netmods will have their own implementation */    
+    MPIR_Assert(vc_ch->is_local); /* netmods will have their own implementation */
     
     DO_PAPI (PAPI_reset (PAPI_EventSet));
 
@@ -720,12 +720,12 @@ static inline void MPID_nem_mpich_dequeue_fastbox(int local_rank)
 {
     MPID_nem_fboxq_elem_t *el;
 
-    MPIU_Assert(local_rank < MPID_nem_mem_region.num_local);
+    MPIR_Assert(local_rank < MPID_nem_mem_region.num_local);
 
     el = &MPID_nem_fboxq_elem_list[local_rank];    
-    MPIU_Assert(el->fbox != NULL);
+    MPIR_Assert(el->fbox != NULL);
 
-    MPIU_Assert(el->usage);
+    MPIR_Assert(el->usage);
 
     --el->usage;
     if (el->usage == 0)
@@ -764,10 +764,10 @@ static inline void MPID_nem_mpich_enqueue_fastbox(int local_rank)
 {
     MPID_nem_fboxq_elem_t *el;
 
-    MPIU_Assert(local_rank < MPID_nem_mem_region.num_local);
+    MPIR_Assert(local_rank < MPID_nem_mem_region.num_local);
 
     el = &MPID_nem_fboxq_elem_list[local_rank];
-    MPIU_Assert(el->fbox != NULL);
+    MPIR_Assert(el->fbox != NULL);
 
     if (el->usage)
     {
@@ -962,7 +962,7 @@ MPID_nem_mpich_blocking_recv(MPID_nem_cell_ptr_t *cell, int *in_fbox, int comple
 
 #ifdef MPICH_IS_THREADED
     /* We should never enter this function in a multithreaded app */
-    MPIU_Assert(!MPIR_ThreadInfo.isThreaded);
+    MPIR_Assert(!MPIR_ThreadInfo.isThreaded);
 #endif
 
 #ifdef USE_FASTBOX
diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_queue.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_queue.h
index cead514..b8d536b 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_queue.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_queue.h
@@ -39,14 +39,14 @@ int MPID_nem_network_poll(int in_blocking_progress);
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline void MPID_nem_cell_init(MPID_nem_cell_ptr_t cell)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_CELL_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_CELL_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_CELL_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_CELL_INIT);
 
     MPID_NEM_SET_REL_NULL(cell->next);
     memset((void *)&cell->pkt, 0, sizeof(MPID_nem_pkt_header_t));
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_CELL_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_CELL_INIT);
 }
 
 #if defined(MPID_NEM_USE_LOCK_FREE_QUEUES)
@@ -57,15 +57,15 @@ static inline void MPID_nem_cell_init(MPID_nem_cell_ptr_t cell)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline void MPID_nem_queue_init(MPID_nem_queue_ptr_t qhead)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_QUEUE_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_QUEUE_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_QUEUE_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_QUEUE_INIT);
 
     MPID_NEM_SET_REL_NULL(qhead->head);
     MPID_NEM_SET_REL_NULL(qhead->my_head);
     MPID_NEM_SET_REL_NULL(qhead->tail);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_QUEUE_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_QUEUE_INIT);
 }
 
 #define MPID_NEM_USE_SHADOW_HEAD
@@ -245,16 +245,16 @@ MPID_nem_queue_dequeue (MPID_nem_queue_ptr_t qhead, MPID_nem_cell_ptr_t *e)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline void MPID_nem_queue_init(MPID_nem_queue_ptr_t qhead)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_QUEUE_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_QUEUE_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_QUEUE_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_QUEUE_INIT);
 
     MPID_NEM_SET_REL_NULL(qhead->head);
     MPID_NEM_SET_REL_NULL(qhead->my_head);
     MPID_NEM_SET_REL_NULL(qhead->tail);
     MPID_nem_queue_mutex_create(&qhead->lock, NULL);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_QUEUE_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_QUEUE_INIT);
 }
 
 static inline void
diff --git a/src/mpid/ch3/channels/nemesis/include/mpidi_ch3_impl.h b/src/mpid/ch3/channels/nemesis/include/mpidi_ch3_impl.h
index 92b27af..4037253 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpidi_ch3_impl.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpidi_ch3_impl.h
@@ -213,10 +213,10 @@ static inline int MPIDI_CH3I_SHM_Wins_append(MPIDI_SHM_Wins_list_t * list, MPIR_
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_SHM_Win_t *tmp_ptr;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
     /* FIXME: We should use a pool allocator here */
-    MPIU_CHKPMEM_MALLOC(tmp_ptr, MPIDI_SHM_Win_t *, sizeof(MPIDI_SHM_Win_t),
+    MPIR_CHKPMEM_MALLOC(tmp_ptr, MPIDI_SHM_Win_t *, sizeof(MPIDI_SHM_Win_t),
                         mpi_errno, "SHM window entry");
 
     tmp_ptr->next = NULL;
@@ -225,10 +225,10 @@ static inline int MPIDI_CH3I_SHM_Wins_append(MPIDI_SHM_Wins_list_t * list, MPIR_
     MPL_DL_APPEND(*list, tmp_ptr);
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
     return mpi_errno;
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_cancel.c b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_cancel.c
index 794ff03..293e7cf 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_cancel.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_cancel.c
@@ -24,15 +24,15 @@
 int MPID_nem_llc_cancel_recv(struct MPIDI_VC *vc, struct MPIR_Request *req)
 {
     int canceled;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_CANCEL_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_CANCEL_RECV);
 
     /* returns zero in case request is canceled */
     canceled = LLC_req_approve_recv((LLC_cmd_t *) REQ_FIELD(req, cmds));
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_CANCEL_RECV);
     return canceled;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_fini.c b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_fini.c
index c937260..33feb54 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_fini.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_fini.c
@@ -18,15 +18,15 @@ int MPID_nem_llc_finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int rc;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_FINALIZE);
 
     rc = LLC_finalize();
     MPIR_ERR_CHKANDJUMP(rc != 0, mpi_errno, MPI_ERR_OTHER, "**fail");
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_FINALIZE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_init.c b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_init.c
index 4ca395e..b49a31a 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_init.c
@@ -53,8 +53,8 @@ int MPID_nem_llc_kvs_put_binary(int from, const char *postfix, const uint8_t * b
     char key[256], val[256], str[256];
     int j;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_KVS_PUT_BINARY);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_KVS_PUT_BINARY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_KVS_PUT_BINARY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_KVS_PUT_BINARY);
 
     mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
     MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPIDI_PG_GetConnKVSname");
@@ -71,7 +71,7 @@ int MPID_nem_llc_kvs_put_binary(int from, const char *postfix, const uint8_t * b
     pmi_errno = PMI_KVS_Put(kvs_name, key, val);
     MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMI_KVS_Put");
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_KVS_PUT_BINARY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_KVS_PUT_BINARY);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -89,8 +89,8 @@ int MPID_nem_llc_kvs_get_binary(int from, const char *postfix, char *buf, int le
     char key[256], val[256], str[256];
     int j;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_KVS_GET_BINARY);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_KVS_GET_BINARY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_KVS_GET_BINARY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_KVS_GET_BINARY);
 
     mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
     dprintf("kvs_get_binary,kvs_name=%s\n", kvs_name);
@@ -112,7 +112,7 @@ int MPID_nem_llc_kvs_get_binary(int from, const char *postfix, char *buf, int le
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_KVS_GET_BINARY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_KVS_GET_BINARY);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -126,9 +126,9 @@ int MPID_nem_llc_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
 {
     int mpi_errno = MPI_SUCCESS, pmi_errno, llc_errno;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_INIT);
 
     llc_errno = LLC_init(TYPE_MPI);
     MPIR_ERR_CHKANDJUMP(llc_errno, mpi_errno, MPI_ERR_OTHER, "**LLC_init");
@@ -155,7 +155,7 @@ int MPID_nem_llc_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     MPIDI_Anysource_improbe_fn = MPID_nem_llc_anysource_improbe;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_INIT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -168,12 +168,12 @@ int MPID_nem_llc_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
 int MPID_nem_llc_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_GET_BUSINESS_CARD);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_GET_BUSINESS_CARD);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_GET_BUSINESS_CARD);
     return mpi_errno;
     //fn_fail:
     goto fn_exit;
@@ -186,12 +186,12 @@ int MPID_nem_llc_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz
 int MPID_nem_llc_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_CONNECT_TO_ROOT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_CONNECT_TO_ROOT);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_CONNECT_TO_ROOT);
     return mpi_errno;
     //fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_poll.c b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_poll.c
index 0cc55e9..6cd6b32 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_poll.c
@@ -28,9 +28,9 @@ static void MPID_nem_llc_recv_handler(void *vp_vc, uint64_t raddr, void *buf, si
 int MPID_nem_llc_poll(int in_blocking_progress)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_POLL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_POLL);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_POLL);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_POLL);
 
     {
         int rc;
@@ -42,7 +42,7 @@ int MPID_nem_llc_poll(int in_blocking_progress)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_POLL);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_POLL);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -57,19 +57,19 @@ static void MPID_nem_llc_send_handler(void *cba, uint64_t * p_reqid)
     /* int mpi_errno = 0; */
     MPIR_Request *sreq = cba;   /* from llc_writev(,,,,cbarg,) */
     MPIR_Request_kind_t kind;
-    /* MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_SEND_HANDLER); */
+    /* MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_SEND_HANDLER); */
 
-    /* MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_SEND_HANDLER); */
+    /* MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_SEND_HANDLER); */
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "llc_send_handler");
 
-    MPIU_Assert(sreq != NULL);
+    MPIR_Assert(sreq != NULL);
 
     if (sreq == (void *) 0xdeadbeefUL) {
         MPIDI_VC_t *vc = (void *) p_reqid[0];
         MPID_nem_llc_vc_area *vc_llc;
 
-        MPIU_Assert(vc != NULL);
+        MPIR_Assert(vc != NULL);
         /* printf("from credit %p (pg_rank %d)\n", vc, vc->pg_rank); */
 
         vc_llc = VC_LLC(vc);
@@ -132,7 +132,7 @@ static void MPID_nem_llc_send_handler(void *cba, uint64_t * p_reqid)
                 vc = sreq->ch.vc;       /* before callback */
                 reqFn = sreq->dev.OnDataAvail;
                 if (reqFn == 0) {
-                    MPIU_Assert(reqtype != MPIDI_REQUEST_TYPE_GET_RESP);
+                    MPIR_Assert(reqtype != MPIDI_REQUEST_TYPE_GET_RESP);
 
                     r_mpi_errno = MPID_Request_complete(sreq);
                     if (r_mpi_errno != MPI_SUCCESS) {
@@ -146,7 +146,7 @@ static void MPID_nem_llc_send_handler(void *cba, uint64_t * p_reqid)
                     if (r_mpi_errno)
                         MPIR_ERR_POP(r_mpi_errno);
                     if (complete == 0) {
-                        MPIU_Assert(complete == TRUE);
+                        MPIR_Assert(complete == TRUE);
                     }
                     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, ".... complete2");
                 }
@@ -167,7 +167,7 @@ static void MPID_nem_llc_send_handler(void *cba, uint64_t * p_reqid)
     }
 
   fn_exit:
-    /* MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_SEND_HANDLER); */
+    /* MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_SEND_HANDLER); */
     return /* mpi_errno */ ;
   fn_fail:
     goto fn_exit;
@@ -181,9 +181,9 @@ static void MPID_nem_llc_recv_handler(void *vp_vc, uint64_t raddr, void *buf, si
 {
     int mpi_errno = 0;
     MPIDI_VC_t *vc = vp_vc;
-    /* MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_RECV_HANDLER); */
+    /* MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_RECV_HANDLER); */
 
-    /* MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_RECV_HANDLER); */
+    /* MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_RECV_HANDLER); */
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "llc_recv_handler");
 
@@ -221,7 +221,7 @@ static void MPID_nem_llc_recv_handler(void *vp_vc, uint64_t raddr, void *buf, si
     }
 
   fn_exit:
-    /* MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_RECV_HANDLER); */
+    /* MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_RECV_HANDLER); */
     return;
     //fn_fail:
     goto fn_exit;
@@ -240,8 +240,8 @@ int MPID_nem_llc_recv_posted(struct MPIDI_VC *vc, struct MPIR_Request *req)
     MPI_Aint dt_true_lb;
     int i;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_RECV_POSTED);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_RECV_POSTED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_RECV_POSTED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_RECV_POSTED);
 
     /* req->dev.datatype is set in MPID_irecv --> MPIDI_CH3U_Recvq_FDU_or_AEP */
     MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr,
@@ -300,11 +300,11 @@ int MPID_nem_llc_recv_posted(struct MPIDI_VC *vc, struct MPIR_Request *req)
         *(int32_t *) ((uint8_t *) & cmd[0].tag) = req->dev.match.parts.tag;
     }
 
-    *(MPIU_Context_id_t *) ((uint8_t *) & cmd[0].tag + sizeof(int32_t)) =
+    *(MPIR_Context_id_t *) ((uint8_t *) & cmd[0].tag + sizeof(int32_t)) =
         req->dev.match.parts.context_id;
-    MPIU_Assert(sizeof(LLC_tag_t) >= sizeof(int32_t) + sizeof(MPIU_Context_id_t));
-    memset((uint8_t *) & cmd[0].tag + sizeof(int32_t) + sizeof(MPIU_Context_id_t),
-           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIU_Context_id_t));
+    MPIR_Assert(sizeof(LLC_tag_t) >= sizeof(int32_t) + sizeof(MPIR_Context_id_t));
+    memset((uint8_t *) & cmd[0].tag + sizeof(int32_t) + sizeof(MPIR_Context_id_t),
+           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIR_Context_id_t));
 
 
     dprintf("llc_recv_posted,tag=");
@@ -336,7 +336,7 @@ int MPID_nem_llc_recv_posted(struct MPIDI_VC *vc, struct MPIR_Request *req)
     MPIR_ERR_CHKANDJUMP(llc_errno != LLC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**LLC_post");
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_RECV_POSTED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_RECV_POSTED);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -349,13 +349,13 @@ int MPID_nem_llc_recv_posted(struct MPIDI_VC *vc, struct MPIR_Request *req)
 void MPID_nem_llc_anysource_posted(MPIR_Request * req)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_AYSOURCE_POSTED);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_AYSOURCE_POSTED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_AYSOURCE_POSTED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_AYSOURCE_POSTED);
 
     mpi_errno = MPID_nem_llc_recv_posted(NULL, req);
-    MPIU_Assert(mpi_errno == MPI_SUCCESS);
+    MPIR_Assert(mpi_errno == MPI_SUCCESS);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_AYSOURCE_POSTED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_AYSOURCE_POSTED);
 }
 
 #undef FUNCNAME
@@ -366,15 +366,15 @@ int MPID_nem_llc_anysource_matched(MPIR_Request * req)
 {
     int matched = FALSE;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ANYSOURCE_MATCHED);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_ANYSOURCE_MATCHED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ANYSOURCE_MATCHED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_ANYSOURCE_MATCHED);
 
     /* FIXME : How to call a cancel_recv function */
     /* If LLC_postedq is still having this request, delete it.
      * Ohterwise, return TURE */
     matched = LLC_req_approve_recv((LLC_cmd_t *) REQ_FIELD(req, cmds));
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_ANYSOURCE_MATCHED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_ANYSOURCE_MATCHED);
 
     return matched;
 }
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_probe.c b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_probe.c
index e2276c4..96d2b09 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_probe.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_probe.c
@@ -26,13 +26,13 @@ int MPID_nem_llc_probe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm, i
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_PROBE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_PROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_PROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_PROBE);
     dprintf("llc_probe,source=%d,tag=%d\n", source, tag);
 
     /* NOTE : This function is not used. Because 'vc->comm_ops->probe()' is not used */
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_PROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_PROBE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -51,8 +51,8 @@ int MPID_nem_llc_iprobe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
     LLC_probe_t probe;
     LLC_match_mask_t mask;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_IPROBE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_IPROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_IPROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_IPROBE);
     dprintf("llc_iprobe,source=%d,tag=%d\n", source, tag);
 
     mask.rank = ~0;
@@ -66,17 +66,17 @@ int MPID_nem_llc_iprobe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
         *(int32_t *) ((uint8_t *) & _tag) = tag;
     }
 
-    *(MPIU_Context_id_t *) ((uint8_t *) & _tag + sizeof(int32_t)) =
+    *(MPIR_Context_id_t *) ((uint8_t *) & _tag + sizeof(int32_t)) =
         comm->recvcontext_id + context_offset;
-    memset((uint8_t *) & _tag + sizeof(int32_t) + sizeof(MPIU_Context_id_t),
-           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIU_Context_id_t));
+    memset((uint8_t *) & _tag + sizeof(int32_t) + sizeof(MPIR_Context_id_t),
+           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIR_Context_id_t));
 
     if (source == MPI_ANY_SOURCE) {
         rank = LLC_ANY_SOURCE;
         mask.rank = 0;
     }
     else {
-        MPIU_Assert(vc);
+        MPIR_Assert(vc);
         rank = VC_FIELD(vc, remote_endpoint_addr);
     }
 
@@ -90,7 +90,7 @@ int MPID_nem_llc_iprobe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
         else {
             int found = 0;
             found = convert_rank_llc2mpi(comm, probe.rank, &status->MPI_SOURCE);
-            MPIU_Assert(found);
+            MPIR_Assert(found);
         }
         status->MPI_TAG = probe.tag & 0xffffffff;
         MPIR_STATUS_SET_COUNT(*status, probe.len);
@@ -102,7 +102,7 @@ int MPID_nem_llc_iprobe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_IPROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_IPROBE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -122,8 +122,8 @@ int MPID_nem_llc_improbe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
     LLC_match_mask_t mask;
     LLC_cmd_t *msg = NULL;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_IMPROBE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_IMPROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_IMPROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_IMPROBE);
     dprintf("llc_improbe,source=%d,tag=%d\n", source, tag);
 
     mask.rank = ~0;
@@ -137,17 +137,17 @@ int MPID_nem_llc_improbe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
         *(int32_t *) ((uint8_t *) & _tag) = tag;
     }
 
-    *(MPIU_Context_id_t *) ((uint8_t *) & _tag + sizeof(int32_t)) =
+    *(MPIR_Context_id_t *) ((uint8_t *) & _tag + sizeof(int32_t)) =
         comm->recvcontext_id + context_offset;
-    memset((uint8_t *) & _tag + sizeof(int32_t) + sizeof(MPIU_Context_id_t),
-           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIU_Context_id_t));
+    memset((uint8_t *) & _tag + sizeof(int32_t) + sizeof(MPIR_Context_id_t),
+           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIR_Context_id_t));
 
     if (source == MPI_ANY_SOURCE) {
         rank = LLC_ANY_SOURCE;
         mask.rank = 0;
     }
     else {
-        MPIU_Assert(vc);
+        MPIR_Assert(vc);
         rank = VC_FIELD(vc, remote_endpoint_addr);
     }
 
@@ -158,7 +158,7 @@ int MPID_nem_llc_improbe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
         *flag = 1;
 
         req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-        MPIU_Object_set_ref(req, 2);
+        MPIR_Object_set_ref(req, 2);
         req->kind = MPIR_REQUEST_KIND__MPROBE;
         req->comm = comm;
         MPIR_Comm_add_ref(comm);
@@ -173,13 +173,13 @@ int MPID_nem_llc_improbe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
         else {
             int found = 0;
             found = convert_rank_llc2mpi(comm, probe.rank, &req->status.MPI_SOURCE);
-            MPIU_Assert(found);
+            MPIR_Assert(found);
         }
         req->status.MPI_TAG = probe.tag & 0xffffffff;
         req->dev.recv_data_sz = probe.len;
         MPIR_STATUS_SET_COUNT(req->status, req->dev.recv_data_sz);
         req->dev.tmpbuf = MPL_malloc(req->dev.recv_data_sz);
-        MPIU_Assert(req->dev.tmpbuf);
+        MPIR_Assert(req->dev.tmpbuf);
 
         /* receive message in req->dev.tmpbuf */
         LLC_cmd_t *cmd = LLC_cmd_alloc2(1, 1, 1);
@@ -231,7 +231,7 @@ int MPID_nem_llc_improbe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_IMPROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_IMPROBE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_send.c b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_send.c
index 02e179e..773d352 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_send.c
@@ -31,8 +31,8 @@ int MPID_nem_llc_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Data
     MPI_Aint dt_true_lb;
     int i;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ISEND);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_ISEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ISEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_ISEND);
 
     dprintf
         ("llc_isend,%d->%d,buf=%p,count=%d,datatype=%08x,dest=%d,tag=%08x,comm=%p,context_offset=%d\n",
@@ -44,8 +44,8 @@ int MPID_nem_llc_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Data
     dprintf("llc_isend,LLC_my_rank=%d\n", LLC_my_rank);
 
     struct MPIR_Request *sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-    MPIU_Assert(sreq != NULL);
-    MPIU_Object_set_ref(sreq, 2);
+    MPIR_Assert(sreq != NULL);
+    MPIR_Object_set_ref(sreq, 2);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
 
     /* Used in llc_poll --> MPID_nem_llc_send_handler */
@@ -90,11 +90,11 @@ int MPID_nem_llc_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Data
     /* Prepare bit-vector to perform tag-match. We use the same bit-vector as in CH3 layer. */
     /* See src/mpid/ch3/src/mpid_isend.c */
     *(int32_t *) ((uint8_t *) & cmd[0].tag) = tag;
-    *(MPIU_Context_id_t *) ((uint8_t *) & cmd[0].tag + sizeof(int32_t)) =
+    *(MPIR_Context_id_t *) ((uint8_t *) & cmd[0].tag + sizeof(int32_t)) =
         comm->context_id + context_offset;
-    MPIU_Assert(sizeof(LLC_tag_t) >= sizeof(int32_t) + sizeof(MPIU_Context_id_t));
-    memset((uint8_t *) & cmd[0].tag + sizeof(int32_t) + sizeof(MPIU_Context_id_t),
-           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIU_Context_id_t));
+    MPIR_Assert(sizeof(LLC_tag_t) >= sizeof(int32_t) + sizeof(MPIR_Context_id_t));
+    memset((uint8_t *) & cmd[0].tag + sizeof(int32_t) + sizeof(MPIR_Context_id_t),
+           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIR_Context_id_t));
 
     dprintf("llc_isend,tag=");
     for (i = 0; i < sizeof(LLC_tag_t); i++) {
@@ -128,11 +128,11 @@ int MPID_nem_llc_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Data
         intptr_t segment_first = 0;
         intptr_t segment_size = data_sz;
         intptr_t last = segment_size;
-        MPIU_Assert(last > 0);
+        MPIR_Assert(last > 0);
         REQ_FIELD(sreq, pack_buf) = MPL_malloc((size_t) data_sz);
         MPIR_ERR_CHKANDJUMP(!REQ_FIELD(sreq, pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
         MPIDU_Segment_pack(segment_ptr, segment_first, &last, (char *) (REQ_FIELD(sreq, pack_buf)));
-        MPIU_Assert(last == data_sz);
+        MPIR_Assert(last == data_sz);
         write_from_buf = REQ_FIELD(sreq, pack_buf);
     }
 
@@ -152,7 +152,7 @@ int MPID_nem_llc_isend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Data
 
   fn_exit:
     *req_out = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_ISEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_ISEND);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -169,14 +169,14 @@ int MPID_nem_llc_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, intptr_t hdr_sz, vo
     MPIR_Request *sreq = NULL;
     MPID_nem_llc_vc_area *vc_llc = 0;
     int need_to_queue = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ISTARTCONTIGMSG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_ISTARTCONTIGMSG);
 
     dprintf("llc_iStartContigMsg,%d->%d,hdr=%p,hdr_sz=%ld,data=%p,data_sz=%ld\n",
             MPIDI_Process.my_pg_rank, vc->pg_rank, hdr, hdr_sz, data, data_sz);
 
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "llc_iStartContigMsg");
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "vc.pg_rank = %d", vc->pg_rank);
@@ -187,8 +187,8 @@ int MPID_nem_llc_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, intptr_t hdr_sz, vo
 
     /* create a request */
     sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-    MPIU_Assert(sreq != NULL);
-    MPIU_Object_set_ref(sreq, 2);
+    MPIR_Assert(sreq != NULL);
+    MPIR_Object_set_ref(sreq, 2);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
 
     sreq->ch.vc = vc;
@@ -244,7 +244,7 @@ int MPID_nem_llc_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, intptr_t hdr_sz, vo
 
   fn_exit:
     *sreq_ptr = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_ISTARTCONTIGMSG);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -260,9 +260,9 @@ int MPID_nem_llc_iSendContig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr, in
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_llc_vc_area *vc_llc = 0;
     int need_to_queue = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ISENDCONTIGMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ISENDCONTIGMSG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_ISENDCONTIGMSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_ISENDCONTIGMSG);
 
     if (sreq->kind == MPIR_REQUEST_KIND__UNDEFINED) {
         sreq->kind = MPIR_REQUEST_KIND__SEND;
@@ -270,7 +270,7 @@ int MPID_nem_llc_iSendContig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr, in
     dprintf("llc_iSendConitig,sreq=%p,hdr=%p,hdr_sz=%ld,data=%p,data_sz=%ld\n",
             sreq, hdr, hdr_sz, data, data_sz);
 
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "llc_iSendContig");
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);
     MPL_DBG_PKT(vc, hdr, "isendcontig");
@@ -280,7 +280,7 @@ int MPID_nem_llc_iSendContig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr, in
         MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "pkt->type  = %d", pkt->type);
     }
 
-    MPIU_Assert(sreq != NULL);
+    MPIR_Assert(sreq != NULL);
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "OnDataAvail= %p", sreq->dev.OnDataAvail);
     sreq->ch.vc = vc;
     sreq->dev.iov_offset = 0;
@@ -328,7 +328,7 @@ int MPID_nem_llc_iSendContig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr, in
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_ISENDCONTIGMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_ISENDCONTIGMSG);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -342,17 +342,17 @@ int MPID_nem_llc_SendNoncontig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr,
                                intptr_t hdr_sz)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_SENDNONCONTIG);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_SENDNONCONTIG);
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "llc_SendNoncontig");
 
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
 
     intptr_t data_sz;
     MPID_nem_llc_vc_area *vc_llc = 0;
     int need_to_queue = 0;
 
-    MPIU_Assert(sreq->dev.segment_first == 0);
+    MPIR_Assert(sreq->dev.segment_first == 0);
     REQ_FIELD(sreq, rma_buf) = NULL;
 
     sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
@@ -404,7 +404,7 @@ int MPID_nem_llc_SendNoncontig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr,
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_SENDNONCONTIG);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -418,13 +418,13 @@ int MPID_nem_llc_send_queued(MPIDI_VC_t * vc, rque_t * send_queue)
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_llc_vc_area *vc_llc;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_SEND_QUEUED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_SEND_QUEUED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_SEND_QUEUED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_SEND_QUEUED);
 
-    MPIU_Assert(vc != NULL);
+    MPIR_Assert(vc != NULL);
     vc_llc = VC_LLC(vc);
-    MPIU_Assert(vc_llc != NULL);
+    MPIR_Assert(vc_llc != NULL);
 
     while (!MPIDI_CH3I_Sendq_empty(*send_queue)) {
         ssize_t ret = 0;
@@ -462,7 +462,7 @@ int MPID_nem_llc_send_queued(MPIDI_VC_t * vc, rque_t * send_queue)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_SEND_QUEUED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_SEND_QUEUED);
     return mpi_errno;
     //fn_fail:
     goto fn_exit;
@@ -477,11 +477,11 @@ int MPIDI_nem_llc_Rqst_iov_update(MPIR_Request * mreq, intptr_t consume)
     int ret = TRUE;
     /* intptr_t oconsume = consume; */
     int iv, nv;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_LLC_RQST_IOV_UPDATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_NEM_LLC_RQST_IOV_UPDATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_LLC_RQST_IOV_UPDATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_NEM_LLC_RQST_IOV_UPDATE);
 
-    MPIU_Assert(consume >= 0);
+    MPIR_Assert(consume >= 0);
 
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "iov_update() : consume    %d", (int) consume);
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "iov_update() : iov_count  %d", mreq->dev.iov_count);
@@ -512,7 +512,7 @@ int MPIDI_nem_llc_Rqst_iov_update(MPIR_Request * mreq, intptr_t consume)
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "iov_update() : iov_offset %ld", mreq->dev.iov_offset);
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "iov_update() = %d", ret);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_LLC_RQST_IOV_UPDATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_NEM_LLC_RQST_IOV_UPDATE);
     return ret;
 }
 
@@ -724,14 +724,14 @@ int llc_poll(int in_blocking_poll, llc_send_f sfnc, llc_recv_f rfnc)
         if (nevents == 0) {
             break;
         }
-        MPIU_Assert(nevents == 1);
+        MPIR_Assert(nevents == 1);
 
         switch (events[0].type) {
         case LLC_EVENT_SEND_LEFT:{
                 dprintf("llc_poll,EVENT_SEND_LEFT\n");
                 lcmd = (LLC_cmd_t *) events[0].side.initiator.req_id;
-                MPIU_Assert(lcmd != 0);
-                MPIU_Assert(lcmd->opcode == LLC_OPCODE_SEND || lcmd->opcode == LLC_OPCODE_SSEND);
+                MPIR_Assert(lcmd != 0);
+                MPIR_Assert(lcmd->opcode == LLC_OPCODE_SEND || lcmd->opcode == LLC_OPCODE_SSEND);
 
                 if (events[0].side.initiator.error_code != LLC_ERROR_SUCCESS) {
                     printf("llc_poll,error_code=%d\n", events[0].side.initiator.error_code);
@@ -751,8 +751,8 @@ int llc_poll(int in_blocking_poll, llc_send_f sfnc, llc_recv_f rfnc)
         case LLC_EVENT_UNSOLICITED_LEFT:{
                 dprintf("llc_poll,EVENT_UNSOLICITED_LEFT\n");
                 lcmd = (LLC_cmd_t *) events[0].side.initiator.req_id;
-                MPIU_Assert(lcmd != 0);
-                MPIU_Assert(lcmd->opcode == LLC_OPCODE_UNSOLICITED);
+                MPIR_Assert(lcmd != 0);
+                MPIR_Assert(lcmd->opcode == LLC_OPCODE_UNSOLICITED);
 
                 struct llc_cmd_area *usr;
                 usr = (void *) lcmd->usr_area;
@@ -784,7 +784,7 @@ int llc_poll(int in_blocking_poll, llc_send_f sfnc, llc_recv_f rfnc)
                 bsiz = events[0].side.responder.length;
 #ifndef	notdef_hsiz_hack
 #if defined(__sparc__)
-                MPIU_Assert(((uintptr_t) buff % 8) == 0);
+                MPIR_Assert(((uintptr_t) buff % 8) == 0);
 #endif
 #endif /* notdef_hsiz_hack */
                 {
@@ -863,7 +863,7 @@ int llc_poll(int in_blocking_poll, llc_send_f sfnc, llc_recv_f rfnc)
                         found =
                             convert_rank_llc2mpi(req->comm, events[0].side.initiator.rank,
                                                  &req->status.MPI_SOURCE);
-                        MPIU_Assert(found);
+                        MPIR_Assert(found);
                     }
 
                     if (unlikely(events[0].side.initiator.error_code == LLC_ERROR_TRUNCATE)) {
@@ -877,7 +877,7 @@ int llc_poll(int in_blocking_poll, llc_send_f sfnc, llc_recv_f rfnc)
                     /* Dequeue request from posted queue.
                      * It's posted in MPID_Irecv --> MPIDI_CH3U_Recvq_FDU_or_AEP */
                     int found = MPIDI_CH3U_Recvq_DP(req);
-                    MPIU_Assert(found);
+                    MPIR_Assert(found);
                 }
 
                 /* Mark completion on rreq */
@@ -891,7 +891,7 @@ int llc_poll(int in_blocking_poll, llc_send_f sfnc, llc_recv_f rfnc)
                 MPIR_Request *req;
 
                 lcmd = (LLC_cmd_t *) events[0].side.initiator.req_id;
-                MPIU_Assert(lcmd != 0);
+                MPIR_Assert(lcmd != 0);
 
                 req = ((struct llc_cmd_area *) lcmd->usr_area)->cbarg;
 
@@ -964,8 +964,8 @@ int MPID_nem_llc_issend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Dat
     MPI_Aint dt_true_lb;
     int i;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ISSEND);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_ISSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_ISSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_ISSEND);
 
     dprintf
         ("llc_isend,%d->%d,buf=%p,count=%d,datatype=%08x,dest=%d,tag=%08x,comm=%p,context_offset=%d\n",
@@ -977,8 +977,8 @@ int MPID_nem_llc_issend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Dat
     dprintf("llc_isend,LLC_my_rank=%d\n", LLC_my_rank);
 
     struct MPIR_Request *sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-    MPIU_Assert(sreq != NULL);
-    MPIU_Object_set_ref(sreq, 2);
+    MPIR_Assert(sreq != NULL);
+    MPIR_Object_set_ref(sreq, 2);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
 
     /* Used in llc_poll --> MPID_nem_llc_send_handler */
@@ -1015,11 +1015,11 @@ int MPID_nem_llc_issend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Dat
     /* Prepare bit-vector to perform tag-match. We use the same bit-vector as in CH3 layer. */
     /* See src/mpid/ch3/src/mpid_isend.c */
     *(int32_t *) ((uint8_t *) & cmd[0].tag) = tag;
-    *(MPIU_Context_id_t *) ((uint8_t *) & cmd[0].tag + sizeof(int32_t)) =
+    *(MPIR_Context_id_t *) ((uint8_t *) & cmd[0].tag + sizeof(int32_t)) =
         comm->context_id + context_offset;
-    MPIU_Assert(sizeof(LLC_tag_t) >= sizeof(int32_t) + sizeof(MPIU_Context_id_t));
-    memset((uint8_t *) & cmd[0].tag + sizeof(int32_t) + sizeof(MPIU_Context_id_t),
-           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIU_Context_id_t));
+    MPIR_Assert(sizeof(LLC_tag_t) >= sizeof(int32_t) + sizeof(MPIR_Context_id_t));
+    memset((uint8_t *) & cmd[0].tag + sizeof(int32_t) + sizeof(MPIR_Context_id_t),
+           0, sizeof(LLC_tag_t) - sizeof(int32_t) - sizeof(MPIR_Context_id_t));
 
     dprintf("llc_isend,tag=");
     for (i = 0; i < sizeof(LLC_tag_t); i++) {
@@ -1049,11 +1049,11 @@ int MPID_nem_llc_issend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Dat
         intptr_t segment_first = 0;
         intptr_t segment_size = data_sz;
         intptr_t last = segment_size;
-        MPIU_Assert(last > 0);
+        MPIR_Assert(last > 0);
         REQ_FIELD(sreq, pack_buf) = MPL_malloc((size_t) data_sz);
         MPIR_ERR_CHKANDJUMP(!REQ_FIELD(sreq, pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
         MPIDU_Segment_pack(segment_ptr, segment_first, &last, (char *) (REQ_FIELD(sreq, pack_buf)));
-        MPIU_Assert(last == data_sz);
+        MPIR_Assert(last == data_sz);
         write_from_buf = REQ_FIELD(sreq, pack_buf);
     }
 
@@ -1073,7 +1073,7 @@ int MPID_nem_llc_issend(struct MPIDI_VC *vc, const void *buf, int count, MPI_Dat
 
   fn_exit:
     *request = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_ISSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_ISSEND);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_vc.c b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_vc.c
index 0c68da7..0b1fcb7 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_vc.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_vc.c
@@ -51,16 +51,16 @@ static MPIDI_Comm_ops_t comm_ops = {
 int MPID_nem_llc_vc_init(MPIDI_VC_t * vc)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_VC_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_VC_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_VC_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_VC_INIT);
 
     mpi_errno = llc_vc_init(vc);
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_VC_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_VC_INIT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -73,9 +73,9 @@ int MPID_nem_llc_vc_init(MPIDI_VC_t * vc)
 int MPID_nem_llc_vc_destroy(MPIDI_VC_t * vc)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_VC_DESTROY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_VC_DESTROY);
     /* free any resources associated with this VC here */
 
     {
@@ -90,7 +90,7 @@ int MPID_nem_llc_vc_destroy(MPIDI_VC_t * vc)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_VC_DESTROY);
     return mpi_errno;
     //fn_fail:
     goto fn_exit;
@@ -103,16 +103,16 @@ int MPID_nem_llc_vc_destroy(MPIDI_VC_t * vc)
 int MPID_nem_llc_vc_terminate(MPIDI_VC_t * vc)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LLC_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LLC_VC_TERMINATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LLC_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LLC_VC_TERMINATE);
 
     dprintf("llc_vc_terminate,enter,%d->%d\n", MPIDI_Process.my_pg_rank, vc->pg_rank);
 
     mpi_errno = MPIDI_CH3U_Handle_connection(vc, MPIDI_VC_EVENT_TERMINATED);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LLC_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LLC_VC_TERMINATE);
     return mpi_errno;
     //fn_fail:
     goto fn_exit;
@@ -193,10 +193,9 @@ static int MPID_nem_llc_vc_prnt(MPIDI_VC_t * vc)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    /* MPIU_OBJECT_HEADER; */
+    /* MPIR_OBJECT_HEADER; */
     /* src/include/mpir_objects.h */
     /* int handle; */
-    /* MPIU_Handle_ref_count ref_count; */
     /* MPIDI_VC_State_t state; */
     /* struct MPIDI_PG *pg; */
     /* int pg_rank; */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
index f577c68..f941a8c 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
@@ -279,7 +279,7 @@ static inline void _mxm_progress_cb(void *user_data)
     int mpi_errno = MPI_SUCCESS;
 
     mpi_errno = MPIDI_CH3_Progress_poke();
-    MPIU_Assert(mpi_errno == MPI_SUCCESS);
+    MPIR_Assert(mpi_errno == MPI_SUCCESS);
 }
 
 static inline void _mxm_req_wait(mxm_req_base_t * req)
@@ -302,7 +302,7 @@ static inline int _mxm_eager_threshold(void)
 /*
  * Tag management section
  */
-static inline mxm_tag_t _mxm_tag_mpi2mxm(int mpi_tag, MPIU_Context_id_t context_id)
+static inline mxm_tag_t _mxm_tag_mpi2mxm(int mpi_tag, MPIR_Context_id_t context_id)
 {
     mxm_tag_t mxm_tag;
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c
index 9034745..ba382ed 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c
@@ -128,12 +128,12 @@ int MPID_nem_mxm_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     int r;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_INIT);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_INIT);
 
     /* first make sure that our private fields in the vc and req fit into the area provided  */
-    MPIU_Assert(sizeof(MPID_nem_mxm_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);
-    MPIU_Assert(sizeof(MPID_nem_mxm_req_area) <= MPIDI_NEM_REQ_NETMOD_AREA_LEN);
+    MPIR_Assert(sizeof(MPID_nem_mxm_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);
+    MPIR_Assert(sizeof(MPID_nem_mxm_req_area) <= MPIDI_NEM_REQ_NETMOD_AREA_LEN);
 
 
     /* mpich-specific initialization of mxm */
@@ -187,7 +187,7 @@ int MPID_nem_mxm_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     MPIDI_Anysource_improbe_fn = MPID_nem_mxm_anysource_improbe;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_INIT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -201,8 +201,8 @@ int MPID_nem_mxm_finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_FINALIZE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_FINALIZE);
 
     _mxm_barrier();
 
@@ -211,7 +211,7 @@ int MPID_nem_mxm_finalize(void)
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_FINALIZE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -226,8 +226,8 @@ int MPID_nem_mxm_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz
     int mpi_errno = MPI_SUCCESS;
     int str_errno = MPL_STR_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_GET_BUSINESS_CARD);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_GET_BUSINESS_CARD);
 
     str_errno = MPL_str_add_binary_arg(bc_val_p, val_max_sz_p, MXM_MPICH_ENDPOINT_KEY,
                                         _mxm_obj.mxm_ep_addr, _mxm_obj.mxm_ep_addr_size);
@@ -237,7 +237,7 @@ int MPID_nem_mxm_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_GET_BUSINESS_CARD);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -251,13 +251,13 @@ int MPID_nem_mxm_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_CONNECT_TO_ROOT);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_CONNECT_TO_ROOT);
 
     MPIR_ERR_SETFATAL(mpi_errno, MPI_ERR_OTHER, "**notimpl");
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_CONNECT_TO_ROOT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -273,11 +273,11 @@ int MPID_nem_mxm_vc_init(MPIDI_VC_t * vc)
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
     MPID_nem_mxm_vc_area *vc_area = VC_BASE(vc);
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_VC_INIT);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_VC_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_VC_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_VC_INIT);
 
     /* local connection is used for any source communication */
-    MPIU_Assert(MPID_nem_mem_region.rank != vc->lpid);
+    MPIR_Assert(MPID_nem_mem_region.rank != vc->lpid);
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE,
                      (MPL_DBG_FDEST,
                       "[%i]=== connecting  to  %i  \n", MPID_nem_mem_region.rank, vc->lpid));
@@ -323,7 +323,7 @@ int MPID_nem_mxm_vc_init(MPIDI_VC_t * vc)
     vc_ch->iSendContig = MPID_nem_mxm_iSendContig;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_VC_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_VC_INIT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -337,8 +337,8 @@ int MPID_nem_mxm_vc_destroy(MPIDI_VC_t * vc)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_VC_DESTROY);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_VC_DESTROY);
 
     /* Do nothing because
      * finalize is called before vc destroy as result it is not possible
@@ -354,7 +354,7 @@ int MPID_nem_mxm_vc_destroy(MPIDI_VC_t * vc)
 #endif
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_VC_DESTROY);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -369,8 +369,8 @@ int MPID_nem_mxm_vc_terminate(MPIDI_VC_t * vc)
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_mxm_vc_area *vc_area = VC_BASE(vc);
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_VC_TERMINATE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_VC_TERMINATE);
 
     if (vc->state != MPIDI_VC_STATE_CLOSED) {
         /* VC is terminated as a result of a fault.  Complete
@@ -388,7 +388,7 @@ int MPID_nem_mxm_vc_terminate(MPIDI_VC_t * vc)
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_VC_TERMINATE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -498,7 +498,7 @@ static int _mxm_init(int rank, int size)
 
     list_init(&_mxm_obj.free_queue);
     list_grow_mxm_req(&_mxm_obj.free_queue);
-    MPIU_Assert(list_length(&_mxm_obj.free_queue) == MXM_MPICH_MAX_REQ);
+    MPIR_Assert(list_length(&_mxm_obj.free_queue) == MXM_MPICH_MAX_REQ);
 
     _mxm_obj.sreq_queue.head = _mxm_obj.sreq_queue.tail = NULL;
 
@@ -598,7 +598,7 @@ static int _mxm_connect(MPID_nem_mxm_ep_t * ep, const char *business_card,
 
     list_init(&ep->free_queue);
     list_grow_mxm_req(&ep->free_queue);
-    MPIU_Assert(list_length(&ep->free_queue) == MXM_MPICH_MAX_REQ);
+    MPIR_Assert(list_length(&ep->free_queue) == MXM_MPICH_MAX_REQ);
 
   fn_exit:
     return mpi_errno;
@@ -611,7 +611,7 @@ static int _mxm_disconnect(MPID_nem_mxm_ep_t * ep)
     int mpi_errno = MPI_SUCCESS;
     mxm_error_t ret = MXM_OK;
 
-    MPIU_Assert(ep);
+    MPIR_Assert(ep);
 
     if (ep->mxm_conn) {
         ret = mxm_ep_disconnect(ep->mxm_conn);
@@ -636,9 +636,9 @@ static int _mxm_add_comm(MPIR_Comm * comm, void *param)
     int mpi_errno = MPI_SUCCESS;
     mxm_error_t ret = MXM_OK;
     mxm_mq_h *mq_h_v;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
-    MPIU_CHKPMEM_MALLOC(mq_h_v, mxm_mq_h *, sizeof(mxm_mq_h) * 2, mpi_errno,
+    MPIR_CHKPMEM_MALLOC(mq_h_v, mxm_mq_h *, sizeof(mxm_mq_h) * 2, mpi_errno,
                         "mxm_mq_h_context_ptr");
 
     _dbg_mxm_output(6, "Add COMM comm %p (rank %d type %d context %d | %d size %d | %d) \n",
@@ -662,10 +662,10 @@ static int _mxm_add_comm(MPIR_Comm * comm, void *param)
     comm->dev.ch.netmod_priv = (void *) mq_h_v;
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
     return mpi_errno;
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c
index 3c922b1..60bddea 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_poll.c
@@ -30,8 +30,8 @@ int MPID_nem_mxm_poll(int in_blocking_progress)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *req = NULL;
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_POLL);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_POLL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_POLL);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_POLL);
 
     while (!MPID_nem_mxm_queue_empty(mxm_obj->sreq_queue)) {
         MPID_nem_mxm_queue_dequeue(&mxm_obj->sreq_queue, &req);
@@ -43,7 +43,7 @@ int MPID_nem_mxm_poll(int in_blocking_progress)
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_POLL);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_POLL);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -96,17 +96,17 @@ void MPID_nem_mxm_anysource_posted(MPIR_Request * req)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_POSTED);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_POSTED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_POSTED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_POSTED);
 
     _dbg_mxm_output(5, "Any Source ========> Posting req %p \n", req);
 
     mpi_errno = MPID_nem_mxm_recv(NULL, req);
-    MPIU_Assert(mpi_errno == MPI_SUCCESS);
+    MPIR_Assert(mpi_errno == MPI_SUCCESS);
 
     _dbg_mxm_out_req(req);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_POSTED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_POSTED);
 }
 
 
@@ -129,8 +129,8 @@ int MPID_nem_mxm_anysource_matched(MPIR_Request * req)
      * with a network capable of matching, and the same request is matched
      * by the network and, e.g., shared-memory.
      */
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_MATCHED);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_MATCHED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_MATCHED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_MATCHED);
 
     _dbg_mxm_output(5, "Any Source ========> Matching req %p \n", req);
 
@@ -143,7 +143,7 @@ int MPID_nem_mxm_anysource_matched(MPIR_Request * req)
 
     _dbg_mxm_out_req(req);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_MATCHED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_MATCHED);
     return matched;
 }
 
@@ -160,18 +160,18 @@ int MPID_nem_mxm_recv(MPIDI_VC_t * vc, MPIR_Request * rreq)
     MPI_Aint dt_true_lb;
     MPIDU_Datatype*dt_ptr;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_RECV);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_RECV);
 
-    MPIU_Assert(rreq);
-    MPIU_Assert(((rreq->dev.match.parts.rank == MPI_ANY_SOURCE) && (vc == NULL)) ||
+    MPIR_Assert(rreq);
+    MPIR_Assert(((rreq->dev.match.parts.rank == MPI_ANY_SOURCE) && (vc == NULL)) ||
                 (vc && !vc->ch.is_local));
 
     MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz,
                             dt_ptr, dt_true_lb);
 
     {
-        MPIU_Context_id_t context_id = rreq->dev.match.parts.context_id;
+        MPIR_Context_id_t context_id = rreq->dev.match.parts.context_id;
         int tag = rreq->dev.match.parts.tag;
         MPID_nem_mxm_vc_area *vc_area = NULL;
         MPID_nem_mxm_req_area *req_area = NULL;
@@ -221,7 +221,7 @@ int MPID_nem_mxm_recv(MPIDI_VC_t * vc, MPIR_Request * rreq)
         _dbg_mxm_out_req(rreq);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_RECV);
     return mpi_errno;
   fn_fail:ATTRIBUTE((unused))
         goto fn_exit;
@@ -246,7 +246,7 @@ static int _mxm_handle_rreq(MPIR_Request * req)
     /* an MPI_ANY_SOURCE request may have been previously removed from the
      * CH3 queue by an FDP (find and dequeue posted) operation */
     if (req->dev.match.parts.rank != MPI_ANY_SOURCE) {
-        MPIU_Assert(found);
+        MPIR_Assert(found);
     }
 
     MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, userbuf_sz, dt_ptr,
@@ -304,7 +304,7 @@ static int _mxm_handle_rreq(MPIR_Request * req)
             iov_buf = req_area->iov_buf;
             if (last && n_iov > 0) {
                 iov = MPL_malloc(n_iov * sizeof(*iov));
-                MPIU_Assert(iov);
+                MPIR_Assert(iov);
 
                 for (index = 0; index < n_iov; index++) {
                     iov[index].MPL_IOV_BUF = iov_buf[index].ptr;
@@ -334,7 +334,7 @@ static int _mxm_handle_rreq(MPIR_Request * req)
     }
 
     MPIDI_CH3U_Handle_recv_req(req->ch.vc, req, &complete);
-    MPIU_Assert(complete == TRUE);
+    MPIR_Assert(complete == TRUE);
 
     if (tmp_buf)
         MPL_free(tmp_buf);
@@ -349,7 +349,7 @@ static void _mxm_recv_completion_cb(void *context)
     mxm_recv_req_t *mxm_rreq;
     MPID_nem_mxm_req_area *req_area = NULL;
 
-    MPIU_Assert(req);
+    MPIR_Assert(req);
     _dbg_mxm_out_req(req);
 
     req_area = REQ_BASE(req);
@@ -387,7 +387,7 @@ static int _mxm_irecv(MPID_nem_mxm_ep_t * ep, MPID_nem_mxm_req_area * req, int i
     mxm_recv_req_t *mxm_rreq;
     list_head_t *free_queue = NULL;
 
-    MPIU_Assert(req);
+    MPIR_Assert(req);
 
     free_queue = (ep ? &ep->free_queue : &mxm_obj->free_queue);
     req->mxm_req = list_dequeue_mxm_req(free_queue);
@@ -459,13 +459,13 @@ static int _mxm_process_rdtype(MPIR_Request ** rreq_p, MPI_Datatype datatype,
     last = rreq->dev.segment_size;
     MPIDU_Segment_count_contig_blocks(rreq->dev.segment_ptr, rreq->dev.segment_first, &last,
                                      (MPI_Aint *) & n_iov);
-    MPIU_Assert(n_iov > 0);
+    MPIR_Assert(n_iov > 0);
     iov = MPL_malloc(n_iov * sizeof(*iov));
-    MPIU_Assert(iov);
+    MPIR_Assert(iov);
 
     last = rreq->dev.segment_size;
     MPIDU_Segment_unpack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, iov, &n_iov);
-    MPIU_Assert(last == rreq->dev.segment_size);
+    MPIR_Assert(last == rreq->dev.segment_size);
 
 #if defined(MXM_DEBUG) && (MXM_DEBUG > 0)
     _dbg_mxm_output(7, "Recv Noncontiguous data vector %i entries (free slots : %i)\n", n_iov,
@@ -479,7 +479,7 @@ static int _mxm_process_rdtype(MPIR_Request ** rreq_p, MPI_Datatype datatype,
     if (n_iov <= MXM_REQ_DATA_MAX_IOV) {
         if (n_iov > MXM_MPICH_MAX_IOV) {
             *iov_buf = (mxm_req_buffer_t *) MPL_malloc(n_iov * sizeof(**iov_buf));
-            MPIU_Assert(*iov_buf);
+            MPIR_Assert(*iov_buf);
         }
 
         for (index = 0; index < n_iov; index++) {
@@ -494,7 +494,7 @@ static int _mxm_process_rdtype(MPIR_Request ** rreq_p, MPI_Datatype datatype,
         MPI_Aint packsize = 0;
         MPIR_Pack_size_impl(rreq->dev.user_count, rreq->dev.datatype, &packsize);
         rreq->dev.tmpbuf = MPL_malloc((size_t) packsize);
-        MPIU_Assert(rreq->dev.tmpbuf);
+        MPIR_Assert(rreq->dev.tmpbuf);
         rreq->dev.tmpbuf_sz = packsize;
         (*iov_buf)[0].ptr = rreq->dev.tmpbuf;
         (*iov_buf)[0].length = (size_t) packsize;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_probe.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_probe.c
index 88d1b7a..27d2a2f 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_probe.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_probe.c
@@ -26,8 +26,8 @@ int MPID_nem_mxm_probe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm, i
     mxm_mq_h *mq_h_v = (mxm_mq_h *) comm->dev.ch.netmod_priv;
     MPID_nem_mxm_vc_area *vc_area = (vc ? VC_BASE(vc) : NULL);
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_PROBE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_PROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_PROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_PROBE);
 
     mxm_req.base.state = MXM_REQ_NEW;
     mxm_req.base.mq = mq_h_v[0];
@@ -52,7 +52,7 @@ int MPID_nem_mxm_probe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm, i
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_PROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_PROBE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -72,8 +72,8 @@ int MPID_nem_mxm_iprobe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
     mxm_mq_h *mq_h_v = (mxm_mq_h *) comm->dev.ch.netmod_priv;
     MPID_nem_mxm_vc_area *vc_area = (vc ? VC_BASE(vc) : NULL);
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_IPROBE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_IPROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_IPROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_IPROBE);
 
     mxm_req.base.state = MXM_REQ_NEW;
     mxm_req.base.mq = mq_h_v[0];
@@ -98,7 +98,7 @@ int MPID_nem_mxm_iprobe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_IPROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_IPROBE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -119,8 +119,8 @@ int MPID_nem_mxm_improbe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
     mxm_mq_h *mq_h_v = (mxm_mq_h *) comm->dev.ch.netmod_priv;
     MPID_nem_mxm_vc_area *vc_area = (vc ? VC_BASE(vc) : NULL);
 
-    MPIDI_STATE_DECL(MPID_STATE_MXM_IMPROBE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MXM_IMPROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MXM_IMPROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MXM_IMPROBE);
 
     mxm_req.base.state = MXM_REQ_NEW;
     mxm_req.base.mq = mq_h_v[0];
@@ -136,7 +136,7 @@ int MPID_nem_mxm_improbe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
         *flag = 1;
 
         req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-        MPIU_Object_set_ref(req, 2);
+        MPIR_Object_set_ref(req, 2);
         req->kind = MPIR_REQUEST_KIND__MPROBE;
         req->comm = comm;
         MPIR_Comm_add_ref(comm);
@@ -151,7 +151,7 @@ int MPID_nem_mxm_improbe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
         req->dev.recv_data_sz = mxm_req.completion.sender_len;
         MPIR_STATUS_SET_COUNT(req->status, req->dev.recv_data_sz);
         req->dev.tmpbuf = MPL_malloc(req->dev.recv_data_sz);
-        MPIU_Assert(req->dev.tmpbuf);
+        MPIR_Assert(req->dev.tmpbuf);
 
         mxm_req.base.completed_cb = NULL;
         mxm_req.base.context = req;
@@ -190,7 +190,7 @@ int MPID_nem_mxm_improbe(MPIDI_VC_t * vc, int source, int tag, MPIR_Comm * comm,
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MXM_IMPROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MXM_IMPROBE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c
index d6313b6..a2573dc 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_send.c
@@ -39,14 +39,14 @@ int MPID_nem_mxm_iSendContig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr, in
     MPID_nem_mxm_vc_area *vc_area = NULL;
     MPID_nem_mxm_req_area *req_area = NULL;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
 
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "mxm_iSendContig");
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);
 
-    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));
 
     _dbg_mxm_output(5,
                     "iSendContig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n",
@@ -86,7 +86,7 @@ int MPID_nem_mxm_iSendContig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr, in
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -105,18 +105,18 @@ int MPID_nem_mxm_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, intptr_t hdr_sz, vo
     MPID_nem_mxm_vc_area *vc_area = NULL;
     MPID_nem_mxm_req_area *req_area = NULL;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
 
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "mxm_iStartContigMsg");
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);
 
     /* create a request */
     sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-    MPIU_Assert(sreq != NULL);
-    MPIU_Object_set_ref(sreq, 2);
-    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(sreq != NULL);
+    MPIR_Object_set_ref(sreq, 2);
+    MPIR_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));
     sreq->kind = MPIR_REQUEST_KIND__SEND;
     sreq->dev.OnDataAvail = NULL;
     sreq->dev.tmpbuf = NULL;
@@ -151,7 +151,7 @@ int MPID_nem_mxm_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, intptr_t hdr_sz, vo
 
   fn_exit:
     *sreq_ptr = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -170,13 +170,13 @@ int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr,
     MPID_nem_mxm_vc_area *vc_area = NULL;
     MPID_nem_mxm_req_area *req_area = NULL;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
 
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "MPID_nem_mxm_iSendNoncontig");
 
-    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));
 
     _dbg_mxm_output(5,
                     "SendNoncontig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n",
@@ -206,13 +206,13 @@ int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr,
      * either "last <= 0" or "last-sreq->dev.segment_first <=0" to this
      * layer. In future, if upper layer passes such kind of packet, the
      * judgement of the following IF branch needs to be modified. */
-    MPIU_Assert(last > 0 && last - sreq->dev.segment_first > 0);
+    MPIR_Assert(last > 0 && last - sreq->dev.segment_first > 0);
 
     if (last > 0) {
         sreq->dev.tmpbuf = MPL_malloc((size_t) (sreq->dev.segment_size - sreq->dev.segment_first));
-        MPIU_Assert(sreq->dev.tmpbuf);
+        MPIR_Assert(sreq->dev.tmpbuf);
         MPIDU_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.tmpbuf);
-        MPIU_Assert(last == sreq->dev.segment_size);
+        MPIR_Assert(last == sreq->dev.segment_size);
 
         req_area->iov_buf[req_area->iov_count].ptr = sreq->dev.tmpbuf;
         req_area->iov_buf[req_area->iov_count].length = last - sreq->dev.segment_first;
@@ -229,7 +229,7 @@ int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPIR_Request * sreq, void *hdr,
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -254,14 +254,14 @@ int MPID_nem_mxm_send(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Data
     MPID_nem_mxm_req_area *req_area = NULL;
     mxm_mq_h *mq_h_v = (mxm_mq_h *) comm->dev.ch.netmod_priv;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SEND);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_SEND);
 
     MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
 
     /* create a request */
     MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
-    MPIU_Assert(sreq != NULL);
+    MPIR_Assert(sreq != NULL);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
 
     MPIDI_VC_FAI_send_seqnum(vc, seqnum);
@@ -307,7 +307,7 @@ int MPID_nem_mxm_send(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Data
             last = data_sz;
             if (packsize > 0) {
                 sreq->dev.tmpbuf = MPL_malloc((size_t) packsize);
-                MPIU_Assert(sreq->dev.tmpbuf);
+                MPIR_Assert(sreq->dev.tmpbuf);
                 MPIDU_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
                 MPIDU_Segment_pack(sreq->dev.segment_ptr, 0, &last, sreq->dev.tmpbuf);
 
@@ -332,7 +332,7 @@ int MPID_nem_mxm_send(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Data
 
   fn_exit:
     *sreq_ptr = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_SEND);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -357,14 +357,14 @@ int MPID_nem_mxm_ssend(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Dat
     MPID_nem_mxm_req_area *req_area = NULL;
     mxm_mq_h *mq_h_v = (mxm_mq_h *) comm->dev.ch.netmod_priv;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SSEND);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_SSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_SSEND);
 
     MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
 
     /* create a request */
     MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
-    MPIU_Assert(sreq != NULL);
+    MPIR_Assert(sreq != NULL);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);
 
     MPIDI_VC_FAI_send_seqnum(vc, seqnum);
@@ -410,7 +410,7 @@ int MPID_nem_mxm_ssend(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Dat
             last = data_sz;
             if (packsize > 0) {
                 sreq->dev.tmpbuf = MPL_malloc((size_t) packsize);
-                MPIU_Assert(sreq->dev.tmpbuf);
+                MPIR_Assert(sreq->dev.tmpbuf);
                 MPIDU_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
                 MPIDU_Segment_pack(sreq->dev.segment_ptr, 0, &last, sreq->dev.tmpbuf);
 
@@ -435,7 +435,7 @@ int MPID_nem_mxm_ssend(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Dat
 
   fn_exit:
     *sreq_ptr = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_SSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_SSEND);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -460,14 +460,14 @@ int MPID_nem_mxm_isend(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Dat
     MPID_nem_mxm_req_area *req_area = NULL;
     mxm_mq_h *mq_h_v = (mxm_mq_h *) comm->dev.ch.netmod_priv;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISEND);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_ISEND);
 
     MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
 
     /* create a request */
     MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
-    MPIU_Assert(sreq != NULL);
+    MPIR_Assert(sreq != NULL);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
 
     MPIDI_VC_FAI_send_seqnum(vc, seqnum);
@@ -513,7 +513,7 @@ int MPID_nem_mxm_isend(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Dat
             last = data_sz;
             if (packsize > 0) {
                 sreq->dev.tmpbuf = MPL_malloc((size_t) packsize);
-                MPIU_Assert(sreq->dev.tmpbuf);
+                MPIR_Assert(sreq->dev.tmpbuf);
                 MPIDU_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
                 MPIDU_Segment_pack(sreq->dev.segment_ptr, 0, &last, sreq->dev.tmpbuf);
 
@@ -538,7 +538,7 @@ int MPID_nem_mxm_isend(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Dat
 
   fn_exit:
     *sreq_ptr = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_ISEND);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -563,14 +563,14 @@ int MPID_nem_mxm_issend(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Da
     MPID_nem_mxm_req_area *req_area = NULL;
     mxm_mq_h *mq_h_v = (mxm_mq_h *) comm->dev.ch.netmod_priv;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISSEND);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MXM_ISSEND);
 
     MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
 
     /* create a request */
     MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
-    MPIU_Assert(sreq != NULL);
+    MPIR_Assert(sreq != NULL);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);
 
     MPIDI_VC_FAI_send_seqnum(vc, seqnum);
@@ -617,7 +617,7 @@ int MPID_nem_mxm_issend(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Da
             last = data_sz;
             if (packsize > 0) {
                 sreq->dev.tmpbuf = MPL_malloc((size_t) packsize);
-                MPIU_Assert(sreq->dev.tmpbuf);
+                MPIR_Assert(sreq->dev.tmpbuf);
                 MPIDU_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
                 MPIDU_Segment_pack(sreq->dev.segment_ptr, 0, &last, sreq->dev.tmpbuf);
 
@@ -641,7 +641,7 @@ int MPID_nem_mxm_issend(MPIDI_VC_t * vc, const void *buf, MPI_Aint count, MPI_Da
 
   fn_exit:
     *sreq_ptr = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MXM_ISSEND);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -674,7 +674,7 @@ int _mxm_handle_sreq(MPIR_Request * req)
     }
 
     MPIDI_CH3U_Handle_send_req(req->ch.vc, req, &complete);
-    MPIU_Assert(complete == TRUE);
+    MPIR_Assert(complete == TRUE);
 
     return complete;
 }
@@ -686,7 +686,7 @@ static void _mxm_send_completion_cb(void *context)
     MPID_nem_mxm_vc_area *vc_area = NULL;
     MPID_nem_mxm_req_area *req_area = NULL;
 
-    MPIU_Assert(req);
+    MPIR_Assert(req);
     _dbg_mxm_out_req(req);
 
     vc_area = VC_BASE(req->ch.vc);
@@ -714,8 +714,8 @@ static int _mxm_isend(MPID_nem_mxm_ep_t * ep, MPID_nem_mxm_req_area * req,
     mxm_send_req_t *mxm_sreq;
     list_head_t *free_queue = NULL;
 
-    MPIU_Assert(ep);
-    MPIU_Assert(req);
+    MPIR_Assert(ep);
+    MPIR_Assert(req);
 
     free_queue = &ep->free_queue;
     req->mxm_req = list_dequeue_mxm_req(free_queue);
@@ -809,13 +809,13 @@ static int _mxm_process_sdtype(MPIR_Request ** sreq_p, MPI_Datatype datatype,
     last = sreq->dev.segment_size;
     MPIDU_Segment_count_contig_blocks(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                                      (MPI_Aint *) & n_iov);
-    MPIU_Assert(n_iov > 0);
+    MPIR_Assert(n_iov > 0);
     iov = MPL_malloc(n_iov * sizeof(*iov));
-    MPIU_Assert(iov);
+    MPIR_Assert(iov);
 
     last = sreq->dev.segment_size;
     MPIDU_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, iov, &n_iov);
-    MPIU_Assert(last == sreq->dev.segment_size);
+    MPIR_Assert(last == sreq->dev.segment_size);
 
 #if defined(MXM_DEBUG) && (MXM_DEBUG > 0)
     _dbg_mxm_output(7, "Send Noncontiguous data vector %i entries (free slots : %i)\n", n_iov,
@@ -828,7 +828,7 @@ static int _mxm_process_sdtype(MPIR_Request ** sreq_p, MPI_Datatype datatype,
 
     if (n_iov > MXM_MPICH_MAX_IOV) {
         *iov_buf = (mxm_req_buffer_t *) MPL_malloc(n_iov * sizeof(**iov_buf));
-        MPIU_Assert(*iov_buf);
+        MPIR_Assert(*iov_buf);
     }
 
     for (index = 0; index < n_iov; index++) {
@@ -850,9 +850,9 @@ static int _mxm_process_sdtype(MPIR_Request ** sreq_p, MPI_Datatype datatype,
         int offset = 0;
         sreq->dev.tmpbuf = MPL_malloc(size_to_copy);
         sreq->dev.tmpbuf_sz = size_to_copy;
-        MPIU_Assert(sreq->dev.tmpbuf);
+        MPIR_Assert(sreq->dev.tmpbuf);
         for (index = (MXM_REQ_DATA_MAX_IOV - 1); index < n_iov; index++) {
-            MPIU_Memcpy((char *) (sreq->dev.tmpbuf) + offset, iov[index].MPL_IOV_BUF,
+            MPIR_Memcpy((char *) (sreq->dev.tmpbuf) + offset, iov[index].MPL_IOV_BUF,
                         iov[index].MPL_IOV_LEN);
             offset += iov[index].MPL_IOV_LEN;
         }
diff --git a/src/mpid/ch3/channels/nemesis/netmod/none/none.c b/src/mpid/ch3/channels/nemesis/netmod/none/none.c
index f35f3b9..6c55181 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/none/none.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/none/none.c
@@ -15,13 +15,13 @@ static int nm_init(MPIDI_PG_t *pg_p, int pg_rank,
 
 static int nm_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p)
 {
-    MPIU_Assertp(0);
+    MPIR_Assertp(0);
     return MPI_SUCCESS;
 }
 
 static int nm_connect_to_root(const char *business_card, MPIDI_VC_t *new_vc)
 {
-    MPIU_Assertp(0);
+    MPIR_Assertp(0);
     return MPI_SUCCESS;
 }
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c
index 2816090..29965e7 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c
@@ -48,7 +48,7 @@ static inline MPIDI_VC_t *ofi_wc_to_vc(cq_tagged_entry_t * wc)
             vc = VC_OFI(vc)->next;
         }
         if (NULL == vc) {
-            MPIU_Assertp(0);
+            MPIR_Assertp(0);
         }
     }
     else {
@@ -69,7 +69,7 @@ static inline MPIDI_VC_t *ofi_wc_to_vc(cq_tagged_entry_t * wc)
             MPIDI_PG_Get_vc(pg, get_psource(match_bits), &vc);
         }
         else {
-            MPIU_Assert(0);
+            MPIR_Assert(0);
         }
     }
     END_FUNC(FCNAME);
@@ -105,9 +105,9 @@ static inline int MPID_nem_ofi_conn_req_callback(cq_tagged_entry_t * wc, MPIR_Re
 
     BEGIN_FUNC(FCNAME);
 
-    MPIU_Memcpy(bc, rreq->dev.user_buf, wc->len);
+    MPIR_Memcpy(bc, rreq->dev.user_buf, wc->len);
     bc[wc->len] = '\0';
-    MPIU_Assert(gl_data.conn_req == rreq);
+    MPIR_Assert(gl_data.conn_req == rreq);
     FI_RC_RETRY(fi_trecv(gl_data.endpoint,
                    gl_data.conn_req->dev.user_buf,
                    OFI_KVSAPPSTRLEN,
@@ -118,10 +118,10 @@ static inline int MPID_nem_ofi_conn_req_callback(cq_tagged_entry_t * wc, MPIR_Re
                    (void *) &(REQ_OFI(gl_data.conn_req)->ofi_context)), trecv);
 
     addr = MPL_malloc(gl_data.bound_addrlen);
-    MPIU_Assertp(addr);
+    MPIR_Assertp(addr);
 
     vc = MPL_malloc(sizeof(MPIDI_VC_t));
-    MPIU_Assertp(vc);
+    MPIR_Assertp(vc);
 
     MPIDI_VC_Init(vc, NULL, 0);
     MPIDI_CH3I_NM_OFI_RC(MPIDI_GetTagFromPort(bc, &vc->port_name_tag));
@@ -167,7 +167,7 @@ static inline int MPID_nem_ofi_handle_packet(cq_tagged_entry_t * wc ATTRIBUTE((u
     BEGIN_FUNC(FCNAME);
     if (MPIR_cc_get(rreq->cc) == 1) {
       vc = REQ_OFI(rreq)->vc;
-      MPIU_Assert(vc);
+      MPIR_Assert(vc);
       MPIDI_CH3I_NM_OFI_RC(MPID_nem_handle_pkt(vc, REQ_OFI(rreq)->pack_buffer, REQ_OFI(rreq)->pack_buffer_size));
       MPL_free(REQ_OFI(rreq)->pack_buffer);
     }
@@ -212,7 +212,7 @@ static inline int MPID_nem_ofi_preposted_callback(cq_tagged_entry_t * wc, MPIR_R
     BEGIN_FUNC(FCNAME);
 
     vc = ofi_wc_to_vc(wc);
-    MPIU_Assert(vc);
+    MPIR_Assert(vc);
     VC_READY_CHECK(vc);
 
     pkt_len = REQ_OFI(rreq)->msg_bytes;
@@ -249,7 +249,7 @@ static inline int MPID_nem_ofi_preposted_callback(cq_tagged_entry_t * wc, MPIR_R
                      gl_data.mr,
                      VC_OFI(vc)->direct_addr,
                      wc->tag | MPID_MSG_CTS, &(REQ_OFI(sreq)->ofi_context)), tsend);
-    MPIU_Assert(gl_data.persistent_req == rreq);
+    MPIR_Assert(gl_data.persistent_req == rreq);
 
     FI_RC_RETRY(fi_trecv(gl_data.endpoint,
                    &REQ_OFI(rreq)->msg_bytes,
@@ -401,8 +401,8 @@ int MPID_nem_ofi_vc_connect(MPIDI_VC_t * vc)
 
     BEGIN_FUNC(FCNAME);
     addr = MPL_malloc(gl_data.bound_addrlen);
-    MPIU_Assert(addr);
-    MPIU_Assert(1 != VC_OFI(vc)->ready);
+    MPIR_Assert(addr);
+    MPIR_Assert(1 != VC_OFI(vc)->ready);
 
     if (!vc->pg || !vc->pg->getConnInfo) {
         goto fn_exit;
@@ -473,7 +473,7 @@ int MPID_nem_ofi_vc_destroy(MPIDI_VC_t * vc)
             prev = VC_OFI(prev)->next;
         }
 
-        MPIU_Assert(prev != NULL);
+        MPIR_Assert(prev != NULL);
 
         if (VC_OFI(prev)->next == vc) {
             VC_OFI(prev)->next = VC_OFI(vc)->next;
@@ -482,7 +482,7 @@ int MPID_nem_ofi_vc_destroy(MPIDI_VC_t * vc)
             gl_data.cm_vcs = VC_OFI(vc)->next;
         }
         else {
-            MPIU_Assert(0);
+            MPIR_Assert(0);
         }
     }
     VC_OFI(vc)->ready = 0;
@@ -530,8 +530,8 @@ int MPID_nem_ofi_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc)
     BEGIN_FUNC(FCNAME);
     addr = MPL_malloc(gl_data.bound_addrlen);
     bc = MPL_malloc(OFI_KVSAPPSTRLEN);
-    MPIU_Assertp(addr);
-    MPIU_Assertp(bc);
+    MPIR_Assertp(addr);
+    MPIR_Assertp(bc);
     my_bc = bc;
     if (!business_card || business_card[0] != 't') {
         mpi_errno = MPI_ERR_OTHER;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
index 959503b..3974407 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
@@ -130,13 +130,13 @@ typedef struct {
 #undef FUNCNAME
 #define FUNCNAME nothing
 #define BEGIN_FUNC(FUNCNAME)                    \
-  MPIDI_STATE_DECL(FUNCNAME);                   \
-  MPIDI_FUNC_ENTER(FUNCNAME);
+  MPIR_FUNC_VERBOSE_STATE_DECL(FUNCNAME);                   \
+  MPIR_FUNC_VERBOSE_ENTER(FUNCNAME);
 #define END_FUNC(FUNCNAME)                      \
-  MPIDI_FUNC_EXIT(FUNCNAME);
+  MPIR_FUNC_VERBOSE_EXIT(FUNCNAME);
 #define END_FUNC_RC(FUNCNAME) \
   fn_exit:                    \
-  MPIDI_FUNC_EXIT(FUNCNAME);  \
+  MPIR_FUNC_VERBOSE_EXIT(FUNCNAME);  \
   return mpi_errno;           \
 fn_fail:                      \
   goto fn_exit;
@@ -219,11 +219,11 @@ fn_fail:                      \
 #define OFI_ADDR_INIT(src, vc, remote_proc) \
 ({                                          \
   if (MPI_ANY_SOURCE != src) {              \
-    MPIU_Assert(vc != NULL);                \
+    MPIR_Assert(vc != NULL);                \
     VC_READY_CHECK(vc);                     \
     remote_proc = VC_OFI(vc)->direct_addr;  \
   } else {                                  \
-    MPIU_Assert(vc == NULL);                \
+    MPIR_Assert(vc == NULL);                \
     remote_proc = FI_ADDR_UNSPEC;           \
   }                                         \
 })
@@ -250,9 +250,9 @@ static inline int MPID_nem_ofi_create_req(MPIR_Request ** request, int refcnt)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *req;
     req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-    MPIU_Assert(req);
+    MPIR_Assert(req);
     MPIDI_Request_clear_dbg(req);
-    MPIU_Object_set_ref(req, refcnt);
+    MPIR_Object_set_ref(req, refcnt);
     MPID_nem_ofi_init_req(req);
     *request = req;
     return mpi_errno;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
index 26c5b5b..cbfceb0 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
@@ -53,7 +53,7 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     MPIDI_VC_t *vc;
 
     BEGIN_FUNC(FCNAME);
-    MPIU_CHKLMEM_DECL(2);
+    MPIR_CHKLMEM_DECL(2);
 
     compile_time_checking();
     /* ------------------------------------------------------------------------ */
@@ -85,7 +85,7 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     hints->rx_attr->msg_order = FI_ORDER_SAS;
 
     hints->ep_attr->mem_tag_format = MEM_TAG_FORMAT;
-    MPIU_Assert(pg_p->size < ((1 << MPID_RANK_BITS) - 1));
+    MPIR_Assert(pg_p->size < ((1 << MPID_RANK_BITS) - 1));
 
     /* ------------------------------------------------------------------------ */
     /* FI_VERSION provides binary backward and forward compatibility support    */
@@ -246,7 +246,7 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     /* from KVS and store them in local  */
     /* table                             */
     /* --------------------------------- */
-    MPIU_CHKLMEM_MALLOC(addrs, char *, pg_p->size * gl_data.bound_addrlen, mpi_errno, "addrs");
+    MPIR_CHKLMEM_MALLOC(addrs, char *, pg_p->size * gl_data.bound_addrlen, mpi_errno, "addrs");
 
     for (i = 0; i < pg_p->size; ++i) {
         sprintf(key, "OFI-%d", i);
@@ -290,7 +290,7 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
   fn_exit:
     if (fi_addrs)
         MPL_free(fi_addrs);
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     END_FUNC(FCNAME);
     return mpi_errno;
   fn_fail:
@@ -336,11 +336,11 @@ static inline int compile_time_checking()
     OFI_COMPILE_TIME_ASSERT(sizeof(MPID_nem_ofi_vc_t) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);
     OFI_COMPILE_TIME_ASSERT(sizeof(MPID_nem_ofi_req_t) <= MPIDI_NEM_REQ_NETMOD_AREA_LEN);
     OFI_COMPILE_TIME_ASSERT(sizeof(iovec_t) == sizeof(MPL_IOV));
-    MPIU_Assert(((void *) &(((iovec_t *) 0)->iov_base)) ==
+    MPIR_Assert(((void *) &(((iovec_t *) 0)->iov_base)) ==
                 ((void *) &(((MPL_IOV *) 0)->MPL_IOV_BUF)));
-    MPIU_Assert(((void *) &(((iovec_t *) 0)->iov_len)) ==
+    MPIR_Assert(((void *) &(((iovec_t *) 0)->iov_len)) ==
                 ((void *) &(((MPL_IOV *) 0)->MPL_IOV_LEN)));
-    MPIU_Assert(sizeof(((iovec_t *) 0)->iov_len) == sizeof(((MPL_IOV *) 0)->MPL_IOV_LEN));
+    MPIR_Assert(sizeof(((iovec_t *) 0)->iov_len) == sizeof(((MPL_IOV *) 0)->MPL_IOV_LEN));
 
     /* ------------------------------------------------------------------------ */
     /* Generate the MPICH catalog files                                         */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
index 29258c8..47da3c1 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
@@ -224,7 +224,7 @@ int MPID_nem_ofi_iSendContig(MPIDI_VC_t * vc,
     intptr_t buf_offset = 0;
     size_t         pkt_len;
     BEGIN_FUNC(FCNAME);
-    MPIU_Assert(hdr_sz <= (intptr_t) sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= (intptr_t) sizeof(MPIDI_CH3_Pkt_t));
     MPID_nem_ofi_init_req(sreq);
     pkt_len = sizeof(MPIDI_CH3_Pkt_t) + sreq->dev.ext_hdr_sz + data_sz;
     if (sreq->dev.ext_hdr_sz > 0 && gl_data.iov_limit > 2) {
@@ -238,15 +238,15 @@ int MPID_nem_ofi_iSendContig(MPIDI_VC_t * vc,
       REQ_OFI(sreq)->iov[2].iov_base = data;
       REQ_OFI(sreq)->iov[2].iov_len  = data_sz;
       REQ_OFI(sreq)->iov_count       = 3;
-      MPIU_Memcpy(REQ_OFI(sreq)->real_hdr, hdr, hdr_sz);
-      MPIU_Memcpy(REQ_OFI(sreq)->real_hdr + sizeof(MPIDI_CH3_Pkt_t),
+      MPIR_Memcpy(REQ_OFI(sreq)->real_hdr, hdr, hdr_sz);
+      MPIR_Memcpy(REQ_OFI(sreq)->real_hdr + sizeof(MPIDI_CH3_Pkt_t),
                   sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
       }
     else if(sreq->dev.ext_hdr_sz == 0 && gl_data.iov_limit > 1) {
         REQ_OFI(sreq)->real_hdr = MPL_malloc(sizeof(MPIDI_CH3_Pkt_t));
         MPIR_ERR_CHKANDJUMP1(REQ_OFI(sreq)->real_hdr == NULL, mpi_errno, MPI_ERR_OTHER,
                              "**nomem", "**nomem %s", "iSendContig header allocation");
-        MPIU_Memcpy(REQ_OFI(sreq)->real_hdr, hdr, hdr_sz);
+        MPIR_Memcpy(REQ_OFI(sreq)->real_hdr, hdr, hdr_sz);
         REQ_OFI(sreq)->iov[0].iov_base = REQ_OFI(sreq)->real_hdr;
         REQ_OFI(sreq)->iov[0].iov_len  = sizeof(MPIDI_CH3_Pkt_t);
         REQ_OFI(sreq)->iov[1].iov_base = data;
@@ -257,13 +257,13 @@ int MPID_nem_ofi_iSendContig(MPIDI_VC_t * vc,
       pack_buffer = MPL_malloc(pkt_len);
       MPIR_ERR_CHKANDJUMP1(pack_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
                            "**nomem", "**nomem %s", "iSendContig pack buffer allocation");
-      MPIU_Memcpy(pack_buffer, hdr, hdr_sz);
+      MPIR_Memcpy(pack_buffer, hdr, hdr_sz);
       buf_offset += sizeof(MPIDI_CH3_Pkt_t);
       if (sreq->dev.ext_hdr_sz > 0) {
-        MPIU_Memcpy(pack_buffer + buf_offset, sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
+        MPIR_Memcpy(pack_buffer + buf_offset, sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
         buf_offset += sreq->dev.ext_hdr_sz;
       }
-      MPIU_Memcpy(pack_buffer + buf_offset, data, data_sz);
+      MPIR_Memcpy(pack_buffer + buf_offset, data, data_sz);
     }
     START_COMM();
     END_FUNC_RC(FCNAME);
@@ -284,7 +284,7 @@ int MPID_nem_ofi_SendNoncontig(MPIDI_VC_t * vc,
     void          *data       = NULL;
     size_t         pkt_len;
     BEGIN_FUNC(FCNAME);
-    MPIU_Assert(hdr_sz <= (intptr_t) sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= (intptr_t) sizeof(MPIDI_CH3_Pkt_t));
     MPID_nem_ofi_init_req(sreq);
     first = sreq->dev.segment_first;
     last = sreq->dev.segment_size;
@@ -293,10 +293,10 @@ int MPID_nem_ofi_SendNoncontig(MPIDI_VC_t * vc,
     pack_buffer = MPL_malloc(pkt_len);
     MPIR_ERR_CHKANDJUMP1(pack_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
                          "**nomem", "**nomem %s", "SendNonContig pack buffer allocation");
-    MPIU_Memcpy(pack_buffer, hdr, hdr_sz);
+    MPIR_Memcpy(pack_buffer, hdr, hdr_sz);
     buf_offset += sizeof(MPIDI_CH3_Pkt_t);
     if (sreq->dev.ext_hdr_sz > 0) {
-        MPIU_Memcpy(pack_buffer + buf_offset, sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
+        MPIR_Memcpy(pack_buffer + buf_offset, sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
         buf_offset += sreq->dev.ext_hdr_sz;
     }
     MPIDU_Segment_pack(sreq->dev.segment_ptr, first, &last, pack_buffer + buf_offset);
@@ -319,7 +319,7 @@ int MPID_nem_ofi_iStartContigMsg(MPIDI_VC_t * vc,
     uint64_t match_bits;
     size_t   pkt_len;
     BEGIN_FUNC(FCNAME);
-    MPIU_Assert(hdr_sz <= (intptr_t) sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= (intptr_t) sizeof(MPIDI_CH3_Pkt_t));
 
     MPID_nem_ofi_create_req(&sreq, 2);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
@@ -328,7 +328,7 @@ int MPID_nem_ofi_iStartContigMsg(MPIDI_VC_t * vc,
     pkt_len = sizeof(MPIDI_CH3_Pkt_t) + data_sz;
     if(gl_data.iov_limit > 1) {
       REQ_OFI(sreq)->real_hdr = MPL_malloc(sizeof(MPIDI_CH3_Pkt_t));
-      MPIU_Memcpy(REQ_OFI(sreq)->real_hdr, hdr, hdr_sz);
+      MPIR_Memcpy(REQ_OFI(sreq)->real_hdr, hdr, hdr_sz);
       REQ_OFI(sreq)->iov[0].iov_base = REQ_OFI(sreq)->real_hdr;
       REQ_OFI(sreq)->iov[0].iov_len  = sizeof(MPIDI_CH3_Pkt_t);
       REQ_OFI(sreq)->iov[1].iov_base = data;
@@ -339,9 +339,9 @@ int MPID_nem_ofi_iStartContigMsg(MPIDI_VC_t * vc,
       pack_buffer = MPL_malloc(pkt_len);
       MPIR_ERR_CHKANDJUMP1(pack_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
                            "**nomem", "**nomem %s", "iStartContig pack buffer allocation");
-      MPIU_Memcpy((void *) pack_buffer, hdr, hdr_sz);
+      MPIR_Memcpy((void *) pack_buffer, hdr, hdr_sz);
       if (data_sz)
-        MPIU_Memcpy((void *) (pack_buffer + sizeof(MPIDI_CH3_Pkt_t)), data, data_sz);
+        MPIR_Memcpy((void *) (pack_buffer + sizeof(MPIDI_CH3_Pkt_t)), data, data_sz);
     }
     START_COMM();
     *sreq_ptr = sreq;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_progress.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_progress.c
index f3ec603..e35d050 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_progress.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_progress.c
@@ -74,11 +74,11 @@ int MPID_nem_ofi_poll(int in_blocking_poll)
                     continue;
                 }
                 else {
-                    MPIU_Assert(0);
+                    MPIR_Assert(0);
                 }
             }
             else {
-                MPIU_Assert(0);
+                MPIR_Assert(0);
             }
         }
         else if (ret == -FI_EAGAIN)
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tag_layout.h b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tag_layout.h
index 5c092cb..e3f0320 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tag_layout.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tag_layout.h
@@ -39,7 +39,7 @@
 /* ******************************** */
 /* Tag Manipulation inlines         */
 /* ******************************** */
-static inline uint64_t init_sendtag(MPIU_Context_id_t contextid, int source, int tag, uint64_t type)
+static inline uint64_t init_sendtag(MPIR_Context_id_t contextid, int source, int tag, uint64_t type)
 {
     uint64_t match_bits = 0;
     match_bits |= ((uint64_t)source) << MPID_SOURCE_SHIFT;
@@ -50,7 +50,7 @@ static inline uint64_t init_sendtag(MPIU_Context_id_t contextid, int source, int
 
 /* receive posting */
 static inline uint64_t init_recvtag(uint64_t * mask_bits,
-                                    MPIU_Context_id_t contextid, int source, int tag)
+                                    MPIR_Context_id_t contextid, int source, int tag)
 {
     uint64_t match_bits = 0;
     *mask_bits = MPID_SYNC_SEND;
@@ -112,7 +112,7 @@ static inline int get_port(uint64_t match_bits)
 /* ******************************** */
 /* Tag Manipulation inlines         */
 /* ******************************** */
-static inline uint64_t init_sendtag_2(MPIU_Context_id_t contextid, int tag, uint64_t type)
+static inline uint64_t init_sendtag_2(MPIR_Context_id_t contextid, int tag, uint64_t type)
 {
     uint64_t match_bits = 0;
     match_bits |= ((uint64_t)contextid) << MPID_CTXID_SHIFT;
@@ -122,7 +122,7 @@ static inline uint64_t init_sendtag_2(MPIU_Context_id_t contextid, int tag, uint
 
 /* receive posting */
 static inline uint64_t init_recvtag_2(uint64_t * mask_bits,
-                                    MPIU_Context_id_t contextid, int tag)
+                                    MPIR_Context_id_t contextid, int tag)
 {
     uint64_t match_bits = 0;
     *mask_bits = MPID_SYNC_SEND;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c
index 5cfae09..fbfc0bb 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c
@@ -64,7 +64,7 @@ int ADD_SUFFIX(MPID_nem_ofi_recv_callback)(cq_tagged_entry_t * wc, MPIR_Request
         vc = REQ_OFI(rreq)->vc;
         if (!vc) {      /* MPI_ANY_SOURCE -- Post message from status, complete the VC */
             vc = rreq->comm->dev.vcrt->vcr_table[src];
-            MPIU_Assert(vc);
+            MPIR_Assert(vc);
         }
 #if API_SET == API_SET_1
         ssend_bits = init_sendtag(rreq->dev.match.parts.context_id,
@@ -237,7 +237,7 @@ ADD_SUFFIX(send_lightweight)(struct MPIDI_VC *vc,
     uint64_t match_bits = init_sendtag_2(comm->context_id + context_offset, tag, MPID_NORMAL_SEND);
 #endif
 
-    MPIU_Assert(data_sz <= gl_data.max_buffered_send);
+    MPIR_Assert(data_sz <= gl_data.max_buffered_send);
 
 #if API_SET == API_SET_1
     FI_RC_RETRY(fi_tinject(gl_data.endpoint,
@@ -383,7 +383,7 @@ int ADD_SUFFIX(MPID_nem_ofi_recv_posted)(struct MPIDI_VC *vc, struct MPIR_Reques
     intptr_t data_sz;
     MPI_Aint dt_true_lb;
     MPIDU_Datatype*dt_ptr;
-    MPIU_Context_id_t context_id;
+    MPIR_Context_id_t context_id;
     char *recv_buffer;
     BEGIN_FUNC(FCNAME);
 
@@ -456,6 +456,6 @@ void ADD_SUFFIX(MPID_nem_ofi_anysource_posted)(MPIR_Request * rreq)
     int mpi_errno = MPI_SUCCESS;
     BEGIN_FUNC(FCNAME);
     mpi_errno = ADD_SUFFIX(MPID_nem_ofi_recv_posted)(NULL, rreq);
-    MPIU_Assert(mpi_errno == MPI_SUCCESS);
+    MPIR_Assert(mpi_errno == MPI_SUCCESS);
     END_FUNC(FCNAME);
 }
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
index 68aab68..2ace101 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
@@ -69,7 +69,7 @@ static inline MPID_nem_ptl_req_area * REQ_PTL(MPIR_Request *req) {
 
 #define MPID_nem_ptl_request_create_sreq(sreq_, errno_, comm_) do {                                             \
         (sreq_) = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);               \
-        MPIU_Object_set_ref((sreq_), 2);                                                                        \
+        MPIR_Object_set_ref((sreq_), 2);                                                                        \
         (sreq_)->kind               = MPIR_REQUEST_KIND__SEND;                                                        \
         MPIR_Comm_add_ref(comm_);                                                                               \
         (sreq_)->comm               = comm_;                                                                    \
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
index 9f7d691..49b1311 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
@@ -104,9 +104,9 @@ static int get_target_info(int rank, ptl_process_t *id, ptl_pt_index_t local_dat
     int mpi_errno = MPI_SUCCESS;
     struct MPIDI_VC *vc;
     MPID_nem_ptl_vc_area *vc_ptl;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_GET_TARGET_INFO);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_GET_TARGET_INFO);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_GET_TARGET_INFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_GET_TARGET_INFO);
 
     MPIDI_PG_Get_vc(MPIDI_Process.my_pg, rank, &vc);
     vc_ptl = VC_PTL(vc);
@@ -117,7 +117,7 @@ static int get_target_info(int rank, ptl_process_t *id, ptl_pt_index_t local_dat
 
     *id = vc_ptl->id;
 
-    MPIU_Assert(local_data_pt == MPIDI_nem_ptl_pt || local_data_pt == MPIDI_nem_ptl_get_pt ||
+    MPIR_Assert(local_data_pt == MPIDI_nem_ptl_pt || local_data_pt == MPIDI_nem_ptl_get_pt ||
                 local_data_pt == MPIDI_nem_ptl_control_pt);
 
     if (local_data_pt == MPIDI_nem_ptl_pt) {
@@ -134,7 +134,7 @@ static int get_target_info(int rank, ptl_process_t *id, ptl_pt_index_t local_dat
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_GET_TARGET_INFO);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_GET_TARGET_INFO);
     return mpi_errno;
 
  fn_fail:
@@ -152,19 +152,19 @@ static int ptl_init(MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max
     int ret;
     ptl_md_t md;
     ptl_ni_limits_t desired;
-    MPIDI_STATE_DECL(MPID_STATE_PTL_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PTL_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_PTL_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PTL_INIT);
 
     /* first make sure that our private fields in the vc and req fit into the area provided  */
-    MPIU_Assert(sizeof(MPID_nem_ptl_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);
-    MPIU_Assert(sizeof(MPID_nem_ptl_req_area) <= MPIDI_NEM_REQ_NETMOD_AREA_LEN);
+    MPIR_Assert(sizeof(MPID_nem_ptl_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);
+    MPIR_Assert(sizeof(MPID_nem_ptl_req_area) <= MPIDI_NEM_REQ_NETMOD_AREA_LEN);
 
     /* Make sure our IOV is the same as portals4's IOV */
-    MPIU_Assert(sizeof(ptl_iovec_t) == sizeof(MPL_IOV));
-    MPIU_Assert(((void*)&(((ptl_iovec_t*)0)->iov_base)) == ((void*)&(((MPL_IOV*)0)->MPL_IOV_BUF)));
-    MPIU_Assert(((void*)&(((ptl_iovec_t*)0)->iov_len))  == ((void*)&(((MPL_IOV*)0)->MPL_IOV_LEN)));
-    MPIU_Assert(sizeof(((ptl_iovec_t*)0)->iov_len) == sizeof(((MPL_IOV*)0)->MPL_IOV_LEN));
+    MPIR_Assert(sizeof(ptl_iovec_t) == sizeof(MPL_IOV));
+    MPIR_Assert(((void*)&(((ptl_iovec_t*)0)->iov_base)) == ((void*)&(((MPL_IOV*)0)->MPL_IOV_BUF)));
+    MPIR_Assert(((void*)&(((ptl_iovec_t*)0)->iov_len))  == ((void*)&(((MPL_IOV*)0)->MPL_IOV_LEN)));
+    MPIR_Assert(sizeof(((ptl_iovec_t*)0)->iov_len) == sizeof(((MPL_IOV*)0)->MPL_IOV_LEN));
             
 
     mpi_errno = MPIDI_CH3I_Register_anysource_notification(MPID_nem_ptl_anysource_posted, MPID_nem_ptl_anysource_matched);
@@ -286,7 +286,7 @@ static int ptl_init(MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_PTL_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PTL_INIT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -301,8 +301,8 @@ static int ptl_finalize(void)
     int mpi_errno = MPI_SUCCESS;
     int ret;
     ptl_handle_eq_t eqs[5];
-    MPIDI_STATE_DECL(MPID_STATE_PTL_FINALIZE);
-    MPIDI_FUNC_ENTER(MPID_STATE_PTL_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PTL_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PTL_FINALIZE);
 
     /* shut down other modules */
     mpi_errno = MPID_nem_ptl_nm_finalize();
@@ -353,7 +353,7 @@ static int ptl_finalize(void)
     PtlFini();
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_PTL_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PTL_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -372,9 +372,9 @@ static int get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p)
     int str_errno = MPL_STR_SUCCESS;
     int ret;
     ptl_process_t my_ptl_id;
-    MPIDI_STATE_DECL(MPID_STATE_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_GET_BUSINESS_CARD);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_GET_BUSINESS_CARD);
 
     ret = PtlGetId(MPIDI_nem_ptl_ni, &my_ptl_id);
     MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlgetid", "**ptlgetid %s", MPID_nem_ptl_strerror(ret));
@@ -428,7 +428,7 @@ static int get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GET_BUSINESS_CARD);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -441,14 +441,14 @@ static int get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p)
 static int connect_to_root(const char *business_card, MPIDI_VC_t *new_vc)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECT_TO_ROOT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECT_TO_ROOT);
 
     MPIR_ERR_SETFATAL(mpi_errno, MPI_ERR_OTHER, "**notimpl");
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECT_TO_ROOT);
     return mpi_errno;
 
  fn_fail:
@@ -464,9 +464,9 @@ static int vc_init(MPIDI_VC_t *vc)
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3I_VC *const vc_ch = &vc->ch;
     MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
-    MPIDI_STATE_DECL(MPID_STATE_VC_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_VC_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_VC_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_VC_INIT);
 
     vc->sendNoncontig_fn   = MPID_nem_ptl_SendNoncontig;
     vc_ch->iStartContigMsg = MPID_nem_ptl_iStartContigMsg;
@@ -496,7 +496,7 @@ static int vc_init(MPIDI_VC_t *vc)
 
     mpi_errno = MPID_nem_ptl_init_id(vc);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_VC_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_VC_INIT);
     return mpi_errno;
 }
 
@@ -523,9 +523,9 @@ int MPID_nem_ptl_get_id_from_bc(const char *business_card, ptl_process_t *id, pt
     int mpi_errno = MPI_SUCCESS;
     int ret;
     int len;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_GET_ID_FROM_BC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_GET_ID_FROM_BC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_GET_ID_FROM_BC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_GET_ID_FROM_BC);
 
     ret = MPL_str_get_binary_arg(business_card, NID_KEY, (char *)&id->phys.nid, sizeof(id->phys.nid), &len);
     MPIR_ERR_CHKANDJUMP(ret != MPL_STR_SUCCESS || len != sizeof(id->phys.nid), mpi_errno, MPI_ERR_OTHER, "**badbusinesscard");
@@ -552,7 +552,7 @@ int MPID_nem_ptl_get_id_from_bc(const char *business_card, ptl_process_t *id, pt
     MPIR_ERR_CHKANDJUMP(ret != MPL_STR_SUCCESS || len != sizeof(*ptrc), mpi_errno, MPI_ERR_OTHER, "**badbusinesscard");
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_GET_ID_FROM_BC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_GET_ID_FROM_BC);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -567,9 +567,9 @@ int vc_terminate(MPIDI_VC_t *vc)
     int mpi_errno = MPI_SUCCESS;
     int req_errno = MPI_SUCCESS;
     MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
-    MPIDI_STATE_DECL(MPID_STATE_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_VC_TERMINATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_VC_TERMINATE);
 
      if (vc->state != MPIDI_VC_STATE_CLOSED) {
         /* VC is terminated as a result of a fault.  Complete
@@ -587,7 +587,7 @@ int vc_terminate(MPIDI_VC_t *vc)
      }
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_VC_TERMINATE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -603,15 +603,15 @@ int MPID_nem_ptl_vc_terminated(MPIDI_VC_t *vc)
     /* This is called when the VC is to be terminated once all queued
        sends have been sent. */
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_NEM_PTL_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_NEM_PTL_VC_TERMINATED);
 
-    MPIDI_FUNC_ENTER(MPID_NEM_PTL_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_NEM_PTL_VC_TERMINATED);
 
     mpi_errno = MPIDI_CH3U_Handle_connection(vc, MPIDI_VC_EVENT_TERMINATED);
     if(mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_NEM_PTL_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_NEM_PTL_VC_TERMINATED);
     return mpi_errno;
  fn_fail:
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
@@ -629,14 +629,14 @@ int MPID_nem_ptl_init_id(MPIDI_VC_t *vc)
     char *bc;
     int pmi_errno;
     int val_max_sz;
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_INIT_ID);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_INIT_ID);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_INIT_ID);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_INIT_ID);
 
     pmi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
     MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
-    MPIU_CHKLMEM_MALLOC(bc, char *, val_max_sz, mpi_errno, "bc");
+    MPIR_CHKLMEM_MALLOC(bc, char *, val_max_sz, mpi_errno, "bc");
 
     mpi_errno = vc->pg->getConnInfo(vc->pg_rank, bc, val_max_sz, vc->pg);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -649,8 +649,8 @@ int MPID_nem_ptl_init_id(MPIDI_VC_t *vc)
     MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
     
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_INIT_ID);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_INIT_ID);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_lmt.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_lmt.c
index f6cdc06..c9ed406 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_lmt.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_lmt.c
@@ -32,7 +32,7 @@ int MPID_nem_ptl_lmt_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *rts_pkt, MPIR
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_ptl_lmt_start_send(MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV r_cookie)
 {
-    MPIU_Assertp(0 && "This function shouldn't be called.");
+    MPIR_Assertp(0 && "This function shouldn't be called.");
     return MPI_ERR_INTERN;
 }
 
@@ -43,7 +43,7 @@ int MPID_nem_ptl_lmt_start_send(MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV r_co
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_ptl_lmt_handle_cookie(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV s_cookie)
 {
-    MPIU_Assertp(0 && "This function shouldn't be called.");
+    MPIR_Assertp(0 && "This function shouldn't be called.");
     return MPI_ERR_INTERN;
 }
 
@@ -54,7 +54,7 @@ int MPID_nem_ptl_lmt_handle_cookie(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV s_
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_ptl_lmt_done_send(MPIDI_VC_t *vc, MPIR_Request *req)
 {
-    MPIU_Assertp(0 && "This function shouldn't be called.");
+    MPIR_Assertp(0 && "This function shouldn't be called.");
     return MPI_ERR_INTERN;
 }
 
@@ -65,6 +65,6 @@ int MPID_nem_ptl_lmt_done_send(MPIDI_VC_t *vc, MPIR_Request *req)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_ptl_lmt_done_recv(MPIDI_VC_t *vc, MPIR_Request *req)
 {
-    MPIU_Assertp(0 && "This function shouldn't be called.");
+    MPIR_Assertp(0 && "This function shouldn't be called.");
     return MPI_ERR_INTERN;
 }
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c
index 2a0cf8e..8ed7100 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c
@@ -96,9 +96,9 @@ int MPID_nem_ptl_nm_init(void)
     int i;
     int ret;
     char *tmp_ptr;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_NM_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_NM_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_NM_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_NM_INIT);
 
     /* init recv */
     overflow_me.length = BUFSIZE;
@@ -148,7 +148,7 @@ int MPID_nem_ptl_nm_init(void)
     reorder_queue.head = reorder_queue.tail = NULL;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_NM_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_NM_INIT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -163,9 +163,9 @@ int MPID_nem_ptl_nm_finalize(void)
     int mpi_errno = MPI_SUCCESS;
     int ret;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);
 
     for (i = 0; i < NUM_RECV_BUFS; ++i) {
         ret = PtlMEUnlink(me_handles[i]);
@@ -181,7 +181,7 @@ int MPID_nem_ptl_nm_finalize(void)
     MPL_free(recvbufs[0]);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_NM_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -202,18 +202,18 @@ static inline int send_pkt(MPIDI_VC_t *vc, void *hdr_p, void *data_p, intptr_t d
     const size_t remaining = data_sz - sent_sz;
     const size_t sendbuf_sz = SENDBUF_SIZE(sent_sz+sreq->dev.ext_hdr_sz+(remaining?sizeof(ptl_size_t):0));
     ptl_match_bits_t match_bits = NPTL_MATCH(sreq->handle, CTL_TAG, MPIDI_Process.my_pg_rank);
-    MPIDI_STATE_DECL(MPID_STATE_SEND_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SEND_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SEND_PKT);
     
     sendbuf = MPL_malloc(sendbuf_sz);
-    MPIU_Assert(sendbuf != NULL);
-    MPIU_Memcpy(sendbuf, hdr_p, sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(sendbuf != NULL);
+    MPIR_Memcpy(sendbuf, hdr_p, sizeof(MPIDI_CH3_Pkt_t));
     sendbuf_ptr = sendbuf + sizeof(MPIDI_CH3_Pkt_t);
 
     if (sreq->dev.ext_hdr_sz > 0) {
         /* copy extended packet header to send buf */
-        MPIU_Memcpy(sendbuf_ptr, sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
+        MPIR_Memcpy(sendbuf_ptr, sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
         sendbuf_ptr += sreq->dev.ext_hdr_sz;
     }
 
@@ -222,7 +222,7 @@ static inline int send_pkt(MPIDI_VC_t *vc, void *hdr_p, void *data_p, intptr_t d
     REQ_PTL(sreq)->put_done = 0;
 
     if (data_sz) {
-        MPIU_Memcpy(sendbuf_ptr, data_p, sent_sz);
+        MPIR_Memcpy(sendbuf_ptr, data_p, sent_sz);
         sendbuf_ptr += sent_sz;
         if (remaining) {
             /* The address/offset for the remote side to do the get is last in the buffer */
@@ -248,7 +248,7 @@ static inline int send_pkt(MPIDI_VC_t *vc, void *hdr_p, void *data_p, intptr_t d
     vc_ptl->num_queued_sends++;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SEND_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SEND_PKT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -269,17 +269,17 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr_p)
     const size_t remaining = data_sz - sent_sz;
     const size_t sendbuf_sz = SENDBUF_SIZE(sent_sz+sreq->dev.ext_hdr_sz+(remaining?sizeof(ptl_size_t):0));
     ptl_match_bits_t match_bits = NPTL_MATCH(sreq->handle, CTL_TAG, MPIDI_Process.my_pg_rank);
-    MPIDI_STATE_DECL(MPID_STATE_SEND_NONCONTIG_PKT);
-    MPIDI_FUNC_ENTER(MPID_STATE_SEND_NONCONTIG_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_NONCONTIG_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SEND_NONCONTIG_PKT);
 
     sendbuf = MPL_malloc(sendbuf_sz);
-    MPIU_Assert(sendbuf != NULL);
-    MPIU_Memcpy(sendbuf, hdr_p, sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(sendbuf != NULL);
+    MPIR_Memcpy(sendbuf, hdr_p, sizeof(MPIDI_CH3_Pkt_t));
     sendbuf_ptr = sendbuf + sizeof(MPIDI_CH3_Pkt_t);
 
     if (sreq->dev.ext_hdr_sz > 0) {
         /* copy extended packet header to send buf */
-        MPIU_Memcpy(sendbuf_ptr, sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
+        MPIR_Memcpy(sendbuf_ptr, sreq->dev.ext_hdr_ptr, sreq->dev.ext_hdr_sz);
         sendbuf_ptr += sreq->dev.ext_hdr_sz;
     }
 
@@ -300,7 +300,7 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr_p)
             first = last;
             last = sreq->dev.segment_size;
             MPIDU_Segment_pack(sreq->dev.segment_ptr, first, &last, TMPBUF(sreq));
-            MPIU_Assert(last == sreq->dev.segment_size);
+            MPIR_Assert(last == sreq->dev.segment_size);
 
             REQ_PTL(sreq)->num_gets = remaining / MPIDI_nem_ptl_ni_limits.max_msg_size;
             if (remaining % MPIDI_nem_ptl_ni_limits.max_msg_size) REQ_PTL(sreq)->num_gets++;
@@ -322,7 +322,7 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr_p)
     vc_ptl->num_queued_sends++;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SEND_NONCONTIG_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SEND_NONCONTIG_PKT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -336,16 +336,16 @@ static int send_noncontig_pkt(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr_p)
 int MPID_nem_ptl_SendNoncontig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr, intptr_t hdr_sz)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_SENDNONCONTIG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_SENDNONCONTIG);
     
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     mpi_errno = send_noncontig_pkt(vc, sreq, hdr);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_SENDNONCONTIG);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -359,15 +359,15 @@ int MPID_nem_ptl_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, voi
                                  intptr_t data_sz, MPIR_Request **sreq_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISTARTCONTIGMSG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISTARTCONTIGMSG);
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_ISTARTCONTIGMSG);
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
 
     /* create a request */
     *sreq_ptr = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-    MPIU_Assert(*sreq_ptr != NULL);
-    MPIU_Object_set_ref(*sreq_ptr, 2);
+    MPIR_Assert(*sreq_ptr != NULL);
+    MPIR_Object_set_ref(*sreq_ptr, 2);
     (*sreq_ptr)->kind = MPIR_REQUEST_KIND__SEND;
     (*sreq_ptr)->dev.OnDataAvail = NULL;
     (*sreq_ptr)->dev.user_buf = NULL;
@@ -376,7 +376,7 @@ int MPID_nem_ptl_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, voi
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_ISTARTCONTIGMSG);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -390,16 +390,16 @@ int MPID_nem_ptl_iSendContig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr, intp
                                void *data, intptr_t data_sz)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISENDCONTIG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISENDCONTIG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISENDCONTIG);
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_ISENDCONTIG);
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     
     mpi_errno = send_pkt(vc, hdr, data, data_sz, sreq);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISENDCONTIG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_ISENDCONTIG);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -416,9 +416,9 @@ static inline int on_data_avail(MPIR_Request * req)
     MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_ON_DATA_AVAIL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ON_DATA_AVAIL);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_ON_DATA_AVAIL);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_ON_DATA_AVAIL);
 
     reqFn = req->dev.OnDataAvail;
     if (!reqFn) {
@@ -432,7 +432,7 @@ static inline int on_data_avail(MPIR_Request * req)
     else {
         int complete;
         reqFn(vc, req, &complete);
-        MPIU_Assert(complete == TRUE);
+        MPIR_Assert(complete == TRUE);
     }
 
     vc_ptl->num_queued_sends--;
@@ -441,7 +441,7 @@ static inline int on_data_avail(MPIR_Request * req)
         MPID_nem_ptl_vc_terminated(vc);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_ON_DATA_AVAIL);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_ON_DATA_AVAIL);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -454,9 +454,9 @@ static inline int on_data_avail(MPIR_Request * req)
 int MPID_nem_ptl_nm_ctl_event_handler(const ptl_event_t *e)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_NM_CTL_EVENT_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_NM_CTL_EVENT_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_NM_CTL_EVENT_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_NM_CTL_EVENT_HANDLER);
 
     switch(e->type) {
 
@@ -487,8 +487,8 @@ int MPID_nem_ptl_nm_ctl_event_handler(const ptl_event_t *e)
                     MPIR_Request *req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
                     /* This request is actually complete; just needs to wait to enforce ordering */
                     TMPBUF(req) = MPL_malloc(packet_sz);
-                    MPIU_Assert(TMPBUF(req));
-                    MPIU_Memcpy(TMPBUF(req), e->start, packet_sz);
+                    MPIR_Assert(TMPBUF(req));
+                    MPIR_Memcpy(TMPBUF(req), e->start, packet_sz);
                     REQ_PTL(req)->bytes_put = packet_sz;
                     req->ch.vc = vc;
                     REQ_PTL(req)->recv_ptr = e->start;
@@ -502,13 +502,13 @@ int MPID_nem_ptl_nm_ctl_event_handler(const ptl_event_t *e)
                 ptl_size_t target_offset;
 
                 MPIR_Request *req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-                MPIU_Assert(req != NULL);
+                MPIR_Assert(req != NULL);
                 MPIDI_CH3U_Request_decrement_cc(req, &incomplete);  /* We'll increment it below */
                 REQ_PTL(req)->event_handler = MPID_nem_ptl_nm_ctl_event_handler;
                 REQ_PTL(req)->bytes_put = packet_sz + remaining - sizeof(ptl_size_t);
                 TMPBUF(req) = MPL_malloc(REQ_PTL(req)->bytes_put);
-                MPIU_Assert(TMPBUF(req) != NULL);
-                MPIU_Memcpy(TMPBUF(req), e->start, packet_sz);
+                MPIR_Assert(TMPBUF(req) != NULL);
+                MPIR_Memcpy(TMPBUF(req), e->start, packet_sz);
                 REQ_PTL(req)->recv_ptr = e->start;
                 req->ch.vc = vc;
 
@@ -619,7 +619,7 @@ int MPID_nem_ptl_nm_ctl_event_handler(const ptl_event_t *e)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_NM_CTL_EVENT_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_NM_CTL_EVENT_HANDLER);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c
index b580671..b3ad4b3 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c
@@ -23,25 +23,25 @@ int MPID_nem_ptl_poll_init(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIU_CHKPMEM_DECL(NUM_OVERFLOW_ME);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL_INIT);
+    MPIR_CHKPMEM_DECL(NUM_OVERFLOW_ME);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_POLL_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_POLL_INIT);
 
     /* create overflow buffers */
     for (i = 0; i < NUM_OVERFLOW_ME; ++i) {
-        MPIU_CHKPMEM_MALLOC(overflow_buf[i], void *, OVERFLOW_LENGTH, mpi_errno, "overflow buffer");
+        MPIR_CHKPMEM_MALLOC(overflow_buf[i], void *, OVERFLOW_LENGTH, mpi_errno, "overflow buffer");
         mpi_errno = append_overflow(i);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
     
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit2:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_POLL_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_POLL_INIT);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit2;
 }
 
@@ -56,9 +56,9 @@ int MPID_nem_ptl_poll_finalize(void)
     int mpi_errno = MPI_SUCCESS;
     int i;
     int ret;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_POLL_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_POLL_FINALIZE);
     
     for (i = 0; i < NUM_OVERFLOW_ME; ++i) {
         if (overflow_me_handle[i] != PTL_INVALID_HANDLE) {
@@ -69,7 +69,7 @@ int MPID_nem_ptl_poll_finalize(void)
     }
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_POLL_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_POLL_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -85,11 +85,11 @@ static int append_overflow(int i)
     int ret;
     ptl_me_t me;
     ptl_process_t id_any;
-    MPIDI_STATE_DECL(MPID_STATE_APPEND_OVERFLOW);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_APPEND_OVERFLOW);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_APPEND_OVERFLOW);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_APPEND_OVERFLOW);
 
-    MPIU_Assert(i >= 0 && i < NUM_OVERFLOW_ME);
+    MPIR_Assert(i >= 0 && i < NUM_OVERFLOW_ME);
     
     id_any.phys.pid = PTL_PID_ANY;
     id_any.phys.nid = PTL_NID_ANY;
@@ -111,7 +111,7 @@ static int append_overflow(int i)
     MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_APPEND_OVERFLOW);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_APPEND_OVERFLOW);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -127,16 +127,16 @@ int MPID_nem_ptl_poll(int is_blocking_poll)
     int mpi_errno = MPI_SUCCESS;
     ptl_event_t event;
     int ret;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL);
 
-    /* MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_POLL); */
+    /* MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_POLL); */
 
     while (1) {
         int ctl_event = FALSE;
 
         /* Check the rptls EQ first. It should never return an event. */
         ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_rpt_eq, &event);
-        MPIU_Assert(ret == PTL_EQ_EMPTY);
+        MPIR_Assert(ret == PTL_EQ_EMPTY);
 
         /* check EQs for events */
         ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_eq, &event);
@@ -210,7 +210,7 @@ int MPID_nem_ptl_poll(int is_blocking_poll)
     }
 
  fn_exit:
-    /* MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_POLL); */
+    /* MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_POLL); */
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_probe.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_probe.c
index 006c5ba..655ab2f 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_probe.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_probe.c
@@ -14,9 +14,9 @@ static int handle_probe(const ptl_event_t *e)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *const req = e->user_ptr;
-    MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLE_PROBE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLE_PROBE);
 
     if (e->ni_fail_type == PTL_NI_NO_MATCH) {
         REQ_PTL(req)->found = FALSE;
@@ -35,7 +35,7 @@ static int handle_probe(const ptl_event_t *e)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLE_PROBE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -45,10 +45,10 @@ static int handle_mprobe(const ptl_event_t *e)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *const req = e->user_ptr;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLE_PROBE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLE_PROBE);
 
     if (e->ni_fail_type == PTL_NI_NO_MATCH) {
         REQ_PTL(req)->found = FALSE;
@@ -61,15 +61,15 @@ static int handle_mprobe(const ptl_event_t *e)
     MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data));
     MPIDI_Request_set_sync_send_flag(req, e->hdr_data & NPTL_SSEND);
 
-    MPIU_CHKPMEM_MALLOC(req->dev.tmpbuf, void *, e->mlength, mpi_errno, "tmpbuf");
-    MPIU_Memcpy((char *)req->dev.tmpbuf, e->start, e->mlength);
+    MPIR_CHKPMEM_MALLOC(req->dev.tmpbuf, void *, e->mlength, mpi_errno, "tmpbuf");
+    MPIR_Memcpy((char *)req->dev.tmpbuf, e->start, e->mlength);
     req->dev.recv_data_sz = e->mlength;
 
     if (!(e->hdr_data & NPTL_LARGE)) {
         MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_EAGER_MSG);
     }
     else {
-        MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD);
+        MPIR_Assert (e->mlength == PTL_LARGE_THRESHOLD);
         req->dev.match.parts.tag = req->status.MPI_TAG;
         req->dev.match.parts.context_id = NPTL_MATCH_GET_CTX(e->match_bits);
         req->dev.match.parts.rank = req->status.MPI_SOURCE;
@@ -88,11 +88,11 @@ static int handle_mprobe(const ptl_event_t *e)
     }
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLE_PROBE);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -103,7 +103,7 @@ static int handle_mprobe(const ptl_event_t *e)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_ptl_probe(MPIDI_VC_t *vc, int source, int tag, MPIR_Comm *comm, int context_offset, MPI_Status *status)
 {
-    MPIU_Assertp(0 && "This function shouldn't be called.");
+    MPIR_Assertp(0 && "This function shouldn't be called.");
     return MPI_SUCCESS;
 }
 
@@ -119,9 +119,9 @@ int MPID_nem_ptl_iprobe(MPIDI_VC_t *vc, int source, int tag, MPIR_Comm *comm, in
     ptl_process_t id_any;
     ptl_me_t me;
     MPIR_Request *req;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IPROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IPROBE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_IPROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_IPROBE);
 
     id_any.phys.nid = PTL_NID_ANY;
     id_any.phys.pid = PTL_PID_ANY;
@@ -129,7 +129,7 @@ int MPID_nem_ptl_iprobe(MPIDI_VC_t *vc, int source, int tag, MPIR_Comm *comm, in
     /* create a request */
     req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     MPIR_ERR_CHKANDJUMP1(!req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIR_Request_create");
-    MPIU_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */
+    MPIR_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */
     REQ_PTL(req)->event_handler = handle_probe;
 
     /* create a dummy ME to use for searching the list */
@@ -174,7 +174,7 @@ int MPID_nem_ptl_iprobe(MPIDI_VC_t *vc, int source, int tag, MPIR_Comm *comm, in
     MPIR_Request_free(req);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_IPROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_IPROBE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -194,9 +194,9 @@ int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPIR_Comm *comm, i
     ptl_me_t me;
     MPIR_Request *req;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IMPROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IMPROBE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_IMPROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_IMPROBE);
 
     id_any.phys.nid = PTL_NID_ANY;
     id_any.phys.pid = PTL_PID_ANY;
@@ -205,7 +205,7 @@ int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPIR_Comm *comm, i
     req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     MPID_nem_ptl_init_req(req);
     MPIR_ERR_CHKANDJUMP1(!req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIR_Request_create");
-    MPIU_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */
+    MPIR_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */
     REQ_PTL(req)->event_handler = handle_mprobe;
     req->kind = MPIR_REQUEST_KIND__MPROBE;
 
@@ -255,7 +255,7 @@ int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPIR_Comm *comm, i
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_IMPROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_IMPROBE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -268,14 +268,14 @@ int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPIR_Comm *comm, i
 int MPID_nem_ptl_anysource_iprobe(int tag, MPIR_Comm * comm, int context_offset, int *flag, MPI_Status * status)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IPROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IPROBE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IPROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IPROBE);
 
     return MPID_nem_ptl_iprobe(NULL, MPI_ANY_SOURCE, tag, comm, context_offset, flag, status);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IPROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IPROBE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -289,14 +289,14 @@ int MPID_nem_ptl_anysource_improbe(int tag, MPIR_Comm * comm, int context_offset
                                    MPI_Status * status)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IMPROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IMPROBE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IMPROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IMPROBE);
 
     return MPID_nem_ptl_improbe(NULL, MPI_ANY_SOURCE, tag, comm, context_offset, flag, message, status);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IMPROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_IMPROBE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -326,7 +326,7 @@ int MPID_nem_ptl_pkt_cancel_send_req_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pk
     search_req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     MPID_nem_ptl_init_req(search_req);
     MPIR_ERR_CHKANDJUMP1(!search_req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIR_Request_create");
-    MPIU_Object_set_ref(search_req, 2); /* 1 ref for progress engine and 1 ref for us */
+    MPIR_Object_set_ref(search_req, 2); /* 1 ref for progress engine and 1 ref for us */
     search_req->kind = MPIR_REQUEST_KIND__MPROBE;
 
     /* create a dummy ME to use for searching the list */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
index 4f5953e..5eb460e 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
@@ -25,7 +25,7 @@ static void dequeue_req(const ptl_event_t *e)
     /* an MPI_ANY_SOURCE request may have been previously removed from the
        CH3 queue by an FDP (find and dequeue posted) operation */
     if (rreq->dev.match.parts.rank != MPI_ANY_SOURCE)
-        MPIU_Assert(found);
+        MPIR_Assert(found);
 
     rreq->status.MPI_ERROR = MPI_SUCCESS;
     rreq->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
@@ -55,11 +55,11 @@ static int handler_recv_complete(const ptl_event_t *e)
     MPIR_Request *const rreq = e->user_ptr;
     int ret;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_COMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_COMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_COMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_COMPLETE);
     
-    MPIU_Assert(e->type == PTL_EVENT_REPLY || e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
+    MPIR_Assert(e->type == PTL_EVENT_REPLY || e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
 
     if (REQ_PTL(rreq)->md != PTL_INVALID_HANDLE) {
         ret = PtlMDRelease(REQ_PTL(rreq)->md);
@@ -76,7 +76,7 @@ static int handler_recv_complete(const ptl_event_t *e)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_COMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_COMPLETE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -96,11 +96,11 @@ static int handler_recv_dequeue_complete(const ptl_event_t *e)
     intptr_t data_sz;
     MPIDU_Datatype*dt_ptr ATTRIBUTE((unused));
 
-    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE);
 
-    MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
+    MPIR_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
 
     MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, is_contig, data_sz, dt_ptr, dt_true_lb);
     
@@ -111,7 +111,7 @@ static int handler_recv_dequeue_complete(const ptl_event_t *e)
         MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "is_contig = %d", is_contig);
 
         if (is_contig) {
-            MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength);
+            MPIR_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength);
         } else {
             last = e->mlength;
             MPIDU_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, e->start);
@@ -132,7 +132,7 @@ static int handler_recv_dequeue_complete(const ptl_event_t *e)
     mpi_errno = handler_recv_complete(e);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -148,11 +148,11 @@ static int handler_recv_big_get(const ptl_event_t *e)
     MPIR_Request *const rreq = e->user_ptr;
     MPI_Aint last;
 
-    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_UNPACK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_UNPACK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_UNPACK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_UNPACK);
 
-    MPIU_Assert(e->type == PTL_EVENT_REPLY);
+    MPIR_Assert(e->type == PTL_EVENT_REPLY);
 
     /* decrement the number of remaining gets */
     REQ_PTL(rreq)->num_gets--;
@@ -161,7 +161,7 @@ static int handler_recv_big_get(const ptl_event_t *e)
         if (REQ_PTL(rreq)->chunk_buffer[0]) {
             last = rreq->dev.segment_size;
             MPIDU_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, REQ_PTL(rreq)->chunk_buffer[0]);
-            MPIU_Assert(last == rreq->dev.segment_size);
+            MPIR_Assert(last == rreq->dev.segment_size);
         }
         mpi_errno = handler_recv_complete(e);
     }
@@ -169,7 +169,7 @@ static int handler_recv_big_get(const ptl_event_t *e)
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_UNPACK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_UNPACK);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -198,7 +198,7 @@ static void big_get(void *buf, ptl_size_t left_to_get, MPIDI_VC_t *vc, ptl_match
         ret = MPID_nem_ptl_rptl_get(MPIDI_nem_ptl_global_md, start, get_sz, vc_ptl->id, vc_ptl->ptg, match_bits, 0, rreq);
         DBG_MSG_GET("global", get_sz, vc->pg_rank, match_bits);
         MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "   buf=%p", (char *)start);
-        MPIU_Assert(ret == 0);
+        MPIR_Assert(ret == 0);
 
         /* account for what has been sent */
         start += get_sz;
@@ -218,11 +218,11 @@ static int handler_recv_unpack_complete(const ptl_event_t *e)
     void *buf;
     MPI_Aint last;
 
-    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_UNPACK_COMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_UNPACK_COMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_UNPACK_COMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_UNPACK_COMPLETE);
     
-    MPIU_Assert(e->type == PTL_EVENT_REPLY || e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
+    MPIR_Assert(e->type == PTL_EVENT_REPLY || e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
 
     if (e->type == PTL_EVENT_PUT_OVERFLOW)
         buf = e->start;
@@ -231,13 +231,13 @@ static int handler_recv_unpack_complete(const ptl_event_t *e)
 
     last = rreq->dev.segment_first + e->mlength;
     MPIDU_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, buf);
-    MPIU_Assert(last == rreq->dev.segment_first + e->mlength);
+    MPIR_Assert(last == rreq->dev.segment_first + e->mlength);
     
     mpi_errno = handler_recv_complete(e);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_UNPACK_COMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_UNPACK_COMPLETE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -250,17 +250,17 @@ static int handler_recv_unpack_complete(const ptl_event_t *e)
 static int handler_recv_dequeue_unpack_complete(const ptl_event_t *e)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_COMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_COMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_COMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_COMPLETE);
     
-    MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
+    MPIR_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
 
     dequeue_req(e);
     mpi_errno = handler_recv_unpack_complete(e);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_COMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_COMPLETE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -282,12 +282,12 @@ static int handler_recv_dequeue_large(const ptl_event_t *e)
     MPIDU_Datatype*dt_ptr;
     MPI_Aint dt_true_lb;
     MPI_Aint last;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);
     
-    MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
+    MPIR_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
 
     MPIDI_Comm_get_vc(rreq->comm, NPTL_MATCH_GET_RANK(e->match_bits), &vc);
     vc_ptl = VC_PTL(vc);
@@ -299,11 +299,11 @@ static int handler_recv_dequeue_large(const ptl_event_t *e)
     /* unpack data from unexpected buffer first */
     if (e->type == PTL_EVENT_PUT_OVERFLOW) {
         if (dt_contig) {
-            MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength);
+            MPIR_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength);
         } else {
             last = e->mlength;
             MPIDU_Segment_unpack(rreq->dev.segment_ptr, 0, &last, e->start);
-            MPIU_Assert(last == e->mlength);
+            MPIR_Assert(last == e->mlength);
             rreq->dev.segment_first = e->mlength;
         }
     }
@@ -315,7 +315,7 @@ static int handler_recv_dequeue_large(const ptl_event_t *e)
         goto fn_exit;
     }
         
-    MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD);
+    MPIR_Assert (e->mlength == PTL_LARGE_THRESHOLD);
 
     /* we need to GET the rest of the data from the sender's buffer */
     if (dt_contig) {
@@ -351,17 +351,17 @@ static int handler_recv_dequeue_large(const ptl_event_t *e)
         
     /* message won't fit in a single IOV, allocate buffer and unpack when received */
     /* FIXME: For now, allocate a single large buffer to hold entire message */
-    MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz - PTL_LARGE_THRESHOLD,
+    MPIR_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz - PTL_LARGE_THRESHOLD,
                         mpi_errno, "chunk_buffer");
     big_get(REQ_PTL(rreq)->chunk_buffer[0], data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq);
 
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit2:
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit2;
 }
 
@@ -377,11 +377,11 @@ static int handler_recv_dequeue_unpack_large(const ptl_event_t *e)
     MPIDI_VC_t *vc;
     MPI_Aint last;
     void *buf;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_LARGE);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_LARGE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_LARGE);
-    MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_LARGE);
+    MPIR_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);
 
     MPIDI_Comm_get_vc(rreq->comm, NPTL_MATCH_GET_RANK(e->match_bits), &vc);
 
@@ -399,24 +399,24 @@ static int handler_recv_dequeue_unpack_large(const ptl_event_t *e)
     else
         buf = REQ_PTL(rreq)->chunk_buffer[0];
 
-    MPIU_Assert(e->mlength == PTL_LARGE_THRESHOLD);
+    MPIR_Assert(e->mlength == PTL_LARGE_THRESHOLD);
     last = PTL_LARGE_THRESHOLD;
     MPIDU_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, buf);
-    MPIU_Assert(last == PTL_LARGE_THRESHOLD);
+    MPIR_Assert(last == PTL_LARGE_THRESHOLD);
     rreq->dev.segment_first += PTL_LARGE_THRESHOLD;
     MPL_free(REQ_PTL(rreq)->chunk_buffer[0]);
 
-    MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, rreq->dev.segment_size - rreq->dev.segment_first,
+    MPIR_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, rreq->dev.segment_size - rreq->dev.segment_first,
                         mpi_errno, "chunk_buffer");
     big_get(REQ_PTL(rreq)->chunk_buffer[0], rreq->dev.segment_size - rreq->dev.segment_first, vc, e->match_bits, rreq);
 
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit2:
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_LARGE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_LARGE);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit2;
 }
 
@@ -436,10 +436,10 @@ int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPIR_Request *rreq)
     MPI_Aint last;
     ptl_process_t id_any;
     int ret;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RECV_POSTED);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RECV_POSTED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RECV_POSTED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_RECV_POSTED);
 
     id_any.phys.nid = PTL_NID_ANY;
     id_any.phys.pid = PTL_PID_ANY;
@@ -509,7 +509,7 @@ int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPIR_Request *rreq)
                 /* IOV is not long enough to describe entire message: recv into
                    buffer and unpack later */
                 MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "    IOV too long: using bounce buffer");
-                MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
+                MPIR_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
                 me.start = REQ_PTL(rreq)->chunk_buffer[0];
                 me.length = data_sz;
                 REQ_PTL(rreq)->event_handler = handler_recv_dequeue_unpack_complete;
@@ -548,7 +548,7 @@ int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPIR_Request *rreq)
                 /* IOV is not long enough to describe the first chunk: recv into
                    buffer and unpack later */
                 MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "    IOV too long: using bounce buffer for first chunk");
-                MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, PTL_LARGE_THRESHOLD, mpi_errno, "chunk_buffer");
+                MPIR_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, PTL_LARGE_THRESHOLD, mpi_errno, "chunk_buffer");
                 me.start = REQ_PTL(rreq)->chunk_buffer[0];
                 me.length = PTL_LARGE_THRESHOLD;
                 REQ_PTL(rreq)->event_handler = handler_recv_dequeue_unpack_large;
@@ -564,12 +564,12 @@ int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPIR_Request *rreq)
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "MPIDI_nem_ptl_pt = %d", MPIDI_nem_ptl_pt);
 
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit2:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RECV_POSTED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_RECV_POSTED);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit2;
 }
 
@@ -580,18 +580,18 @@ int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPIR_Request *rreq)
 void MPID_nem_ptl_anysource_posted(MPIR_Request *rreq)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_POSTED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_POSTED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_POSTED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_POSTED);
 
     mpi_errno = MPID_nem_ptl_recv_posted(NULL, rreq);
 
     /* FIXME: This function is void, so we can't return an error.  This function
        cannot return an error because the queue functions (where the posted_recv
        hooks are called) return no error code. */
-    MPIU_Assertp(mpi_errno == MPI_SUCCESS);
+    MPIR_Assertp(mpi_errno == MPI_SUCCESS);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_POSTED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_POSTED);
 }
 
 #undef FUNCNAME
@@ -602,9 +602,9 @@ static int cancel_recv(MPIR_Request *rreq, int *cancelled)
 {
     int mpi_errno = MPI_SUCCESS;
     int ptl_err   = PTL_OK;
-    MPIDI_STATE_DECL(MPID_STATE_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CANCEL_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CANCEL_RECV);
 
     *cancelled = FALSE;
 
@@ -619,7 +619,7 @@ static int cancel_recv(MPIR_Request *rreq, int *cancelled)
         *cancelled = TRUE;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CANCEL_RECV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -634,18 +634,18 @@ int MPID_nem_ptl_anysource_matched(MPIR_Request *rreq)
 {
     int mpi_errno, cancelled;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
 
     mpi_errno = cancel_recv(rreq, &cancelled);
     /* FIXME: This function is does not return an error because the queue
        functions (where the posted_recv hooks are called) return no error
        code. See also comment on cancel_recv. */
-    MPIU_Assertp(mpi_errno == MPI_SUCCESS);
+    MPIR_Assertp(mpi_errno == MPI_SUCCESS);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_ANYSOURCE_MATCHED);
     return !cancelled;
  fn_fail:
     goto fn_exit;
@@ -661,18 +661,18 @@ int MPID_nem_ptl_cancel_recv(MPIDI_VC_t *vc,  MPIR_Request *rreq)
 {
     int mpi_errno, cancelled;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_CANCEL_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_CANCEL_RECV);
 
     mpi_errno = cancel_recv(rreq, &cancelled);
     /* FIXME: This function is does not return an error because the queue
        functions (where the posted_recv hooks are called) return no error
        code. */
-    MPIU_Assertp(mpi_errno == MPI_SUCCESS);
+    MPIR_Assertp(mpi_errno == MPI_SUCCESS);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_CANCEL_RECV);
     return !cancelled;
  fn_fail:
     goto fn_exit;
@@ -700,11 +700,11 @@ int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPIR_Request *rreq, MPL_IOV s_c
     int was_incomplete;
     int ret;
     MPID_nem_ptl_vc_area *vc_ptl = VC_PTL(vc);
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_LMT_START_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_LMT_START_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_LMT_START_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_LMT_START_RECV);
 
     /* This Rendezvous protocol does not do RTS-CTS. Since we have all the data, we limit to get it */
     /* The following code is inspired on handler_recv_dqueue_large */
@@ -712,7 +712,7 @@ int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPIR_Request *rreq, MPL_IOV s_c
     match_bits = NPTL_MATCH(rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id,
                             rreq->dev.match.parts.rank);
     MPIDI_CH3U_Request_increment_cc(rreq, &was_incomplete);
-    MPIU_Assert(was_incomplete == 0);
+    MPIR_Assert(was_incomplete == 0);
     MPIR_Request_add_ref(rreq);
 
     MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz, dt_ptr,
@@ -723,7 +723,7 @@ int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPIR_Request *rreq, MPL_IOV s_c
         big_get((char *)real_user_buf + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc, match_bits, rreq);
 
         /* The memcpy is done after the get purposely for overlapping */
-        MPIU_Memcpy(real_user_buf, rreq->dev.tmpbuf, PTL_LARGE_THRESHOLD);
+        MPIR_Memcpy(real_user_buf, rreq->dev.tmpbuf, PTL_LARGE_THRESHOLD);
     }
     else {
         MPI_Aint last;
@@ -737,7 +737,7 @@ int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPIR_Request *rreq, MPL_IOV s_c
         rreq->dev.segment_size = data_sz;
         last = PTL_LARGE_THRESHOLD;
         MPIDU_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, rreq->dev.tmpbuf);
-        MPIU_Assert(last == PTL_LARGE_THRESHOLD);
+        MPIR_Assert(last == PTL_LARGE_THRESHOLD);
         rreq->dev.segment_first = PTL_LARGE_THRESHOLD;
         last = rreq->dev.segment_size;
         rreq->dev.iov_count = MPL_IOV_LIMIT;
@@ -765,7 +765,7 @@ int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPIR_Request *rreq, MPL_IOV s_c
         else {
             /* message won't fit in a single IOV, allocate buffer and unpack when received */
             /* FIXME: For now, allocate a single large buffer to hold entire message */
-            MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, rreq->dev.segment_size - rreq->dev.segment_first,
+            MPIR_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, rreq->dev.segment_size - rreq->dev.segment_first,
                                 mpi_errno, "chunk_buffer");
             big_get(REQ_PTL(rreq)->chunk_buffer[0], rreq->dev.segment_size - rreq->dev.segment_first, vc, match_bits, rreq);
         }
@@ -774,10 +774,10 @@ int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPIR_Request *rreq, MPL_IOV s_c
     rreq->ch.lmt_tmp_cookie.MPL_IOV_LEN = 0;  /* Required for do_cts in mpid_nem_lmt.c */
 
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_LMT_START_RECV);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_LMT_START_RECV);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c
index 35a9358..d9757e1 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c
@@ -41,10 +41,10 @@ static void big_meappend(void *buf, ptl_size_t left_to_send, MPIDI_VC_t *vc, ptl
         ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                           &REQ_PTL(sreq)->get_me_p[i]);
         DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
-        MPIU_Assert(ret == 0);
+        MPIR_Assert(ret == 0);
         /* increment the cc for each get operation */
         MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
-        MPIU_Assert(was_incomplete);
+        MPIR_Assert(was_incomplete);
 
         /* account for what has been sent */
         me.start = (char *)me.start + me.length;
@@ -63,11 +63,11 @@ static int handler_send(const ptl_event_t *e)
 
     int i, ret;
 
-    MPIDI_STATE_DECL(MPID_STATE_HANDLER_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_HANDLER_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_HANDLER_SEND);
 
-    MPIU_Assert(e->type == PTL_EVENT_SEND || e->type == PTL_EVENT_GET);
+    MPIR_Assert(e->type == PTL_EVENT_SEND || e->type == PTL_EVENT_GET);
 
     /* if we are done, release all netmod resources */
     if (MPIR_cc_get(sreq->cc) == 1) {
@@ -89,7 +89,7 @@ static int handler_send(const ptl_event_t *e)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_HANDLER_SEND);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -115,10 +115,10 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
     int initial_iov_count, remaining_iov_count;
     ptl_md_t md;
     MPI_Aint last;
-    MPIU_CHKPMEM_DECL(2);
-    MPIDI_STATE_DECL(MPID_STATE_SEND_MSG);
+    MPIR_CHKPMEM_DECL(2);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_MSG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SEND_MSG);
 
     MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm);
     sreq->dev.match.parts.rank = dest;
@@ -193,12 +193,12 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
         
         /* IOV is not long enough to describe entire message */
         MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "    IOV too long: using bounce buffer");
-        MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
+        MPIR_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
         MPIDU_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
         sreq->dev.segment_first = 0;
         last = data_sz;
         MPIDU_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]);
-        MPIU_Assert(last == sreq->dev.segment_size);
+        MPIR_Assert(last == sreq->dev.segment_size);
         REQ_PTL(sreq)->event_handler = handler_send;
         ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_NO_ACK_REQ,
                      vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
@@ -268,7 +268,7 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
                 me.ignore_bits = 0;
                 me.min_free = 0;
 
-                MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p");
+                MPIR_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p");
 
                 ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                                   &REQ_PTL(sreq)->get_me_p[0]);
@@ -276,7 +276,7 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
                 DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
                 /* increment the cc for the get operation */
                 MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
-                MPIU_Assert(was_incomplete);
+                MPIR_Assert(was_incomplete);
 
                 /* Create MD for first chunk */
                 md.start = sreq->dev.iov;
@@ -301,11 +301,11 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
     }
 
     /* allocate a temporary buffer and copy all the data to send */
-    MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "tmpbuf");
+    MPIR_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "tmpbuf");
 
     last = data_sz;
     MPIDU_Segment_pack(sreq->dev.segment_ptr, 0, &last, REQ_PTL(sreq)->chunk_buffer[0]);
-    MPIU_Assert(last == data_sz);
+    MPIR_Assert(last == data_sz);
 
     big_meappend((char *)REQ_PTL(sreq)->chunk_buffer[0] + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc,
                  NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq);
@@ -319,15 +319,15 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
     
  fn_exit:
     *request = sreq;
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_SEND_MSG);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SEND_MSG);
     return mpi_errno;
  fn_fail:
     if (sreq) {
         MPIR_Request_free(sreq);
         sreq = NULL;
     }
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -339,13 +339,13 @@ int MPID_nem_ptl_isend(struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI
                        MPIR_Comm *comm, int context_offset, struct MPIR_Request **request)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_ISEND);
 
     mpi_errno = send_msg(0, vc, buf, count, datatype, dest, tag, comm, context_offset, request);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_ISEND);
     return mpi_errno;
 }
 
@@ -358,13 +358,13 @@ int MPID_nem_ptl_issend(struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MP
                         MPIR_Comm *comm, int context_offset, struct MPIR_Request **request)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_ISSEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_ISSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_ISSEND);
 
     mpi_errno = send_msg(NPTL_SSEND, vc, buf, count, datatype, dest, tag, comm, context_offset, request);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_ISSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_ISSEND);
     return mpi_errno;
 }
 
@@ -379,9 +379,9 @@ int MPID_nem_ptl_cancel_send(struct MPIDI_VC *vc,  struct MPIR_Request *sreq)
     MPIR_Request *csr_sreq;
     int was_incomplete;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);
 
     /* The completion counter and reference count are incremented to keep
        the request around long enough to receive a
@@ -408,7 +408,7 @@ int MPID_nem_ptl_cancel_send(struct MPIDI_VC *vc,  struct MPIR_Request *sreq)
         MPIR_Request_free(csr_sreq);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_CANCEL_SEND);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.c
index aee798f..6cb5e48 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.c
@@ -76,10 +76,10 @@ static int find_target(ptl_process_t id, struct rptl_target **target)
     int mpi_errno = MPI_SUCCESS;
     int ret = PTL_OK;
     struct rptl_target *t;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_FIND_TARGET);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_FIND_TARGET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_FIND_TARGET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_FIND_TARGET);
 
     for (t = rptl_info.target_list; t; t = t->next)
         if (IDS_ARE_EQUAL(t->id, id))
@@ -87,7 +87,7 @@ static int find_target(ptl_process_t id, struct rptl_target **target)
 
     /* if the target does not already exist, create one */
     if (t == NULL) {
-        MPIU_CHKPMEM_MALLOC(t, struct rptl_target *, sizeof(struct rptl_target), mpi_errno, "rptl target");
+        MPIR_CHKPMEM_MALLOC(t, struct rptl_target *, sizeof(struct rptl_target), mpi_errno, "rptl target");
         MPL_DL_APPEND(rptl_info.target_list, t);
 
         t->id = id;
@@ -103,14 +103,14 @@ static int find_target(ptl_process_t id, struct rptl_target **target)
     *target = t;
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_FIND_TARGET);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_FIND_TARGET);
     return ret;
 
   fn_fail:
     if (mpi_errno)
         ret = PTL_FAIL;
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -134,9 +134,9 @@ static int poke_progress(void)
     int mpi_errno = MPI_SUCCESS;
     ptl_process_t id;
     ptl_pt_index_t data_pt, control_pt;
-    MPIDI_STATE_DECL(MPID_STATE_POKE_PROGRESS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_POKE_PROGRESS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_POKE_PROGRESS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_POKE_PROGRESS);
 
     /* make progress on local RPTLs */
     for (rptl = rptl_info.rptl_list; rptl; rptl = rptl->next) {
@@ -311,7 +311,7 @@ static int poke_progress(void)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_POKE_PROGRESS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_POKE_PROGRESS);
     return ret;
 
   fn_fail:
@@ -331,9 +331,9 @@ static int rptl_put(ptl_handle_md_t md_handle, ptl_size_t local_offset, ptl_size
     struct rptl_op *op;
     int ret = PTL_OK;
     struct rptl_target *target;
-    MPIDI_STATE_DECL(MPID_STATE_RPTL_PUT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_RPTL_PUT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_RPTL_PUT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_RPTL_PUT);
 
     ret = find_target(target_id, &target);
     RPTLU_ERR_POP(ret, "error finding target structure\n");
@@ -372,7 +372,7 @@ static int rptl_put(ptl_handle_md_t md_handle, ptl_size_t local_offset, ptl_size
     RPTLU_ERR_POP(ret, "Error from poke_progress\n");
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_RPTL_PUT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_RPTL_PUT);
     return ret;
 
   fn_fail:
@@ -405,9 +405,9 @@ int MPID_nem_ptl_rptl_get(ptl_handle_md_t md_handle, ptl_size_t local_offset, pt
     struct rptl_op *op;
     int ret = PTL_OK;
     struct rptl_target *target;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_GET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_GET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_GET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_GET);
 
     ret = find_target(target_id, &target);
     RPTLU_ERR_POP(ret, "error finding target structure\n");
@@ -437,7 +437,7 @@ int MPID_nem_ptl_rptl_get(ptl_handle_md_t md_handle, ptl_size_t local_offset, pt
     RPTLU_ERR_POP(ret, "Error from poke_progress\n");
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_GET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_GET);
     return ret;
 
   fn_fail:
@@ -455,9 +455,9 @@ static int send_pause_messages(struct rptl *rptl)
     ptl_process_t id;
     ptl_pt_index_t data_pt, control_pt;
     int ret = PTL_OK;
-    MPIDI_STATE_DECL(MPID_STATE_SEND_PAUSE_MESSAGES);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_PAUSE_MESSAGES);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SEND_PAUSE_MESSAGES);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SEND_PAUSE_MESSAGES);
 
     /* if no control portal is setup for this rptl, we are doomed */
     assert(rptl->control.pt != PTL_PT_ANY);
@@ -484,7 +484,7 @@ static int send_pause_messages(struct rptl *rptl)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SEND_PAUSE_MESSAGES);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SEND_PAUSE_MESSAGES);
     return ret;
 
   fn_fail:
@@ -501,9 +501,9 @@ static int clear_nacks(ptl_process_t target_id)
     struct rptl_target *target;
     struct rptl_op *op;
     int ret = PTL_OK;
-    MPIDI_STATE_DECL(MPID_STATE_CLEAR_NACKS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CLEAR_NACKS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CLEAR_NACKS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CLEAR_NACKS);
 
     ret = find_target(target_id, &target);
     RPTLU_ERR_POP(ret, "error finding target\n");
@@ -521,7 +521,7 @@ static int clear_nacks(ptl_process_t target_id)
     RPTLU_ERR_POP(ret, "error in poke_progress\n");
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_CLEAR_NACKS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CLEAR_NACKS);
     return ret;
 
   fn_fail:
@@ -538,9 +538,9 @@ static int get_event_info(ptl_event_t * event, struct rptl **ret_rptl, struct rp
     struct rptl *rptl;
     struct rptl_op *op;
     int ret = PTL_OK;
-    MPIDI_STATE_DECL(MPID_STATE_GET_EVENT_INFO);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_GET_EVENT_INFO);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_GET_EVENT_INFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_GET_EVENT_INFO);
 
     if (event->type == PTL_EVENT_SEND || event->type == PTL_EVENT_REPLY ||
         event->type == PTL_EVENT_ACK) {
@@ -572,7 +572,7 @@ static int get_event_info(ptl_event_t * event, struct rptl **ret_rptl, struct rp
     *ret_op = op;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_GET_EVENT_INFO);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GET_EVENT_INFO);
     return ret;
 
   fn_fail:
@@ -588,10 +588,10 @@ static int stash_event(struct rptl_op *op, ptl_event_t event)
 {
     int mpi_errno = MPI_SUCCESS;
     int ret = PTL_OK;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_STASH_EVENT);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_STASH_EVENT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_STASH_EVENT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_STASH_EVENT);
 
     /* make sure this is of the event type we know of */
     assert(event.type == PTL_EVENT_SEND || event.type == PTL_EVENT_ACK);
@@ -607,25 +607,25 @@ static int stash_event(struct rptl_op *op, ptl_event_t event)
     assert(op->u.put.send == NULL && op->u.put.ack == NULL);
 
     if (event.type == PTL_EVENT_SEND) {
-        MPIU_CHKPMEM_MALLOC(op->u.put.send, ptl_event_t *, sizeof(ptl_event_t), mpi_errno,
+        MPIR_CHKPMEM_MALLOC(op->u.put.send, ptl_event_t *, sizeof(ptl_event_t), mpi_errno,
                             "ptl event");
         memcpy(op->u.put.send, &event, sizeof(ptl_event_t));
     }
     else {
-        MPIU_CHKPMEM_MALLOC(op->u.put.ack, ptl_event_t *, sizeof(ptl_event_t), mpi_errno,
+        MPIR_CHKPMEM_MALLOC(op->u.put.ack, ptl_event_t *, sizeof(ptl_event_t), mpi_errno,
                             "ptl event");
         memcpy(op->u.put.ack, &event, sizeof(ptl_event_t));
     }
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_STASH_EVENT);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_STASH_EVENT);
     return ret;
 
   fn_fail:
     if (mpi_errno)
         ret = PTL_FAIL;
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -644,9 +644,9 @@ int MPID_nem_ptl_rptl_eqget(ptl_handle_eq_t eq_handle, ptl_event_t * event)
     int ret = PTL_OK, tmp_ret = PTL_OK;
     int mpi_errno = MPI_SUCCESS;
     struct rptl_target *target;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_EQGET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_EQGET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_EQGET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_EQGET);
 
     ret = poke_progress();
     RPTLU_ERR_POP(ret, "error poking progress\n");
@@ -915,7 +915,7 @@ int MPID_nem_ptl_rptl_eqget(ptl_handle_eq_t eq_handle, ptl_event_t * event)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_EQGET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_EQGET);
     return ret;
 
   fn_fail:
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl_init.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl_init.c
index cf76678..9e86d77 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl_init.c
@@ -17,9 +17,9 @@ int rptli_post_control_buffer(ptl_handle_ni_t ni_handle, ptl_pt_index_t pt,
     int ret;
     ptl_me_t me;
     ptl_process_t id;
-    MPIDI_STATE_DECL(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);
 
     id.phys.nid = PTL_NID_ANY;
     id.phys.pid = PTL_PID_ANY;
@@ -43,7 +43,7 @@ int rptli_post_control_buffer(ptl_handle_ni_t ni_handle, ptl_pt_index_t pt,
     RPTLU_ERR_POP(ret, "Error appending empty buffer to priority list\n");
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_RPTLI_POST_CONTROL_BUFFER);
     return ret;
 
   fn_fail:
@@ -62,9 +62,9 @@ int MPID_nem_ptl_rptl_init(int world_size, uint64_t max_origin_events,
                                                    ptl_pt_index_t * target_control_pt))
 {
     int ret = PTL_OK;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_INIT);
 
     rptl_info.rptl_list = NULL;
     rptl_info.target_list = NULL;
@@ -74,7 +74,7 @@ int MPID_nem_ptl_rptl_init(int world_size, uint64_t max_origin_events,
     rptl_info.get_target_info = get_target_info;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_INIT);
     return ret;
 
   fn_fail:
@@ -93,9 +93,9 @@ int MPID_nem_ptl_rptl_drain_eq(int eq_count, ptl_handle_eq_t *eq)
     struct rptl_op_pool_segment *op_segment;
     int i;
     struct rptl_target *target, *t;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_FINALIZE);
 
     for (target = rptl_info.target_list; target; target = target->next) {
         while (target->control_op_list || target->data_op_list) {
@@ -125,7 +125,7 @@ int MPID_nem_ptl_rptl_drain_eq(int eq_count, ptl_handle_eq_t *eq)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_FINALIZE);
     return ret;
 
   fn_fail:
@@ -145,15 +145,15 @@ int MPID_nem_ptl_rptl_ptinit(ptl_handle_ni_t ni_handle, ptl_handle_eq_t eq_handl
     int mpi_errno = MPI_SUCCESS;
     int i;
     ptl_md_t md;
-    MPIU_CHKPMEM_DECL(2);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);
+    MPIR_CHKPMEM_DECL(2);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);
 
 
     /* setup the parts of rptls that can be done before world size or
      * target information */
-    MPIU_CHKPMEM_MALLOC(rptl, struct rptl *, sizeof(struct rptl), mpi_errno, "rptl");
+    MPIR_CHKPMEM_MALLOC(rptl, struct rptl *, sizeof(struct rptl), mpi_errno, "rptl");
     MPL_DL_APPEND(rptl_info.rptl_list, rptl);
 
     rptl->local_state = RPTL_LOCAL_STATE_ACTIVE;
@@ -178,7 +178,7 @@ int MPID_nem_ptl_rptl_ptinit(ptl_handle_ni_t ni_handle, ptl_handle_eq_t eq_handl
 
     /* post world_size number of empty buffers on the control portal */
     if (rptl->control.pt != PTL_PT_ANY) {
-        MPIU_CHKPMEM_MALLOC(rptl->control.me, ptl_handle_me_t *,
+        MPIR_CHKPMEM_MALLOC(rptl->control.me, ptl_handle_me_t *,
                             2 * rptl_info.world_size * sizeof(ptl_handle_me_t), mpi_errno,
                             "rptl target info");
         for (i = 0; i < 2 * rptl_info.world_size; i++) {
@@ -189,14 +189,14 @@ int MPID_nem_ptl_rptl_ptinit(ptl_handle_ni_t ni_handle, ptl_handle_eq_t eq_handl
     }
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_PTINIT);
     return ret;
 
   fn_fail:
     if (mpi_errno)
         ret = PTL_FAIL;
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -210,9 +210,9 @@ int MPID_nem_ptl_rptl_ptfini(ptl_pt_index_t pt_index)
     int i;
     int ret = PTL_OK;
     struct rptl *rptl;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_PTFINI);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RPTL_PTFINI);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_PTFINI);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_RPTL_PTFINI);
 
     /* find the right rptl */
     for (rptl = rptl_info.rptl_list; rptl && rptl->data.pt != pt_index; rptl = rptl->next);
@@ -231,7 +231,7 @@ int MPID_nem_ptl_rptl_ptfini(ptl_pt_index_t pt_index)
     MPL_free(rptl);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_PTFINI);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_PTL_RPTL_PTFINI);
     return ret;
 
   fn_fail:
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl_op.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl_op.c
index 7997883..e0b3005 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl_op.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl_op.c
@@ -17,15 +17,15 @@ int rptli_op_alloc(struct rptl_op **op, struct rptl_target *target)
     struct rptl_op_pool_segment *op_segment;
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_RPTLI_OP_ALLOC);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_RPTLI_OP_ALLOC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_RPTLI_OP_ALLOC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_RPTLI_OP_ALLOC);
 
     assert(target);
 
     if (target->op_pool == NULL) {
-        MPIU_CHKPMEM_MALLOC(op_segment, struct rptl_op_pool_segment *, sizeof(struct rptl_op_pool_segment),
+        MPIR_CHKPMEM_MALLOC(op_segment, struct rptl_op_pool_segment *, sizeof(struct rptl_op_pool_segment),
                             mpi_errno, "op pool segment");
         MPL_DL_APPEND(target->op_segment_list, op_segment);
 
@@ -37,14 +37,14 @@ int rptli_op_alloc(struct rptl_op **op, struct rptl_target *target)
     MPL_DL_DELETE(target->op_pool, *op);
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_RPTLI_OP_ALLOC);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_RPTLI_OP_ALLOC);
     return ret;
 
   fn_fail:
     if (mpi_errno)
         ret = PTL_FAIL;
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -55,11 +55,11 @@ int rptli_op_alloc(struct rptl_op **op, struct rptl_target *target)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 void rptli_op_free(struct rptl_op *op)
 {
-    MPIDI_STATE_DECL(MPID_STATE_RPTLI_OP_FREE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_RPTLI_OP_FREE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_RPTLI_OP_FREE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_RPTLI_OP_FREE);
 
     MPL_DL_APPEND(op->target->op_pool, op);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_RPTLI_OP_FREE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_RPTLI_OP_FREE);
 }
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c
index bad6432..2277dc8 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c
@@ -130,12 +130,12 @@ static inline int is_same_connection(sockconn_t *sc1, sockconn_t *sc2)
     /* Returns TRUE iff sc1 and sc2 connect the same processes */
 
     /* if pg_is_set is TRUE, then either it's the same pg, or pg_id is not NULL */
-    MPIU_Assert(!sc1->pg_is_set || sc1->is_same_pg || sc1->pg_id != NULL);
-    MPIU_Assert(!sc2->pg_is_set || sc2->is_same_pg || sc2->pg_id != NULL);
+    MPIR_Assert(!sc1->pg_is_set || sc1->is_same_pg || sc1->pg_id != NULL);
+    MPIR_Assert(!sc2->pg_is_set || sc2->is_same_pg || sc2->pg_id != NULL);
 
     /* if it's a tmpvc, the pg should not be set */
-    MPIU_Assert(!sc1->is_tmpvc || !sc1->pg_is_set);
-    MPIU_Assert(!sc1->is_tmpvc || !sc1->pg_is_set);
+    MPIR_Assert(!sc1->is_tmpvc || !sc1->pg_is_set);
+    MPIR_Assert(!sc1->is_tmpvc || !sc1->pg_is_set);
 
     return !sc1->is_tmpvc && !sc2->is_tmpvc &&
         sc1->pg_is_set && sc2->pg_is_set &&
@@ -153,14 +153,14 @@ static inline int is_same_connection(sockconn_t *sc1, sockconn_t *sc2)
 static int alloc_sc_plfd_tbls (void)
 {
     int i, mpi_errno = MPI_SUCCESS, idx = -1;
-    MPIU_CHKPMEM_DECL (2);
+    MPIR_CHKPMEM_DECL (2);
 
-    MPIU_Assert(g_sc_tbl == NULL);
-    MPIU_Assert(MPID_nem_tcp_plfd_tbl == NULL);
+    MPIR_Assert(g_sc_tbl == NULL);
+    MPIR_Assert(MPID_nem_tcp_plfd_tbl == NULL);
 
-    MPIU_CHKPMEM_MALLOC (g_sc_tbl, sockconn_t *, g_tbl_capacity * sizeof(sockconn_t), 
+    MPIR_CHKPMEM_MALLOC (g_sc_tbl, sockconn_t *, g_tbl_capacity * sizeof(sockconn_t),
                          mpi_errno, "connection table");
-    MPIU_CHKPMEM_MALLOC (MPID_nem_tcp_plfd_tbl, struct pollfd *, g_tbl_capacity * sizeof(struct pollfd), 
+    MPIR_CHKPMEM_MALLOC (MPID_nem_tcp_plfd_tbl, struct pollfd *, g_tbl_capacity * sizeof(struct pollfd),
                          mpi_errno, "pollfd table");
 #if defined(MPICH_DEBUG_MEMINIT)
     /* We initialize the arrays in order to eliminate spurious valgrind errors
@@ -174,21 +174,21 @@ static int alloc_sc_plfd_tbls (void)
         INIT_SC_ENTRY(((sockconn_t *)&g_sc_tbl[i]), i);
         INIT_POLLFD_ENTRY(((struct pollfd *)&MPID_nem_tcp_plfd_tbl[i]));
     }
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 
     mpi_errno = find_free_entry(&idx);
     if (mpi_errno != MPI_SUCCESS) MPIR_ERR_POP (mpi_errno);
 
-    MPIU_Assert(0 == idx); /* assumed in other parts of this file */
-    MPIU_Memcpy (&g_sc_tbl[idx], &MPID_nem_tcp_g_lstn_sc, sizeof(MPID_nem_tcp_g_lstn_sc));
-    MPIU_Memcpy (&MPID_nem_tcp_plfd_tbl[idx], &MPID_nem_tcp_g_lstn_plfd, sizeof(MPID_nem_tcp_g_lstn_plfd));
-    MPIU_Assert(MPID_nem_tcp_plfd_tbl[idx].fd == g_sc_tbl[idx].fd);
-    MPIU_Assert(MPID_nem_tcp_plfd_tbl[idx].events == POLLIN);
+    MPIR_Assert(0 == idx); /* assumed in other parts of this file */
+    MPIR_Memcpy (&g_sc_tbl[idx], &MPID_nem_tcp_g_lstn_sc, sizeof(MPID_nem_tcp_g_lstn_sc));
+    MPIR_Memcpy (&MPID_nem_tcp_plfd_tbl[idx], &MPID_nem_tcp_g_lstn_plfd, sizeof(MPID_nem_tcp_g_lstn_plfd));
+    MPIR_Assert(MPID_nem_tcp_plfd_tbl[idx].fd == g_sc_tbl[idx].fd);
+    MPIR_Assert(MPID_nem_tcp_plfd_tbl[idx].events == POLLIN);
 
  fn_exit:
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
     goto fn_exit;
 }
@@ -223,17 +223,17 @@ static int expand_sc_plfd_tbls (void)
     sockconn_t *new_sc_tbl = NULL;
     struct pollfd *new_plfd_tbl = NULL;
     int new_capacity = g_tbl_capacity + CONN_PLFD_TBL_GROW_SIZE, i;
-    MPIU_CHKPMEM_DECL (2);
+    MPIR_CHKPMEM_DECL (2);
 
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "expand_sc_plfd_tbls Entry"));
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "expand_sc_plfd_tbls b4 g_sc_tbl[0].fd=%d", g_sc_tbl[0].fd));
-    MPIU_CHKPMEM_MALLOC (new_sc_tbl, sockconn_t *, new_capacity * sizeof(sockconn_t), 
+    MPIR_CHKPMEM_MALLOC (new_sc_tbl, sockconn_t *, new_capacity * sizeof(sockconn_t),
                          mpi_errno, "expanded connection table");
-    MPIU_CHKPMEM_MALLOC (new_plfd_tbl, struct pollfd *, new_capacity * sizeof(struct pollfd), 
+    MPIR_CHKPMEM_MALLOC (new_plfd_tbl, struct pollfd *, new_capacity * sizeof(struct pollfd),
                          mpi_errno, "expanded pollfd table");
 
-    MPIU_Memcpy (new_sc_tbl, g_sc_tbl, g_tbl_capacity * sizeof(sockconn_t));
-    MPIU_Memcpy (new_plfd_tbl, MPID_nem_tcp_plfd_tbl, g_tbl_capacity * sizeof(struct pollfd));
+    MPIR_Memcpy (new_sc_tbl, g_sc_tbl, g_tbl_capacity * sizeof(sockconn_t));
+    MPIR_Memcpy (new_plfd_tbl, MPID_nem_tcp_plfd_tbl, g_tbl_capacity * sizeof(struct pollfd));
 
     /* VCs have pointers to entries in the sc table.  These
        are updated here after the expand. */
@@ -268,23 +268,23 @@ static int expand_sc_plfd_tbls (void)
     {
         sockconn_t *sc = &g_sc_tbl[i];
         MPIDI_VC_t *vc = sc->vc;
-        MPIU_AssertDeclValue(MPID_nem_tcp_vc_area *vc_tcp, VC_TCP(vc));
+        MPIR_AssertDeclValue(MPID_nem_tcp_vc_area *vc_tcp, VC_TCP(vc));
        /*         sockconn_t *dbg_sc = g_sc_tbl[i].vc ? VC_FIELD(g_sc_tbl[i].vc, sc) : (sockconn_t*)(-1); */
 
         /* The state is only valid if the FD is valid.  The VC field is only
            valid if the state is valid and COMMRDY. */
-        MPIU_Assert(MPID_nem_tcp_plfd_tbl[i].fd == CONN_INVALID_FD ||
+        MPIR_Assert(MPID_nem_tcp_plfd_tbl[i].fd == CONN_INVALID_FD ||
                     sc->state.cstate != CONN_STATE_TS_COMMRDY ||
                     vc_tcp->sc == sc);
     }
     
     
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "expand_sc_plfd_tbls Exit"));
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
     goto fn_exit;
 }
@@ -322,7 +322,7 @@ static int find_free_entry(int *idx)
             goto fn_fail;
     }
 
-    MPIU_Assert(g_tbl_capacity > g_tbl_size);
+    MPIR_Assert(g_tbl_capacity > g_tbl_size);
     *idx = g_tbl_size;
     ++g_tbl_size;
 
@@ -349,9 +349,9 @@ static int find_free_entry(int *idx)
 static int found_better_sc(sockconn_t *sc, sockconn_t **fnd_sc)
 {
     int i, found = FALSE;
-    MPIDI_STATE_DECL(MPID_STATE_FOUND_BETTER_SC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_FOUND_BETTER_SC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_FOUND_BETTER_SC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_FOUND_BETTER_SC);
 
     /* tmpvc's can never match a better sc */
     if (sc->is_tmpvc) {
@@ -360,7 +360,7 @@ static int found_better_sc(sockconn_t *sc, sockconn_t **fnd_sc)
     }
 
     /* if we don't know our own pg info, how can we look for a better SC? */
-    MPIU_Assert(sc->pg_is_set);
+    MPIR_Assert(sc->pg_is_set);
 
     for(i = 0; i < g_tbl_size && !found; i++)
     {
@@ -373,21 +373,21 @@ static int found_better_sc(sockconn_t *sc, sockconn_t **fnd_sc)
             switch (sc->state.cstate)
             {
             case CONN_STATE_TC_C_CNTD:
-                MPIU_Assert(fnd_sc == NULL);
+                MPIR_Assert(fnd_sc == NULL);
                 if (istate == CONN_STATE_TS_COMMRDY ||
                     istate == CONN_STATE_TA_C_RANKRCVD ||
                     istate == CONN_STATE_TC_C_TMPVCSENT)
                     found = TRUE;
                 break;
             case CONN_STATE_TA_C_RANKRCVD:
-                MPIU_Assert(fnd_sc != NULL);
+                MPIR_Assert(fnd_sc != NULL);
                 if (istate == CONN_STATE_TS_COMMRDY || istate == CONN_STATE_TC_C_RANKSENT) {
                     found = TRUE;
                     *fnd_sc = iter_sc;
                 }
                 break;                
             case CONN_STATE_TA_C_TMPVCRCVD:
-                MPIU_Assert(fnd_sc != NULL);
+                MPIR_Assert(fnd_sc != NULL);
                 if (istate == CONN_STATE_TS_COMMRDY || istate == CONN_STATE_TC_C_TMPVCSENT) {
                     found = TRUE;
                     *fnd_sc = iter_sc;
@@ -396,7 +396,7 @@ static int found_better_sc(sockconn_t *sc, sockconn_t **fnd_sc)
                 /* Add code for other states here, if need be. */
             default:
                 /* FIXME: need to handle error condition better */
-                MPIU_Assert (0);
+                MPIR_Assert (0);
                 break;
             }
         }
@@ -421,7 +421,7 @@ fn_exit:
                          (MPL_DBG_FDEST, "found_better_sc(sc=%p (%s), *fnd_sc=N/A) found=FALSE",
                           sc, CONN_STATE_STR[sc->state.cstate]));
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_FOUND_BETTER_SC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_FOUND_BETTER_SC);
     return found;
 }
 
@@ -433,7 +433,7 @@ fn_exit:
 static int vc_is_in_shutdown(MPIDI_VC_t *vc)
 {
     int retval = FALSE;
-    MPIU_Assert(vc != NULL);
+    MPIR_Assert(vc != NULL);
     if (vc->state == MPIDI_VC_STATE_REMOTE_CLOSE ||
         vc->state == MPIDI_VC_STATE_CLOSE_ACKED ||
         vc->state == MPIDI_VC_STATE_CLOSED ||
@@ -462,9 +462,9 @@ static int send_id_info(const sockconn_t *const sc)
     int buf_size, iov_cnt = 2;
     ssize_t offset;
     size_t pg_id_len = 0;
-    MPIDI_STATE_DECL(MPID_STATE_SEND_ID_INFO);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_ID_INFO);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SEND_ID_INFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SEND_ID_INFO);
 
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "my_pg->id=%s my_pg->rank=%d, sc->pg_rank=%d sc->is_same_pg=%s",
                                              (char *)MPIDI_Process.my_pg->id, MPIDI_Process.my_pg_rank, sc->pg_rank,
@@ -496,18 +496,18 @@ static int send_id_info(const sockconn_t *const sc)
     }
     
     offset = MPL_large_writev(sc->fd, iov, iov_cnt);
-    MPIR_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
-    MPIR_ERR_CHKANDJUMP1(offset != buf_size, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIR_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(offset != buf_size, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIR_Strerror(errno));
 /*     FIXME log appropriate error */
 /*     FIXME-Z1  socket is just connected and we are sending a few bytes. So, there should not */
 /*     be a problem of partial data only being written to. If partial data written, */
 /*     handle this. */
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SEND_ID_INFO);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SEND_ID_INFO);
     return mpi_errno;
  fn_fail:
-    MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d, offset=%lld, errno=%d %s", mpi_errno, (long long)offset, errno, MPIU_Strerror(errno)));
+    MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d, offset=%lld, errno=%d %s", mpi_errno, (long long)offset, errno, MPIR_Strerror(errno)));
     goto fn_exit;    
 }
 
@@ -524,9 +524,9 @@ static int send_tmpvc_info(const sockconn_t *const sc)
     struct iovec iov[3];
     int buf_size, iov_cnt = 2;
     ssize_t offset;
-    MPIDI_STATE_DECL(MPID_STATE_SEND_TMPVC_INFO);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_TMPVC_INFO);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SEND_TMPVC_INFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SEND_TMPVC_INFO);
 
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "my->pg_rank=%d, sc->pg_rank=%d"
                                              , MPIDI_Process.my_pg_rank, sc->pg_rank));
@@ -548,18 +548,18 @@ static int send_tmpvc_info(const sockconn_t *const sc)
     buf_size = sizeof(hdr) + sizeof(port_info);
     
     offset = MPL_large_writev(sc->fd, iov, iov_cnt);
-    MPIR_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
-    MPIR_ERR_CHKANDJUMP1(offset != buf_size, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIR_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(offset != buf_size, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIR_Strerror(errno));
 /*     FIXME log appropriate error */
 /*     FIXME-Z1  socket is just connected and we are sending a few bytes. So, there should not */
 /*     be a problem of partial data only being written to. If partial data written, */
 /*     handle this. */
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SEND_TMPVC_INFO);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SEND_TMPVC_INFO);
     return mpi_errno;
  fn_fail:
-    MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d, offset=%lld, errno=%d %s", mpi_errno, (long long)offset, errno, MPIU_Strerror(errno)));
+    MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d, offset=%lld, errno=%d %s", mpi_errno, (long long)offset, errno, MPIR_Strerror(errno)));
     goto fn_exit;    
 }
 
@@ -578,11 +578,11 @@ static int recv_id_or_tmpvc_info(sockconn_t *const sc, int *got_sc_eof)
     struct iovec iov[2];
     char *pg_id = NULL;
 
-    MPIU_CHKPMEM_DECL (1);
-    MPIU_CHKLMEM_DECL (1);
-    MPIDI_STATE_DECL(MPID_STATE_RECV_ID_OR_TMPVC_INFO);
+    MPIR_CHKPMEM_DECL (1);
+    MPIR_CHKLMEM_DECL (1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_RECV_ID_OR_TMPVC_INFO);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_RECV_ID_OR_TMPVC_INFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_RECV_ID_OR_TMPVC_INFO);
 
     *got_sc_eof = 0;
 
@@ -594,24 +594,24 @@ static int recv_id_or_tmpvc_info(sockconn_t *const sc, int *got_sc_eof)
         *got_sc_eof = 1;
         goto fn_exit;
     }
-    MPIR_ERR_CHKANDJUMP1(nread == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(nread == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIR_Strerror(errno));
     MPIR_ERR_CHKANDJUMP(nread != hdr_len, mpi_errno, MPI_ERR_OTHER, "**read");  /* FIXME-Z1 */
-    MPIU_Assert(hdr.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_INFO ||
+    MPIR_Assert(hdr.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_INFO ||
 		hdr.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_TMPVC_INFO);
-    MPIU_Assert(hdr.datalen != 0);
+    MPIR_Assert(hdr.datalen != 0);
     
     if (hdr.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_INFO) {
 	iov[0].iov_base = (void *) &(sc->pg_rank);
 	iov[0].iov_len = sizeof(sc->pg_rank);
 	pg_id_len = hdr.datalen - sizeof(MPIDI_nem_tcp_idinfo_t);
 	if (pg_id_len != 0) {
-	    MPIU_CHKLMEM_MALLOC (pg_id, char *, pg_id_len, mpi_errno, "sockconn pg_id");
+	    MPIR_CHKLMEM_MALLOC (pg_id, char *, pg_id_len, mpi_errno, "sockconn pg_id");
 	    iov[1].iov_base = (void *)pg_id;
 	    iov[1].iov_len = pg_id_len;
 	    ++iov_cnt;
 	} 
 	nread = MPL_large_readv(sc->fd, iov, iov_cnt);
-        MPIR_ERR_CHKANDJUMP1(nread == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
+        MPIR_ERR_CHKANDJUMP1(nread == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIR_Strerror(errno));
 	MPIR_ERR_CHKANDJUMP(nread != hdr.datalen, mpi_errno, MPI_ERR_OTHER, "**read"); /* FIXME-Z1 */
 	if (pg_id_len == 0) {
 	    sc->is_same_pg = TRUE;
@@ -631,14 +631,14 @@ static int recv_id_or_tmpvc_info(sockconn_t *const sc, int *got_sc_eof)
             MPIDI_VC_t *sc_vc = sc->vc;
             MPID_nem_tcp_vc_area *sc_vc_tcp = VC_TCP(sc_vc);
 
-            MPIU_Assert(sc_vc != NULL);
+            MPIR_Assert(sc_vc != NULL);
             MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "about to incr sc_ref_count sc=%p sc->vc=%p sc_ref_count=%d", sc, sc_vc, sc_vc_tcp->sc_ref_count));
             ++sc_vc_tcp->sc_ref_count;
         }
         
         /* very important, without this is_same_connection() will always fail */
         sc->pg_is_set = TRUE;
-        MPIU_Assert(!sc->is_tmpvc);
+        MPIR_Assert(!sc->is_tmpvc);
         
 	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, (MPL_DBG_FDEST, "PKT_ID_INFO: sc->pg_rank=%d sc->is_same_pg=%d sc->fd=%d, sc->vc=%p, sc=%p", sc->pg_rank, sc->is_same_pg, sc->fd, sc->vc, sc));
     }
@@ -648,7 +648,7 @@ static int recv_id_or_tmpvc_info(sockconn_t *const sc, int *got_sc_eof)
 
         MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "PKT_TMPVC_INFO: sc->fd=%d", sc->fd));
         /* create a new VC */
-        MPIU_CHKPMEM_MALLOC (vc, MPIDI_VC_t *, sizeof(MPIDI_VC_t), mpi_errno, "real vc from tmp vc");
+        MPIR_CHKPMEM_MALLOC (vc, MPIDI_VC_t *, sizeof(MPIDI_VC_t), mpi_errno, "real vc from tmp vc");
         /* --BEGIN ERROR HANDLING-- */
         if (vc == NULL) {
             mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", NULL);
@@ -671,7 +671,7 @@ static int recv_id_or_tmpvc_info(sockconn_t *const sc, int *got_sc_eof)
         iov[0].iov_len = sizeof(sc->vc->port_name_tag);
 
         nread = MPL_large_readv(sc->fd, iov, iov_cnt);
-        MPIR_ERR_CHKANDJUMP1(nread == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
+        MPIR_ERR_CHKANDJUMP1(nread == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIR_Strerror(errno));
         MPIR_ERR_CHKANDJUMP(nread != hdr.datalen, mpi_errno, MPI_ERR_OTHER, "**read"); /* FIXME-Z1 */
         sc->is_same_pg = FALSE;
         sc->pg_id = NULL;
@@ -680,16 +680,16 @@ static int recv_id_or_tmpvc_info(sockconn_t *const sc, int *got_sc_eof)
         MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, (MPL_DBG_FDEST, "enqueuing on acceptq vc=%p, sc->fd=%d, tag=%d", vc, sc->fd, sc->vc->port_name_tag));
         MPIDI_CH3I_Acceptq_enqueue(vc, sc->vc->port_name_tag);
 
-        MPIU_Assert(!sc->pg_is_set);
+        MPIR_Assert(!sc->pg_is_set);
     }
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_FUNC_EXIT(MPID_STATE_RECV_ID_OR_TMPVC_INFO);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_RECV_ID_OR_TMPVC_INFO);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
     goto fn_exit;
 }
@@ -709,7 +709,7 @@ static int send_cmd_pkt(int fd, MPIDI_nem_tcp_socksm_pkt_type_t pkt_type)
     MPIDI_nem_tcp_header_t pkt;
     int pkt_len = sizeof(MPIDI_nem_tcp_header_t);
 
-    MPIU_Assert(pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_ACK ||
+    MPIR_Assert(pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_ACK ||
                 pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_NAK ||
 		pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_TMPVC_ACK ||
 		pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_TMPVC_NAK ||
@@ -721,8 +721,8 @@ static int send_cmd_pkt(int fd, MPIDI_nem_tcp_socksm_pkt_type_t pkt_type)
     pkt.datalen = 0;
 
     CHECK_EINTR (offset, write(fd, &pkt, pkt_len));
-    MPIR_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
-    MPIR_ERR_CHKANDJUMP1(offset != pkt_len, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno)); /* FIXME-Z1 */
+    MPIR_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIR_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(offset != pkt_len, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIR_Strerror(errno)); /* FIXME-Z1 */
  fn_exit:
     return mpi_errno;
  fn_fail:
@@ -745,15 +745,15 @@ static int recv_cmd_pkt(int fd, MPIDI_nem_tcp_socksm_pkt_type_t *pkt_type)
     ssize_t nread;
     MPIDI_nem_tcp_header_t pkt;
     int pkt_len = sizeof(MPIDI_nem_tcp_header_t);
-    MPIDI_STATE_DECL(MPID_STATE_RECV_CMD_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_RECV_CMD_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_RECV_CMD_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_RECV_CMD_PKT);
 
     CHECK_EINTR (nread, read(fd, &pkt, pkt_len));
-    MPIR_ERR_CHKANDJUMP1(nread == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
-    MPIR_ERR_CHKANDJUMP2(nread != pkt_len, mpi_errno, MPI_ERR_OTHER, "**read", "**read %d %s", nread, MPIU_Strerror(errno)); /* FIXME-Z1 */
-    MPIU_Assert(pkt.datalen == 0);
-    MPIU_Assert(pkt.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_ACK ||
+    MPIR_ERR_CHKANDJUMP1(nread == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIR_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP2(nread != pkt_len, mpi_errno, MPI_ERR_OTHER, "**read", "**read %d %s", nread, MPIR_Strerror(errno)); /* FIXME-Z1 */
+    MPIR_Assert(pkt.datalen == 0);
+    MPIR_Assert(pkt.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_ACK ||
                 pkt.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_NAK ||
                 pkt.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_TMPVC_ACK ||
                 pkt.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_TMPVC_NAK ||
@@ -761,7 +761,7 @@ static int recv_cmd_pkt(int fd, MPIDI_nem_tcp_socksm_pkt_type_t *pkt_type)
     
     *pkt_type = pkt.pkt_type;
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_RECV_CMD_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_RECV_CMD_PKT);
     return mpi_errno;
  fn_fail:
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
@@ -784,12 +784,12 @@ int MPID_nem_tcp_connect(struct MPIDI_VC *const vc)
     int idx = -1;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CONNECT);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CONNECT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CONNECT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CONNECT);
 
-    MPIU_Assert(vc != NULL);
+    MPIR_Assert(vc != NULL);
 
     /* Handle error case */
     if (vc_tcp->state == MPID_NEM_TCP_VC_STATE_ERROR ||
@@ -817,7 +817,7 @@ int MPID_nem_tcp_connect(struct MPIDI_VC *const vc)
         
         ++vc_tcp->connect_retry_count;
 
-        MPIU_Assert(vc_tcp->sc == NULL);
+        MPIR_Assert(vc_tcp->sc == NULL);
         mpi_errno = find_free_entry(&idx);
         if (mpi_errno != MPI_SUCCESS) MPIR_ERR_POP (mpi_errno);
 
@@ -841,7 +841,7 @@ int MPID_nem_tcp_connect(struct MPIDI_VC *const vc)
             pmi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
             MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
 #endif
-            MPIU_CHKLMEM_MALLOC(bc, char *, val_max_sz, mpi_errno, "bc");
+            MPIR_CHKLMEM_MALLOC(bc, char *, val_max_sz, mpi_errno, "bc");
             
             sc->is_tmpvc = FALSE;
             
@@ -854,13 +854,13 @@ int MPID_nem_tcp_connect(struct MPIDI_VC *const vc)
         }
         else {
             sc->is_tmpvc = TRUE;
-            MPIU_Assert(!sc->pg_is_set);
+            MPIR_Assert(!sc->pg_is_set);
         }
 
         sock_addr = &(vc_tcp->sock_id);
 
         CHECK_EINTR(sc->fd, socket(AF_INET, SOCK_STREAM, 0));
-        MPIR_ERR_CHKANDJUMP2(sc->fd == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", MPIU_Strerror(errno), errno);
+        MPIR_ERR_CHKANDJUMP2(sc->fd == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", MPIR_Strerror(errno), errno);
 
         plfd->fd = sc->fd;
 	MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "sc->fd=%d, plfd->events=%d, plfd->revents=%d, vc=%p, sc=%p", sc->fd, plfd->events, plfd->revents, vc, sc));
@@ -870,7 +870,7 @@ int MPID_nem_tcp_connect(struct MPIDI_VC *const vc)
         MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "connecting to 0x%08X:%d", sock_addr->sin_addr.s_addr, sock_addr->sin_port));
         rc = connect(sc->fd, (SA*)sock_addr, sizeof(*sock_addr));
         /* connect should not be called with CHECK_EINTR macro */
-        MPIR_ERR_CHKANDJUMP2(rc < 0 && errno != EINPROGRESS, mpi_errno, MPI_ERR_OTHER, "**sock_connect", "**sock_connect %d %s", errno, MPIU_Strerror(errno));
+        MPIR_ERR_CHKANDJUMP2(rc < 0 && errno != EINPROGRESS, mpi_errno, MPI_ERR_OTHER, "**sock_connect", "**sock_connect %d %s", errno, MPIR_Strerror(errno));
         
         if (rc == 0) {
             CHANGE_STATE(sc, CONN_STATE_TC_C_CNTD);
@@ -893,10 +893,10 @@ int MPID_nem_tcp_connect(struct MPIDI_VC *const vc)
             }
             /* very important, without this is_same_connection() will always fail */
             sc->pg_is_set = TRUE;
-            MPIU_Assert(!sc->is_tmpvc);
+            MPIR_Assert(!sc->is_tmpvc);
         }
         else { /* (vc->pg == NULL), dynamic proc connection - temp vc */
-            MPIU_Assert(sc->is_tmpvc);
+            MPIR_Assert(sc->is_tmpvc);
         }
 
         ASSIGN_SC_TO_VC(vc_tcp, sc);
@@ -906,7 +906,7 @@ int MPID_nem_tcp_connect(struct MPIDI_VC *const vc)
     }
     else if (vc_tcp->state == MPID_NEM_TCP_VC_STATE_CONNECTED) {
         sc = vc_tcp->sc;
-        MPIU_Assert(sc != NULL);
+        MPIR_Assert(sc != NULL);
         /* Do nothing here, the caller just needs to wait for the connection
            state machine to work its way through the states.  Doing something at
            this point will almost always just mess up any head-to-head
@@ -914,13 +914,13 @@ int MPID_nem_tcp_connect(struct MPIDI_VC *const vc)
     }
     else {
         /* We already handled the error case at the top of the routine. */
-        MPIU_Assertp(0);
+        MPIR_Assertp(0);
     }
 
  fn_exit:
     /* MPID_nem_tcp_connpoll(); FIXME-Imp should be called? */
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CONNECT);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CONNECT);
     return mpi_errno;
  fn_fail:
     if (idx != -1)
@@ -941,17 +941,17 @@ static int cleanup_and_free_sc_plfd(sockconn_t *const sc)
     const int idx = sc->index;
     struct pollfd *const plfd = &MPID_nem_tcp_plfd_tbl[sc->index];
     freenode_t *node;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_CLEANUP_AND_FREE_SC_PLFD);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CLEANUP_AND_FREE_SC_PLFD);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CLEANUP_AND_FREE_SC_PLFD);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CLEANUP_AND_FREE_SC_PLFD);
 
     if (sc == NULL)
         goto fn_exit;
 
     if (sc_vc) {
         MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "about to decr sc_ref_count sc=%p sc->vc=%p sc_ref_count=%d", sc, sc_vc, sc_vc_tcp->sc_ref_count));
-        MPIU_Assert(sc_vc_tcp->sc_ref_count > 0);
+        MPIR_Assert(sc_vc_tcp->sc_ref_count > 0);
         --sc_vc_tcp->sc_ref_count;
     }
     
@@ -966,16 +966,16 @@ static int cleanup_and_free_sc_plfd(sockconn_t *const sc)
     INIT_SC_ENTRY(sc, idx);
     INIT_POLLFD_ENTRY(plfd);
 
-    MPIU_CHKPMEM_MALLOC(node, freenode_t *, sizeof(freenode_t), mpi_errno, "free node");
+    MPIR_CHKPMEM_MALLOC(node, freenode_t *, sizeof(freenode_t), mpi_errno, "free node");
     node->index = idx;
     Q_ENQUEUE(&freeq, node);
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_CLEANUP_AND_FREE_SC_PLFD);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CLEANUP_AND_FREE_SC_PLFD);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
     goto fn_exit;
 }
@@ -993,9 +993,9 @@ int close_cleanup_and_free_sc_plfd(sockconn_t *const sc)
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno2 = MPI_SUCCESS;
     int rc;
-    MPIDI_STATE_DECL(MPID_STATE_CLOSE_CLEANUP_AND_FREE_SC_PLFD);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CLOSE_CLEANUP_AND_FREE_SC_PLFD);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CLOSE_CLEANUP_AND_FREE_SC_PLFD);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CLOSE_CLEANUP_AND_FREE_SC_PLFD);
 
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "vc=%p, sc=%p, closing fd=%d", sc->vc, sc, sc->fd));
 
@@ -1004,13 +1004,13 @@ int close_cleanup_and_free_sc_plfd(sockconn_t *const sc)
 
     CHECK_EINTR(rc, close(sc->fd));
     if (rc == -1 && errno != EAGAIN && errno != EBADF)
-        MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**close", "**close %s", MPIU_Strerror(errno));
+        MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**close", "**close %s", MPIR_Strerror(errno));
 
     mpi_errno2 = cleanup_and_free_sc_plfd(sc);
     if (mpi_errno2) MPIR_ERR_ADD(mpi_errno, mpi_errno2);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_CLOSE_CLEANUP_AND_FREE_SC_PLFD);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CLOSE_CLEANUP_AND_FREE_SC_PLFD);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1030,9 +1030,9 @@ int MPID_nem_tcp_cleanup (struct MPIDI_VC *const vc)
 {
     int mpi_errno = MPI_SUCCESS, i;
     MPID_nem_tcp_vc_area *const vc_tcp = VC_TCP(vc);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CLEANUP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CLEANUP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CLEANUP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CLEANUP);
 
     if (vc_tcp->sc != NULL) {
         mpi_errno = close_cleanup_and_free_sc_plfd(vc_tcp->sc);
@@ -1047,7 +1047,7 @@ int MPID_nem_tcp_cleanup (struct MPIDI_VC *const vc)
                clean him up he'll end up accessing the about-to-be-freed vc. */
             mpi_errno = close_cleanup_and_free_sc_plfd(&g_sc_tbl[i]);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPIU_Assert(g_sc_tbl[i].vc == NULL);
+            MPIR_Assert(g_sc_tbl[i].vc == NULL);
         }
         ++i;
     }
@@ -1055,10 +1055,10 @@ int MPID_nem_tcp_cleanup (struct MPIDI_VC *const vc)
     /* close_cleanup_and_free_sc_plfd can technically cause a reconnect on a per-sc basis, but I
        don't think that it can happen when cleanup is called.  Let's
        assert this for now and remove it if we prove that it can happen. */
-    MPIU_Assert(vc_tcp->sc_ref_count == 0);
+    MPIR_Assert(vc_tcp->sc_ref_count == 0);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CLEANUP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CLEANUP);
     return mpi_errno;
  fn_fail:
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
@@ -1077,9 +1077,9 @@ int MPID_nem_tcp_ckpt_cleanup(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_CLEANUP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_CLEANUP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_CLEANUP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_CLEANUP);
 
     for (i = 0; i < MPIDI_Process.my_pg->size; ++i) {
         MPIDI_VC_t *vc;
@@ -1099,7 +1099,7 @@ int MPID_nem_tcp_ckpt_cleanup(void)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_CLEANUP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_CLEANUP);
     return mpi_errno;
  fn_fail:
 
@@ -1115,9 +1115,9 @@ static int state_tc_c_cnting_handler(struct pollfd *const plfd, sockconn_t *cons
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_NEM_TCP_SOCK_STATUS_t status;
-    MPIDI_STATE_DECL(MPID_STATE_STATE_TC_C_CNTING_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_STATE_TC_C_CNTING_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_STATE_TC_C_CNTING_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_STATE_TC_C_CNTING_HANDLER);
    
     status = MPID_nem_tcp_check_sock_status(plfd);
 
@@ -1141,7 +1141,7 @@ static int state_tc_c_cnting_handler(struct pollfd *const plfd, sockconn_t *cons
         */
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_STATE_TC_C_CNTING_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_STATE_TC_C_CNTING_HANDLER);
     return mpi_errno;
 }
 
@@ -1152,9 +1152,9 @@ static int state_tc_c_cnting_handler(struct pollfd *const plfd, sockconn_t *cons
 static int state_tc_c_cntd_handler(struct pollfd *const plfd, sockconn_t *const sc)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_STATE_TC_C_CNTD_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_STATE_TC_C_CNTD_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_STATE_TC_C_CNTD_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_STATE_TC_C_CNTD_HANDLER);
 
     if (found_better_sc(sc, NULL)) {
         MPL_DBG_MSG(MPIDI_NEM_TCP_DBG_DET, VERBOSE, "state_tc_c_cntd_handler(): closing sc");
@@ -1186,7 +1186,7 @@ static int state_tc_c_cntd_handler(struct pollfd *const plfd, sockconn_t *const
         /* Remain in the same state */
     }
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_STATE_TC_C_CNTD_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_STATE_TC_C_CNTD_HANDLER);
     return mpi_errno;
  fn_fail:
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
@@ -1204,15 +1204,15 @@ static int state_c_ranksent_handler(struct pollfd *const plfd, sockconn_t *const
     MPID_nem_tcp_vc_area *const sc_vc_tcp = VC_TCP(sc_vc);
     int mpi_errno = MPI_SUCCESS;
     MPIDI_nem_tcp_socksm_pkt_type_t pkt_type;
-    MPIDI_STATE_DECL(MPID_STATE_STATE_C_RANKSENT_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_STATE_C_RANKSENT_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_STATE_C_RANKSENT_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_STATE_C_RANKSENT_HANDLER);
 
     if (IS_READABLE(plfd)) {
         mpi_errno = recv_cmd_pkt(sc->fd, &pkt_type);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         
-        MPIU_Assert(pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_ACK ||
+        MPIR_Assert(pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_ACK ||
                     pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_NAK ||
                     pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_CLOSED);
 
@@ -1236,12 +1236,12 @@ static int state_c_ranksent_handler(struct pollfd *const plfd, sockconn_t *const
             mpi_errno = close_cleanup_and_free_sc_plfd(sc); /* QUIESCENT */
             break;
         default:
-            MPIU_Assert(0);
+            MPIR_Assert(0);
         }
     }
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_STATE_C_RANKSENT_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_STATE_C_RANKSENT_HANDLER);
     return mpi_errno;
 fn_fail:
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
@@ -1260,9 +1260,9 @@ static int state_c_tmpvcsent_handler(struct pollfd *const plfd, sockconn_t *cons
     MPID_nem_tcp_vc_area *const sc_vc_tcp = VC_TCP(sc_vc);
     int mpi_errno = MPI_SUCCESS;
     MPIDI_nem_tcp_socksm_pkt_type_t pkt_type;
-    MPIDI_STATE_DECL(MPID_STATE_STATE_C_TMPVCSENT_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_STATE_C_TMPVCSENT_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_STATE_C_TMPVCSENT_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_STATE_C_TMPVCSENT_HANDLER);
 
     if (IS_READABLE(plfd)) {
         mpi_errno = recv_cmd_pkt(sc->fd, &pkt_type);
@@ -1279,7 +1279,7 @@ static int state_c_tmpvcsent_handler(struct pollfd *const plfd, sockconn_t *cons
                packet then there really was a problem */
         }
         else {
-            MPIU_Assert(pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_TMPVC_ACK ||
+            MPIR_Assert(pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_TMPVC_ACK ||
                         pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_TMPVC_NAK);
 
             if (pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_TMPVC_ACK) {
@@ -1297,7 +1297,7 @@ static int state_c_tmpvcsent_handler(struct pollfd *const plfd, sockconn_t *cons
         }
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_STATE_C_TMPVCSENT_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_STATE_C_TMPVCSENT_HANDLER);
     return mpi_errno;
 }
 
@@ -1310,9 +1310,9 @@ static int state_l_cntd_handler(struct pollfd *const plfd, sockconn_t *const sc)
     int mpi_errno = MPI_SUCCESS;
     MPID_NEM_TCP_SOCK_STATUS_t status;
     int got_sc_eof = 0;
-    MPIDI_STATE_DECL(MPID_STATE_STATE_L_CNTD_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_STATE_L_CNTD_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_STATE_L_CNTD_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_STATE_L_CNTD_HANDLER);
 
     status = MPID_nem_tcp_check_sock_status(plfd);
     if (status == MPID_NEM_TCP_SOCK_ERROR_EOF) {
@@ -1347,7 +1347,7 @@ static int state_l_cntd_handler(struct pollfd *const plfd, sockconn_t *const sc)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_STATE_L_CNTD_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_STATE_L_CNTD_HANDLER);
     return mpi_errno;
  fn_fail:
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
@@ -1367,11 +1367,11 @@ static int state_l_cntd_handler(struct pollfd *const plfd, sockconn_t *const sc)
 static int do_i_win(sockconn_t *rmt_sc)
 {
     int win = FALSE;
-    MPIDI_STATE_DECL(MPID_STATE_DO_I_WIN);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_DO_I_WIN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_DO_I_WIN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_DO_I_WIN);
 
-    MPIU_Assert(rmt_sc->pg_is_set);
+    MPIR_Assert(rmt_sc->pg_is_set);
 
     if (rmt_sc->is_same_pg) {
         if (MPIDI_Process.my_pg_rank > rmt_sc->pg_rank)
@@ -1387,7 +1387,7 @@ static int do_i_win(sockconn_t *rmt_sc)
                       rmt_sc, CONN_STATE_STR[rmt_sc->state.cstate],
                       (win ? "TRUE" : "FALSE"),(rmt_sc->is_same_pg ? "TRUE" : "FALSE"), MPIDI_Process.my_pg_rank,
                       rmt_sc->pg_rank));
-    MPIDI_FUNC_EXIT(MPID_STATE_DO_I_WIN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_DO_I_WIN);
     return win;
 }
 
@@ -1403,9 +1403,9 @@ static int state_l_rankrcvd_handler(struct pollfd *const plfd, sockconn_t *const
     MPID_NEM_TCP_SOCK_STATUS_t status;
     sockconn_t *fnd_sc = NULL;
     int snd_nak = FALSE;
-    MPIDI_STATE_DECL(MPID_STATE_STATE_L_RANKRCVD_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_STATE_L_RANKRCVD_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_STATE_L_RANKRCVD_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_STATE_L_RANKRCVD_HANDLER);
 
     status = MPID_nem_tcp_check_sock_status(plfd);
     if (status == MPID_NEM_TCP_SOCK_ERROR_EOF)
@@ -1453,7 +1453,7 @@ static int state_l_rankrcvd_handler(struct pollfd *const plfd, sockconn_t *const
     }
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_STATE_L_RANKRCVD_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_STATE_L_RANKRCVD_HANDLER);
     return mpi_errno;
 fn_fail:
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
@@ -1472,9 +1472,9 @@ static int state_l_tmpvcrcvd_handler(struct pollfd *const plfd, sockconn_t *cons
     MPID_nem_tcp_vc_area *const sc_vc_tcp = VC_TCP(sc_vc);
     int mpi_errno = MPI_SUCCESS;
     MPID_NEM_TCP_SOCK_STATUS_t status;
-    MPIDI_STATE_DECL(MPID_STATE_STATE_L_TMPVCRCVD_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_STATE_L_TMPVCRCVD_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_STATE_L_TMPVCRCVD_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_STATE_L_TMPVCRCVD_HANDLER);
 
     status = MPID_nem_tcp_check_sock_status(plfd);
     if (status == MPID_NEM_TCP_SOCK_ERROR_EOF) {
@@ -1495,7 +1495,7 @@ static int state_l_tmpvcrcvd_handler(struct pollfd *const plfd, sockconn_t *cons
     }
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_STATE_L_TMPVCRCVD_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_STATE_L_TMPVCRCVD_HANDLER);
     return mpi_errno;
  fn_fail:
     {
@@ -1522,12 +1522,12 @@ static int MPID_nem_tcp_recv_handler(sockconn_t *const sc)
        that. */
     const int sc_fd = sc->fd;
     MPIDI_VC_t *const sc_vc = sc->vc;
-    MPIU_AssertDeclValue(MPID_nem_tcp_vc_area *const sc_vc_tcp, VC_TCP(sc_vc));
+    MPIR_AssertDeclValue(MPID_nem_tcp_vc_area *const sc_vc_tcp, VC_TCP(sc_vc));
     int mpi_errno = MPI_SUCCESS;
     ssize_t bytes_recvd;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_RECV_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_RECV_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_RECV_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_RECV_HANDLER);
 
     if (sc_vc->ch.recv_active == NULL)
     {
@@ -1540,10 +1540,10 @@ static int MPID_nem_tcp_recv_handler(sockconn_t *const sc)
 
             if (bytes_recvd == 0)
             {
-                MPIU_Assert(sc != NULL);
-                MPIU_Assert(sc_vc != NULL);
+                MPIR_Assert(sc != NULL);
+                MPIR_Assert(sc_vc != NULL);
                 /* sc->vc->sc will be NULL if sc->vc->state == _INACTIVE */
-                MPIU_Assert(sc_vc_tcp->sc == NULL || sc_vc_tcp->sc == sc);
+                MPIR_Assert(sc_vc_tcp->sc == NULL || sc_vc_tcp->sc == sc);
 
                 if (vc_is_in_shutdown(sc_vc))
                 {
@@ -1567,7 +1567,7 @@ static int MPID_nem_tcp_recv_handler(sockconn_t *const sc)
             }
             else
             {
-                MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
+                MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIR_Strerror(errno));
             }
         }
     
@@ -1584,8 +1584,8 @@ static int MPID_nem_tcp_recv_handler(sockconn_t *const sc)
         MPL_IOV *iov = &rreq->dev.iov[rreq->dev.iov_offset];
         int (*reqFn)(MPIDI_VC_t *, MPIR_Request *, int *);
 
-        MPIU_Assert(rreq->dev.iov_count > 0);
-        MPIU_Assert(rreq->dev.iov_count + rreq->dev.iov_offset <= MPL_IOV_LIMIT);
+        MPIR_Assert(rreq->dev.iov_count > 0);
+        MPIR_Assert(rreq->dev.iov_count + rreq->dev.iov_offset <= MPL_IOV_LIMIT);
 
         bytes_recvd = MPL_large_readv(sc_fd, iov, rreq->dev.iov_count);
         if (bytes_recvd <= 0)
@@ -1596,7 +1596,7 @@ static int MPID_nem_tcp_recv_handler(sockconn_t *const sc)
             if (bytes_recvd == 0) {
                 MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
             } else {
-                MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
+                MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIR_Strerror(errno));
             }
         }
 
@@ -1624,7 +1624,7 @@ static int MPID_nem_tcp_recv_handler(sockconn_t *const sc)
         reqFn = rreq->dev.OnDataAvail;
         if (!reqFn)
         {
-            MPIU_Assert(MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+            MPIR_Assert(MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_RESP);
             mpi_errno = MPID_Request_complete(rreq);
             if (mpi_errno != MPI_SUCCESS) {
                 MPIR_ERR_POP(mpi_errno);
@@ -1652,7 +1652,7 @@ static int MPID_nem_tcp_recv_handler(sockconn_t *const sc)
     }
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_RECV_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_RECV_HANDLER);
     return mpi_errno;
 fn_fail: /* comm related failures jump here */
     {
@@ -1680,9 +1680,9 @@ static int state_commrdy_handler(struct pollfd *const plfd, sockconn_t *const sc
        invalid. */
     MPIDI_VC_t *sc_vc = sc->vc;
     MPID_nem_tcp_vc_area *sc_vc_tcp = VC_TCP(sc_vc);
-    MPIDI_STATE_DECL(MPID_STATE_STATE_COMMRDY_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_STATE_COMMRDY_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_STATE_COMMRDY_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_STATE_COMMRDY_HANDLER);
 
     MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "vc = %p", sc->vc);
     if (IS_READABLE(plfd))
@@ -1702,7 +1702,7 @@ static int state_commrdy_handler(struct pollfd *const plfd, sockconn_t *const sc
         }
     }
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_STATE_COMMRDY_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_STATE_COMMRDY_HANDLER);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1716,7 +1716,7 @@ static int state_commrdy_handler(struct pollfd *const plfd, sockconn_t *const sc
 int MPID_nem_tcp_sm_init(void)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
     /* Set the appropriate handlers */
     sc_state_info[CONN_STATE_TS_CLOSED].sc_state_handler = NULL;
     sc_state_info[CONN_STATE_TC_C_CNTING].sc_state_handler = state_tc_c_cnting_handler;
@@ -1746,13 +1746,13 @@ int MPID_nem_tcp_sm_init(void)
     MPID_nem_tcp_plfd_tbl = NULL;
     alloc_sc_plfd_tbls();
     
-    MPIU_CHKPMEM_MALLOC(recv_buf, char*, MPID_NEM_TCP_RECV_MAX_PKT_LEN, mpi_errno, "TCP temporary buffer");
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_MALLOC(recv_buf, char*, MPID_NEM_TCP_RECV_MAX_PKT_LEN, mpi_errno, "TCP temporary buffer");
+    MPIR_CHKPMEM_COMMIT();
 
  fn_exit:
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -1798,8 +1798,8 @@ int MPID_nem_tcp_connpoll(int in_blocking_poll)
     int num_polled = g_tbl_size;
 
     if (num_polled) {
-        MPIU_Assert(MPID_nem_tcp_plfd_tbl != NULL);
-        MPIU_Assert(g_sc_tbl != NULL);
+        MPIR_Assert(MPID_nem_tcp_plfd_tbl != NULL);
+        MPIR_Assert(g_sc_tbl != NULL);
     }
 
     /* To improve shared memory performance, we don't call the poll()
@@ -1813,7 +1813,7 @@ int MPID_nem_tcp_connpoll(int in_blocking_poll)
     num_skipped_polls = 0;
 
     CHECK_EINTR(n, poll(MPID_nem_tcp_plfd_tbl, num_polled, 0));
-    MPIR_ERR_CHKANDJUMP1(n == -1, mpi_errno, MPI_ERR_OTHER, "**poll", "**poll %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(n == -1, mpi_errno, MPI_ERR_OTHER, "**poll", "**poll %s", MPIR_Strerror(errno));
     /* MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "some sc fd poll event")); */
     for(i = 0; i < num_polled; i++)
     {
@@ -1834,7 +1834,7 @@ int MPID_nem_tcp_connpoll(int in_blocking_poll)
                    (Stevens Network Programming Vol 1, pg 184) */
                 rc = read(it_plfd->fd, &dummy, 1);
                 if (rc < 0)
-                    err_str = MPIU_Strerror(errno);
+                    err_str = MPIR_Strerror(errno);
                 
                 MPL_DBG_MSG(MPIDI_NEM_TCP_DBG_DET, VERBOSE, "error polling fd, closing sc");
                 if (it_sc->vc) {
@@ -1902,22 +1902,22 @@ int MPID_nem_tcp_state_listening_handler(struct pollfd *const unused_1, sockconn
     socklen_t len;
     SA_IN rmt_addr;
     sockconn_t *l_sc;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_STATE_LISTENING_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_STATE_LISTENING_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_STATE_LISTENING_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_STATE_LISTENING_HANDLER);
 
     while (1) {
         l_sc = &g_sc_tbl[0];  /* N3 Important */
         len = sizeof(SA_IN);
         MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "before accept"));
         if ((connfd = accept(l_sc->fd, (SA *) &rmt_addr, &len)) < 0) {
-            MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "after accept, l_sc=%p lstnfd=%d connfd=%d, errno=%d:%s ", l_sc, l_sc->fd, connfd, errno, MPIU_Strerror(errno)));
+            MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "after accept, l_sc=%p lstnfd=%d connfd=%d, errno=%d:%s ", l_sc, l_sc->fd, connfd, errno, MPIR_Strerror(errno)));
             if (errno == EINTR) 
                 continue;
             else if (errno == EWOULDBLOCK || errno == EAGAIN)
                 break; /*  no connection in the listen queue. get out of here.(N1) */
 
-            MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**sock_accept", "**sock_accept %s", MPIU_Strerror(errno));
+            MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**sock_accept", "**sock_accept %s", MPIR_Strerror(errno));
         }
         else {
             int idx = -1;
@@ -1942,7 +1942,7 @@ int MPID_nem_tcp_state_listening_handler(struct pollfd *const unused_1, sockconn
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_STATE_LISTENING_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_STATE_LISTENING_HANDLER);
     return mpi_errno;
  fn_fail:
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
@@ -1960,9 +1960,9 @@ static int error_closed(struct MPIDI_VC *const vc, int req_errno)
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_tcp_vc_area * const vc_tcp = VC_TCP(vc);
-    MPIDI_STATE_DECL(MPID_STATE_ERROR_CLOSED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ERROR_CLOSED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_ERROR_CLOSED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_ERROR_CLOSED);
 
     vc_tcp->state = MPID_NEM_TCP_VC_STATE_ERROR;
 
@@ -1973,7 +1973,7 @@ static int error_closed(struct MPIDI_VC *const vc, int req_errno)
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_ERROR_CLOSED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_ERROR_CLOSED);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1990,9 +1990,9 @@ int MPID_nem_tcp_cleanup_on_error(MPIDI_VC_t *const vc, int req_errno)
 {
     int mpi_errno = MPI_SUCCESS;
     int mpi_errno2 = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CLEANUP_ON_ERROR);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CLEANUP_ON_ERROR);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CLEANUP_ON_ERROR);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CLEANUP_ON_ERROR);
 
     mpi_errno = MPID_nem_tcp_cleanup(vc);
     /* not jumping on error, keep going */
@@ -2001,7 +2001,7 @@ int MPID_nem_tcp_cleanup_on_error(MPIDI_VC_t *const vc, int req_errno)
     if (mpi_errno2) MPIR_ERR_ADD(mpi_errno, mpi_errno2);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CLEANUP_ON_ERROR);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CLEANUP_ON_ERROR);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_ckpt.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_ckpt.c
index 7f91c72..2193b2a 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_ckpt.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_ckpt.c
@@ -19,14 +19,14 @@ int MPID_nem_tcp_ckpt_pause_send_vc(MPIDI_VC_t *vc)
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_PAUSE_SEND_VC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_PAUSE_SEND_VC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_PAUSE_SEND_VC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_PAUSE_SEND_VC);
 
     vc_tcp->send_paused = TRUE;
     
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_PAUSE_SEND_VC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_PAUSE_SEND_VC);
     return mpi_errno;
 fn_fail:
 
@@ -41,9 +41,9 @@ int MPID_nem_tcp_pkt_unpause_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intpt
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_UNPAUSE_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_UNPAUSE_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_UNPAUSE_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_UNPAUSE_HANDLER);
 
     vc_tcp->send_paused = FALSE;
 
@@ -61,7 +61,7 @@ int MPID_nem_tcp_pkt_unpause_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intpt
 fn_exit:
     *buflen = sizeof(MPIDI_CH3_Pkt_t);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_UNPAUSE_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_UNPAUSE_HANDLER);
     return mpi_errno;
 fn_fail:
 
@@ -77,9 +77,9 @@ int MPID_nem_tcp_ckpt_continue_vc(MPIDI_VC_t *vc)
     int mpi_errno = MPI_SUCCESS;
     MPID_PKT_DECL_CAST(upkt, MPIDI_nem_tcp_pkt_unpause_t, unpause_pkt);
     MPIR_Request *unpause_req;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);
 
     unpause_pkt->type = MPIDI_NEM_PKT_NETMOD;
     unpause_pkt->subtype = MPIDI_NEM_TCP_PKT_UNPAUSE;
@@ -93,7 +93,7 @@ int MPID_nem_tcp_ckpt_continue_vc(MPIDI_VC_t *vc)
     }
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);
     return mpi_errno;
 fn_fail:
 
@@ -112,9 +112,9 @@ int MPID_nem_tcp_ckpt_restart_vc(MPIDI_VC_t *vc)
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_nem_tcp_pkt_unpause_t * const pkt = (MPIDI_nem_tcp_pkt_unpause_t *)&upkt;
     MPIR_Request *sreq;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_RESTART_VC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_RESTART_VC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_RESTART_VC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_RESTART_VC);
 
     pkt->type = MPIDI_NEM_PKT_NETMOD;
     pkt->subtype = MPIDI_NEM_TCP_PKT_UNPAUSE;
@@ -132,7 +132,7 @@ int MPID_nem_tcp_ckpt_restart_vc(MPIDI_VC_t *vc)
     }
     
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_RESTART_VC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_RESTART_VC);
     return mpi_errno;
 fn_fail:
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_finalize.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_finalize.c
index a612c48..9569434 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_finalize.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_finalize.c
@@ -14,9 +14,9 @@ int MPID_nem_tcp_finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int ret;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_FINALIZE);
 
     mpi_errno = MPID_nem_tcp_send_finalize();
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -26,11 +26,11 @@ int MPID_nem_tcp_finalize(void)
     if (MPID_nem_tcp_g_lstn_sc.fd)
     {
         CHECK_EINTR (ret, close(MPID_nem_tcp_g_lstn_sc.fd));
-        MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**closesocket", "**closesocket %s %d", errno, MPIU_Strerror (errno));
+        MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**closesocket", "**closesocket %s %d", errno, MPIR_Strerror (errno));
     }
         
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_getip.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_getip.c
index f3464fc..3835971 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_getip.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_getip.c
@@ -71,9 +71,9 @@ int MPIDI_GetIPInterface( MPIDU_Sock_ifaddr_t *ifaddr, int *found )
 #ifdef WORDS_BIGENDIAN
     unsigned int MSBlocalhost = 0x7f000001;
 #endif
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_GETIPINTERFACE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_GETIPINTERFACE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_GETIPINTERFACE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_GETIPINTERFACE);
     *found = 0;
 
     fd = socket(AF_INET, SOCK_DGRAM, 0);
@@ -113,7 +113,7 @@ int MPIDI_GetIPInterface( MPIDU_Sock_ifaddr_t *ifaddr, int *found )
 	rc = ioctl(fd, SIOCGIFCONF, &ifconf);
 	if (rc < 0) {
 	    if (errno != EINVAL || buf_len_prev != 0) {
-		MPIR_ERR_SETANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**ioctl", "**ioctl %d %s", errno, MPIU_Strerror(errno));
+		MPIR_ERR_SETANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**ioctl", "**ioctl %d %s", errno, MPIR_Strerror(errno));
 	    }
 	}
         else {
@@ -167,14 +167,14 @@ int MPIDI_GetIPInterface( MPIDU_Sock_ifaddr_t *ifaddr, int *found )
 		if (nfound == 0) {
 		    myifaddr.type = AF_INET;
 		    myifaddr.len  = 4;
-		    MPIU_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
+		    MPIR_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
 		}
 	    }
 	    else {
 		nfound++;
 		myifaddr.type = AF_INET;
 		myifaddr.len  = 4;
-		MPIU_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
+		MPIR_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
 	    }
 	}
 	else {
@@ -216,7 +216,7 @@ fn_exit:
     if (fd >= 0)
         close(fd);
     
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_GETIPINTERFACE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_GETIPINTERFACE);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -254,14 +254,14 @@ int MPIDI_Get_IP_for_iface(const char *ifname, MPIDU_Sock_ifaddr_t *ifaddr, int
         *found = FALSE;
 
     fd = socket(AF_INET, SOCK_DGRAM, 0);
-    MPIR_ERR_CHKANDJUMP2(fd < 0, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", MPIU_Strerror(errno), errno);
+    MPIR_ERR_CHKANDJUMP2(fd < 0, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", MPIR_Strerror(errno), errno);
     ifr.ifr_addr.sa_family = AF_INET; /* just IPv4 for now */
     MPL_strncpy(ifr.ifr_name, ifname, IFNAMSIZ-1);
     ret = ioctl(fd, SIOCGIFADDR, &ifr);
-    MPIR_ERR_CHKANDJUMP2(ret < 0, mpi_errno, MPI_ERR_OTHER, "**ioctl", "**ioctl %d %s", errno, MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP2(ret < 0, mpi_errno, MPI_ERR_OTHER, "**ioctl", "**ioctl %d %s", errno, MPIR_Strerror(errno));
 
     *found = TRUE;
-    MPIU_Memcpy(ifaddr->ifaddr, &((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr, 4);
+    MPIR_Memcpy(ifaddr->ifaddr, &((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr, 4);
     ifaddr->len = 4;
     ifaddr->type = AF_INET;
 
@@ -269,7 +269,7 @@ fn_exit:
     if (fd != -1) {
         ret = close(fd);
         if (ret < 0) {
-            MPIR_ERR_SET2(mpi_errno, MPI_ERR_OTHER, "**sock_close", "**sock_close %s %d", MPIU_Strerror(errno), errno);
+            MPIR_ERR_SET2(mpi_errno, MPI_ERR_OTHER, "**sock_close", "**sock_close %s %d", MPIR_Strerror(errno), errno);
         }
     }
     return mpi_errno;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_init.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_init.c
index 4fbcef7..fd5e5a1 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_init.c
@@ -122,12 +122,12 @@ static int set_up_listener(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int ret;
-    MPIDI_STATE_DECL(MPID_STATE_SET_UP_LISTENER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SET_UP_LISTENER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SET_UP_LISTENER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SET_UP_LISTENER);
 
     MPID_nem_tcp_g_lstn_plfd.fd = MPID_nem_tcp_g_lstn_sc.fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
-    MPIR_ERR_CHKANDJUMP2(MPID_nem_tcp_g_lstn_sc.fd == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", MPIU_Strerror(errno), errno);
+    MPIR_ERR_CHKANDJUMP2(MPID_nem_tcp_g_lstn_sc.fd == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", MPIR_Strerror(errno), errno);
 
     mpi_errno = MPID_nem_tcp_set_sockopts(MPID_nem_tcp_g_lstn_sc.fd);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -137,12 +137,12 @@ static int set_up_listener(void)
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     ret = listen(MPID_nem_tcp_g_lstn_sc.fd, SOMAXCONN);
-    MPIR_ERR_CHKANDJUMP2(ret == -1, mpi_errno, MPI_ERR_OTHER, "**listen", "**listen %s %d", MPIU_Strerror(errno), errno);  
+    MPIR_ERR_CHKANDJUMP2(ret == -1, mpi_errno, MPI_ERR_OTHER, "**listen", "**listen %s %d", MPIR_Strerror(errno), errno);
     MPID_nem_tcp_g_lstn_sc.state.lstate = LISTEN_STATE_LISTENING;
     MPID_nem_tcp_g_lstn_sc.handler = MPID_nem_tcp_state_listening_handler;
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SET_UP_LISTENER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SET_UP_LISTENER);
     return mpi_errno;
 fn_fail:
 
@@ -156,14 +156,14 @@ fn_fail:
 int MPID_nem_tcp_init (MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_INIT);
 
     MPID_nem_net_module_vc_dbg_print_sendq = MPID_nem_tcp_vc_dbg_print_sendq;
 
     /* first make sure that our private fields in the vc fit into the area provided  */
-    MPIU_Assert(sizeof(MPID_nem_tcp_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);
+    MPIR_Assert(sizeof(MPID_nem_tcp_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);
 
 #if defined (MPL_USE_DBG_LOGGING)
     MPIDI_NEM_TCP_DBG_DET = MPL_dbg_class_alloc("MPIDI_NEM_TCP_DBG_DET", "nem_sock_det");
@@ -192,17 +192,17 @@ int MPID_nem_tcp_init (MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_
         void *ret;
 
         ret = signal(SIGPIPE, SIG_IGN);
-        MPIR_ERR_CHKANDJUMP1(ret == SIG_ERR, mpi_errno, MPI_ERR_OTHER, "**signal", "**signal %s", MPIU_Strerror(errno));
+        MPIR_ERR_CHKANDJUMP1(ret == SIG_ERR, mpi_errno, MPI_ERR_OTHER, "**signal", "**signal %s", MPIR_Strerror(errno));
         if (ret != SIG_DFL && ret != SIG_IGN) {
             /* The app has set its own signal handler.  Replace the previous handler. */
             ret = signal(SIGPIPE, ret);
-            MPIR_ERR_CHKANDJUMP1(ret == SIG_ERR, mpi_errno, MPI_ERR_OTHER, "**signal", "**signal %s", MPIU_Strerror(errno));
+            MPIR_ERR_CHKANDJUMP1(ret == SIG_ERR, mpi_errno, MPI_ERR_OTHER, "**signal", "**signal %s", MPIR_Strerror(errno));
         }
     }
 #endif
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_INIT);
 /*     fprintf(stdout, FCNAME " Exit\n"); fflush(stdout); */
     return mpi_errno;
  fn_fail:
@@ -223,9 +223,9 @@ static int ckpt_restart(void)
     char *bc_val          = NULL;
     int val_max_sz;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_CKPT_RESTART);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CKPT_RESTART);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CKPT_RESTART);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CKPT_RESTART);
 
     /* First, clean up.  We didn't shut anything down before the
        checkpoint, so we need to go close and free any resources */
@@ -265,7 +265,7 @@ static int ckpt_restart(void)
     
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_CKPT_RESTART);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CKPT_RESTART);
     return mpi_errno;
 fn_fail:
 
@@ -301,7 +301,7 @@ static int GetSockInterfaceAddr(int myRank, char *ifname, int maxIfname,
     int mpi_errno = MPI_SUCCESS;
     int ifaddrFound = 0;
 
-    MPIU_Assert(maxIfname);
+    MPIR_Assert(maxIfname);
     ifname[0] = '\0';
 
     MPIR_ERR_CHKANDJUMP(MPIR_CVAR_CH3_INTERFACE_HOSTNAME && MPIR_CVAR_NEMESIS_TCP_NETWORK_IFACE, mpi_errno, MPI_ERR_OTHER, "**ifname_and_hostname");
@@ -393,7 +393,7 @@ static int GetSockInterfaceAddr(int myRank, char *ifname, int maxIfname,
             ifaddr->type = -1;
             MPIR_ERR_INTERNAL(mpi_errno, "Address too long to fit in field");
         } else {
-            MPIU_Memcpy( ifaddr->ifaddr, info->h_addr_list[0], ifaddr->len );
+            MPIR_Memcpy( ifaddr->ifaddr, info->h_addr_list[0], ifaddr->len );
 	}
     }
 
@@ -417,9 +417,9 @@ int MPID_nem_tcp_get_business_card (int my_rank, char **bc_val_p, int *val_max_s
     int ret;
     struct sockaddr_in sock_id;
     socklen_t len;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_GET_BUSINESS_CARD);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_GET_BUSINESS_CARD);
     
     mpi_errno = GetSockInterfaceAddr(my_rank, ifname, sizeof(ifname), &ifaddr);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -433,7 +433,7 @@ int MPID_nem_tcp_get_business_card (int my_rank, char **bc_val_p, int *val_max_s
 
     len = sizeof(sock_id);
     ret = getsockname (MPID_nem_tcp_g_lstn_sc.fd, (struct sockaddr *)&sock_id, &len);
-    MPIR_ERR_CHKANDJUMP1 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**getsockname", "**getsockname %s", MPIU_Strerror (errno));
+    MPIR_ERR_CHKANDJUMP1 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**getsockname", "**getsockname %s", MPIR_Strerror (errno));
 
     str_errno = MPL_str_add_int_arg (bc_val_p, val_max_sz_p, MPIDI_CH3I_PORT_KEY, ntohs(sock_id.sin_port));
     if (str_errno) {
@@ -459,7 +459,7 @@ int MPID_nem_tcp_get_business_card (int my_rank, char **bc_val_p, int *val_max_s
 
  fn_exit:
 /*     fprintf(stdout, "MPID_nem_tcp_get_business_card Exit, mpi_errno=%d\n", mpi_errno); fflush(stdout); */
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_GET_BUSINESS_CARD);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -474,9 +474,9 @@ int MPID_nem_tcp_connect_to_root (const char *business_card, MPIDI_VC_t *new_vc)
     int mpi_errno = MPI_SUCCESS;
     struct in_addr addr;
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(new_vc);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);
 
     /* vc is already allocated before reaching this point */
 
@@ -490,7 +490,7 @@ int MPID_nem_tcp_connect_to_root (const char *business_card, MPIDI_VC_t *new_vc)
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);
     return mpi_errno;
 
  fn_fail:
@@ -506,9 +506,9 @@ int MPID_nem_tcp_vc_init (MPIDI_VC_t *vc)
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_VC_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_VC_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_VC_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_VC_INIT);
 
     vc_tcp->state = MPID_NEM_TCP_VC_STATE_DISCONNECTED;
     
@@ -542,7 +542,7 @@ int MPID_nem_tcp_vc_init (MPIDI_VC_t *vc)
     
     vc_tcp->connect_retry_count = 0;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_VC_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_VC_INIT);
     return mpi_errno;
 }
 
@@ -576,9 +576,9 @@ int MPID_nem_tcp_get_addr_port_from_bc(const char *business_card, struct in_addr
     int port_int;
     /*char desc_str[256];*/
     char ifname[256];
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_GET_ADDR_PORT_FROM_BC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_GET_ADDR_PORT_FROM_BC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_GET_ADDR_PORT_FROM_BC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_GET_ADDR_PORT_FROM_BC);
     
     /*     fprintf(stdout, FCNAME " Enter\n"); fflush(stdout); */
     /* desc_str is only used for debugging
@@ -592,7 +592,7 @@ int MPID_nem_tcp_get_addr_port_from_bc(const char *business_card, struct in_addr
     /* MPL_STR_FAIL is not a valid MPI error code so we store the result in ret
      * instead of mpi_errno. */
     MPIR_ERR_CHKANDJUMP (ret != MPL_STR_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**argstr_missingport");
-    MPIU_Assert((port_int >> (8*sizeof(*port))) == 0); /* ensure port_int isn't too large for *port */
+    MPIR_Assert((port_int >> (8*sizeof(*port))) == 0); /* ensure port_int isn't too large for *port */
     *port = htons((in_port_t)port_int);
 
     ret = MPL_str_get_string_arg(business_card, MPIDI_CH3I_IFNAME_KEY, ifname, sizeof(ifname));
@@ -604,7 +604,7 @@ int MPID_nem_tcp_get_addr_port_from_bc(const char *business_card, struct in_addr
     
  fn_exit:
 /*     fprintf(stdout, FCNAME " Exit\n"); fflush(stdout); */
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_GET_ADDR_PORT_FROM_BC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_GET_ADDR_PORT_FROM_BC);
     return mpi_errno;
  fn_fail:
 /*     fprintf(stdout, "failure. mpi_errno = %d\n", mpi_errno); */
@@ -625,9 +625,9 @@ int MPID_nem_tcp_bind (int sockfd)
     int ret;
     struct sockaddr_in sin;
     int port;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_BIND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_BIND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_BIND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_BIND);
    
     MPIR_ERR_CHKANDJUMP(MPIR_CVAR_CH3_PORT_RANGE.low < 0 || MPIR_CVAR_CH3_PORT_RANGE.low > MPIR_CVAR_CH3_PORT_RANGE.high, mpi_errno, MPI_ERR_OTHER, "**badportrange");
 
@@ -645,16 +645,16 @@ int MPID_nem_tcp_bind (int sockfd)
             break;
         
         /* check for real error */
-        MPIR_ERR_CHKANDJUMP3 (errno != EADDRINUSE && errno != EADDRNOTAVAIL, mpi_errno, MPI_ERR_OTHER, "**sock|poll|bind", "**sock|poll|bind %d %d %s", port, errno, MPIU_Strerror (errno));
+        MPIR_ERR_CHKANDJUMP3 (errno != EADDRINUSE && errno != EADDRNOTAVAIL, mpi_errno, MPI_ERR_OTHER, "**sock|poll|bind", "**sock|poll|bind %d %d %s", port, errno, MPIR_Strerror (errno));
     }
     /* check if an available port was found */
-    MPIR_ERR_CHKANDJUMP3 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock|poll|bind", "**sock|poll|bind %d %d %s", port-1, errno, MPIU_Strerror (errno));
+    MPIR_ERR_CHKANDJUMP3 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock|poll|bind", "**sock|poll|bind %d %d %s", port-1, errno, MPIR_Strerror (errno));
 
  fn_exit:
 /*     if (ret == 0) */
 /*         fprintf(stdout, "sockfd=%d  port=%d bound\n", sockfd, port); */
 /*     fprintf(stdout, FCNAME " Exit\n"); fflush(stdout); */
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_BIND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_BIND);
     return mpi_errno;
  fn_fail:
 /*     fprintf(stdout, "failure. mpi_errno = %d\n", mpi_errno); */
@@ -670,9 +670,9 @@ int MPID_nem_tcp_vc_terminate(MPIDI_VC_t *vc)
 {
     int mpi_errno = MPI_SUCCESS;
     int req_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_VC_TERMINATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_VC_TERMINATE);
 
     if (vc->state != MPIDI_VC_STATE_CLOSED) {
         /* VC is terminated as a result of a fault.  Complete
@@ -699,7 +699,7 @@ int MPID_nem_tcp_vc_terminate(MPIDI_VC_t *vc)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_VC_TERMINATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_VC_TERMINATE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -715,9 +715,9 @@ int MPID_nem_tcp_vc_terminated(MPIDI_VC_t *vc)
     /* This is called when the VC is to be terminated once all queued
        sends have been sent. */
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_NEM_TCP_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_NEM_TCP_VC_TERMINATED);
 
-    MPIDI_FUNC_ENTER(MPID_NEM_TCP_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_NEM_TCP_VC_TERMINATED);
 
     mpi_errno = MPID_nem_tcp_cleanup(vc);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -726,7 +726,7 @@ int MPID_nem_tcp_vc_terminated(MPIDI_VC_t *vc)
     if(mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_NEM_TCP_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_NEM_TCP_VC_TERMINATED);
     return mpi_errno;
  fn_fail:
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_queue.h b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_queue.h
index f20f4a9..6b13e5e 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_queue.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_queue.h
@@ -19,7 +19,7 @@
 #define GENERIC_Q_HEAD(q) ((q).head)
 
 #define GENERIC_Q_ENQUEUE_EMPTY(qp, ep, next_field) do {        \
-        MPIU_Assert (GENERIC_Q_EMPTY (*(qp)));                  \
+        MPIR_Assert (GENERIC_Q_EMPTY (*(qp)));                  \
         (qp)->head = (qp)->tail = ep;                           \
         (ep)->next_field = NULL;                                \
         PRINT_QUEUE (qp, next_field);                           \
@@ -40,7 +40,7 @@
    tail of a linked list of elements.  The list is inserted on the end
    of the queue. */
 #define GENERIC_Q_ENQUEUE_EMPTY_MULTIPLE(qp, ep0, ep1, next_field) do { \
-        MPIU_Assert (GENERIC_Q_EMPTY (*(qp)));                          \
+        MPIR_Assert (GENERIC_Q_EMPTY (*(qp)));                          \
         (qp)->head = ep0;                                               \
         (qp)->tail = ep1;                                               \
         (ep1)->next_field = NULL;                                       \
@@ -59,7 +59,7 @@
 
 
 #define GENERIC_Q_DEQUEUE(qp, epp, next_field) do {     \
-        MPIU_Assert (!GENERIC_Q_EMPTY (*(qp)));         \
+        MPIR_Assert (!GENERIC_Q_EMPTY (*(qp)));         \
         *(epp) = (qp)->head;                            \
         (qp)->head = (*(epp))->next_field;              \
         if ((qp)->head == NULL)                         \
@@ -68,7 +68,7 @@
 
 /* remove the elements from the top of the queue starting with ep0 through ep1 */
 #define GENERIC_Q_REMOVE_ELEMENTS(qp, ep0, ep1, next_field) do {        \
-        MPIU_Assert (GENERIC_Q_HEAD (*(qp)) == (ep0));                  \
+        MPIR_Assert (GENERIC_Q_HEAD (*(qp)) == (ep0));                  \
         (qp)->head = (ep1)->next_field;                                 \
         if ((qp)->head == NULL)                                         \
             (qp)->tail = NULL;                                          \
@@ -82,7 +82,7 @@
 #define GENERIC_L_HEAD(q) ((q).head)
 
 #define GENERIC_L_ADD_EMPTY(qp, ep, next_field, prev_field) do {        \
-        MPIU_Assert (GENERIC_L_EMPTY (*(qp)));                          \
+        MPIR_Assert (GENERIC_L_EMPTY (*(qp)));                          \
         (qp)->head = ep;                                                \
         (ep)->next_field = (ep)->prev_field = NULL;                     \
     } while (0)
@@ -100,7 +100,7 @@
     } while (0)
 
 #define GENERIC_L_REMOVE(qp, ep, next_field, prev_field) do {   \
-        MPIU_Assert (!GENERIC_L_EMPTY (*(qp)));                 \
+        MPIR_Assert (!GENERIC_L_EMPTY (*(qp)));                 \
         if ((ep)->prev_field)                                   \
             ((ep)->prev_field)->next_field = (ep)->next_field;  \
         else                                                    \
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c
index b0f1c20..135b69e 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c
@@ -23,7 +23,7 @@ static struct {MPID_nem_tcp_send_q_element_t *top;} free_buffers = {0};
 #define ALLOC_Q_ELEMENT(e) do {                                                                                                         \
         if (S_EMPTY (free_buffers))                                                                                                     \
         {                                                                                                                               \
-            MPIU_CHKPMEM_MALLOC (*(e), MPID_nem_tcp_send_q_element_t *, sizeof(MPID_nem_tcp_send_q_element_t),      \
+            MPIR_CHKPMEM_MALLOC (*(e), MPID_nem_tcp_send_q_element_t *, sizeof(MPID_nem_tcp_send_q_element_t),      \
                                  mpi_errno, "send queue element");                                                                      \
         }                                                                                                                               \
         else                                                                                                                            \
@@ -44,22 +44,22 @@ int MPID_nem_tcp_send_init(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIU_CHKPMEM_DECL (NUM_PREALLOC_SENDQ);
+    MPIR_CHKPMEM_DECL (NUM_PREALLOC_SENDQ);
     
     /* preallocate sendq elements */
     for (i = 0; i < NUM_PREALLOC_SENDQ; ++i)
     {
         MPID_nem_tcp_send_q_element_t *e;
         
-        MPIU_CHKPMEM_MALLOC (e, MPID_nem_tcp_send_q_element_t *,
+        MPIR_CHKPMEM_MALLOC (e, MPID_nem_tcp_send_q_element_t *,
                              sizeof(MPID_nem_tcp_send_q_element_t), mpi_errno, "send queue element");
         S_PUSH (&free_buffers, e);
     }
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     return mpi_errno;
 }
 
@@ -75,12 +75,12 @@ int MPID_nem_tcp_send_queued(MPIDI_VC_t *vc, MPIDI_nem_tcp_request_queue_t *send
     MPL_IOV *iov;
     int complete;
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_SEND_QUEUED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_SEND_QUEUED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_SEND_QUEUED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_SEND_QUEUED);
 
     MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "vc = %p", vc);
-    MPIU_Assert(vc != NULL);
+    MPIR_Assert(vc != NULL);
 
     if (MPIDI_CH3I_Sendq_empty(*send_queue))
 	goto fn_exit;
@@ -111,7 +111,7 @@ int MPID_nem_tcp_send_queued(MPIDI_VC_t *vc, MPIDI_nem_tcp_request_queue_t *send
                 break;
             } else {
                 int req_errno = MPI_SUCCESS;
-                MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror(errno));
+                MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIR_Strerror(errno));
                 MPIR_ERR_SET1(req_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                 mpi_errno = MPID_nem_tcp_cleanup_on_error(vc, req_errno);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -148,7 +148,7 @@ int MPID_nem_tcp_send_queued(MPIDI_VC_t *vc, MPIDI_nem_tcp_request_queue_t *send
             reqFn = sreq->dev.OnDataAvail;
             if (!reqFn)
             {
-                MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+                MPIR_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
                 mpi_errno = MPID_Request_complete(sreq);
                 if (mpi_errno != MPI_SUCCESS) {
                     MPIR_ERR_POP(mpi_errno);
@@ -176,7 +176,7 @@ int MPID_nem_tcp_send_queued(MPIDI_VC_t *vc, MPIDI_nem_tcp_request_queue_t *send
         UNSET_PLFD(vc_tcp);
     
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_SEND_QUEUED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_SEND_QUEUED);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -209,9 +209,9 @@ int MPID_nem_tcp_conn_est (MPIDI_VC_t *vc)
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CONN_EST);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CONN_EST);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CONN_EST);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CONN_EST);
 
     MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
 
@@ -223,7 +223,7 @@ int MPID_nem_tcp_conn_est (MPIDI_VC_t *vc)
     }
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CONN_EST);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CONN_EST);
     return mpi_errno;
 }
 
@@ -239,11 +239,11 @@ int MPID_nem_tcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, voi
     intptr_t offset = 0;
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
     sockconn_t *sc = vc_tcp->sc;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG);
     
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "tcp_iStartContigMsg");
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
@@ -276,7 +276,7 @@ int MPID_nem_tcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, voi
                         offset = 0;
                     else {
                         int req_errno = MPI_SUCCESS;
-                        MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror(errno));
+                        MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIR_Strerror(errno));
                         MPIR_ERR_SET1(req_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                         mpi_errno = MPID_nem_tcp_cleanup_on_error(vc, req_errno);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -307,8 +307,8 @@ int MPID_nem_tcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, voi
 
     /* create a request */
     sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-    MPIU_Assert (sreq != NULL);
-    MPIU_Object_set_ref (sreq, 2);
+    MPIR_Assert (sreq != NULL);
+    MPIR_Object_set_ref (sreq, 2);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
 
     sreq->dev.OnDataAvail = 0;
@@ -336,7 +336,7 @@ int MPID_nem_tcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, voi
         sreq->dev.iov_count = 1;
     }
     
-    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPL_IOV_LEN > 0);
+    MPIR_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPL_IOV_LEN > 0);
 
     if (MPID_nem_tcp_vc_send_paused(vc_tcp)) {
         MPIDI_CH3I_Sendq_enqueue(&vc_tcp->paused_send_queue, sreq);
@@ -360,7 +360,7 @@ int MPID_nem_tcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, voi
     *sreq_ptr = sreq;
     
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -379,11 +379,11 @@ int MPID_nem_tcp_iStartContigMsg_paused(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_
     intptr_t offset = 0;
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
     sockconn_t *sc = vc_tcp->sc;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG_PAUSED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG_PAUSED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG_PAUSED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG_PAUSED);
     
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "tcp_iStartContigMsg");
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
@@ -415,7 +415,7 @@ int MPID_nem_tcp_iStartContigMsg_paused(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_
                     offset = 0;
                 else {
                     int req_errno = MPI_SUCCESS;
-                    MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror(errno));
+                    MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIR_Strerror(errno));
                     MPIR_ERR_SET1(req_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);
 
                     mpi_errno = MPID_nem_tcp_cleanup_on_error(vc, req_errno);
@@ -446,8 +446,8 @@ int MPID_nem_tcp_iStartContigMsg_paused(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_
 
     /* create a request */
     sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-    MPIU_Assert (sreq != NULL);
-    MPIU_Object_set_ref (sreq, 2);
+    MPIR_Assert (sreq != NULL);
+    MPIR_Object_set_ref (sreq, 2);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
 
     sreq->dev.OnDataAvail = 0;
@@ -475,7 +475,7 @@ int MPID_nem_tcp_iStartContigMsg_paused(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_
         sreq->dev.iov_count = 1;
     }
     
-    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPL_IOV_LEN > 0);
+    MPIR_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPL_IOV_LEN > 0);
 
     if (MPID_nem_tcp_vc_is_connected(vc_tcp)) {
         if (MPIDI_CH3I_Sendq_empty(vc_tcp->send_queue)) {
@@ -495,7 +495,7 @@ int MPID_nem_tcp_iStartContigMsg_paused(MPIDI_VC_t *vc, void *hdr, intptr_t hdr_
     *sreq_ptr = sreq;
     
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG_PAUSED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_ISTARTCONTIGMSG_PAUSED);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -512,11 +512,11 @@ int MPID_nem_tcp_iSendContig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr, intp
     intptr_t offset = 0;
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
     sockconn_t *sc = vc_tcp->sc;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_ISENDCONTIGMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_ISENDCONTIGMSG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_ISENDCONTIGMSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_ISENDCONTIGMSG);
     
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "tcp_iSendContig");
 
@@ -560,7 +560,7 @@ int MPID_nem_tcp_iSendContig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr, intp
                         offset = 0;
                     else {
                         int req_errno = MPI_SUCCESS;
-                        MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror(errno));
+                        MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIR_Strerror(errno));
                         MPIR_ERR_SET1(req_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                         mpi_errno = MPID_nem_tcp_cleanup_on_error(vc, req_errno);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -577,7 +577,7 @@ int MPID_nem_tcp_iSendContig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr, intp
                     reqFn = sreq->dev.OnDataAvail;
                     if (!reqFn)
                     {
-                        MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+                        MPIR_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
                         mpi_errno = MPID_Request_complete(sreq);
                         if (mpi_errno != MPI_SUCCESS) {
                             MPIR_ERR_POP(mpi_errno);
@@ -638,7 +638,7 @@ int MPID_nem_tcp_iSendContig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr, intp
         }
     }
     else if (offset < sizeof(MPIDI_CH3_Pkt_t) + sreq->dev.ext_hdr_sz) {
-        MPIU_Assert(sreq->dev.ext_hdr_sz > 0);
+        MPIR_Assert(sreq->dev.ext_hdr_sz > 0);
         sreq->dev.iov[sreq->dev.iov_count].MPL_IOV_BUF = sreq->dev.ext_hdr_ptr;
         sreq->dev.iov[sreq->dev.iov_count].MPL_IOV_LEN = sreq->dev.ext_hdr_sz;
         sreq->dev.iov_count++;
@@ -659,7 +659,7 @@ int MPID_nem_tcp_iSendContig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *hdr, intp
 enqueue_request:
     /* enqueue request */
     MPL_DBG_MSG (MPIDI_CH3_DBG_CHANNEL, VERBOSE, "enqueuing");
-    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPL_IOV_LEN > 0);
+    MPIR_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPL_IOV_LEN > 0);
 
     sreq->ch.vc = vc;
     sreq->dev.iov_offset = 0;
@@ -684,7 +684,7 @@ enqueue_request:
     }
     
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_ISENDCONTIGMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_ISENDCONTIGMSG);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -706,10 +706,10 @@ int MPID_nem_tcp_SendNoncontig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *header,
     MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
     int iov_offset;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_SENDNONCONTIG);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_SENDNONCONTIG);
     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "tcp_SendNoncontig");
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
 
     iov_n = 0;
 
@@ -752,7 +752,7 @@ int MPID_nem_tcp_SendNoncontig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *header,
                         offset = 0;
                     else {
                         int req_errno = MPI_SUCCESS;
-                        MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror(errno));
+                        MPIR_ERR_SET1(req_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIR_Strerror(errno));
                         MPIR_ERR_SET1(req_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                         mpi_errno = MPID_nem_tcp_cleanup_on_error(vc, req_errno);
                         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -826,7 +826,7 @@ int MPID_nem_tcp_SendNoncontig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *header,
         
     /* enqueue request */
     MPL_DBG_MSG (MPIDI_CH3_DBG_CHANNEL, VERBOSE, "enqueuing");
-    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPL_IOV_LEN > 0);
+    MPIR_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPL_IOV_LEN > 0);
         
     sreq->ch.vc = vc;
     sreq->dev.iov_offset = 0;
@@ -851,7 +851,7 @@ int MPID_nem_tcp_SendNoncontig(MPIDI_VC_t *vc, MPIR_Request *sreq, void *header,
     }
     
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_SENDNONCONTIG);
     return mpi_errno;
 fn_fail:
     MPIR_Request_free(sreq);
@@ -867,9 +867,9 @@ int MPID_nem_tcp_error_out_send_queue(struct MPIDI_VC *const vc, int req_errno)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *req;
     MPID_nem_tcp_vc_area *const vc_tcp = VC_TCP(vc);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_ERROR_OUT_SEND_QUEUE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_ERROR_OUT_SEND_QUEUE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_ERROR_OUT_SEND_QUEUE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_ERROR_OUT_SEND_QUEUE);
 
     /* we don't call onDataAvail or onFinal handlers because this is
        an error condition and we just want to mark them as complete */
@@ -897,7 +897,7 @@ int MPID_nem_tcp_error_out_send_queue(struct MPIDI_VC *const vc, int req_errno)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_ERROR_OUT_SEND_QUEUE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_ERROR_OUT_SEND_QUEUE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_utility.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_utility.c
index b2f8573..861d291 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_utility.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_utility.c
@@ -37,9 +37,9 @@ int MPID_nem_tcp_get_vc_from_conninfo (char *pg_id, int pg_rank, struct MPIDI_VC
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_PG_t *pg;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_GET_VC_FROM_CONNINFO);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_GET_VC_FROM_CONNINFO);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_GET_VC_FROM_CONNINFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_GET_VC_FROM_CONNINFO);
 
     MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "pg_id=%s pg_rank=%d", pg_id, pg_rank));
     
@@ -52,7 +52,7 @@ int MPID_nem_tcp_get_vc_from_conninfo (char *pg_id, int pg_rank, struct MPIDI_VC
     MPIDI_PG_Get_vc_set_active (pg, pg_rank, vc);
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_GET_VC_FROM_CONNINFO);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_GET_VC_FROM_CONNINFO);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -75,30 +75,30 @@ int MPID_nem_tcp_set_sockopts (int fd)
     option = 1;
     len = sizeof(int);
     ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &option, len);
-    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);
+    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
     ret = getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &option, &len);
-    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);
+    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
 
     option = 128*1024;
     len = sizeof(int);
     ret = setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &option, len);
-    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);
+    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
     ret = getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &option, &len);
-    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);
+    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
     ret = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &option, len);
-    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);
+    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
     ret = getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &option, &len);
-    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);
+    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
     
     flags = fcntl(fd, F_GETFL, 0);
-    MPIR_ERR_CHKANDJUMP2 (flags == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);
+    MPIR_ERR_CHKANDJUMP2 (flags == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
     ret = fcntl(fd, F_SETFL, flags | SO_REUSEADDR);
-    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);
+    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
     
     flags = fcntl(fd, F_GETFL, 0);
-    MPIR_ERR_CHKANDJUMP2 (flags == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);    
+    MPIR_ERR_CHKANDJUMP2 (flags == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
     ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
-    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIU_Strerror (errno), errno);    
+    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", MPIR_Strerror (errno), errno);
 
  fn_exit:
 /*     fprintf(stdout, FCNAME " Exit\n"); fflush(stdout); */
@@ -160,7 +160,7 @@ MPID_nem_tcp_check_sock_status(const struct pollfd *const plfd)
         if (getsockopt(plfd->fd, SOL_SOCKET, SO_ERROR, &error, &n) < 0 || error != 0) 
         {
             rc = MPID_NEM_TCP_SOCK_ERROR_EOF; /*  (N1) */
-            MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "getsockopt failure. error=%d:%s", error, MPIU_Strerror(error)));
+            MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "getsockopt failure. error=%d:%s", error, MPIR_Strerror(error)));
             goto fn_exit;
         }
         rc = MPID_NEM_TCP_SOCK_CONNECTED;
@@ -186,7 +186,7 @@ int MPID_nem_tcp_is_sock_connected(int fd)
     n = sizeof(error);
     if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &n) < 0 || error != 0) 
     {
-        MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "getsockopt failure. error=%d:%s", error, MPIU_Strerror(error)));
+        MPL_DBG_MSG_FMT(MPIDI_NEM_TCP_DBG_DET, VERBOSE, (MPL_DBG_FDEST, "getsockopt failure. error=%d:%s", error, MPIR_Strerror(error)));
         rc = FALSE; /*  error */
         goto fn_exit;
     }
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_finalize.c b/src/mpid/ch3/channels/nemesis/src/ch3_finalize.c
index 641d951..6931611 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_finalize.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_finalize.c
@@ -13,9 +13,9 @@
 int MPIDI_CH3_Finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_FINALIZE);
 
     mpi_errno = MPIDI_CH3I_Progress_finalize();
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -24,6 +24,6 @@ int MPIDI_CH3_Finalize(void)
     if (mpi_errno) MPIR_ERR_POP (mpi_errno);
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_FINALIZE);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_init.c b/src/mpid/ch3/channels/nemesis/src/ch3_init.c
index 53fac87..5c986b4 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_init.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_init.c
@@ -78,9 +78,9 @@ int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t *pg_p, int pg_rank)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_INIT);
 
     /* Override split_type */
     MPIR_Comm_fns = &comm_fns;
@@ -106,7 +106,7 @@ int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t *pg_p, int pg_rank)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_INIT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -116,13 +116,13 @@ int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t *pg_p, int pg_rank)
    MPI Port functions */
 int MPIDI_CH3_PortFnsInit( MPIDI_PortFns *portFns )
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PORTFNSINIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PORTFNSINIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PORTFNSINIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PORTFNSINIT);
 
     MPL_UNREFERENCED_ARG(portFns);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PORTFNSINIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PORTFNSINIT);
     return 0;
 }
 
@@ -133,15 +133,15 @@ int MPIDI_CH3_PortFnsInit( MPIDI_PortFns *portFns )
 int MPIDI_CH3_Get_business_card(int myRank, char *value, int length)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPIDI_STATE_MPIDI_CH3_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPIDI_STATE_MPIDI_CH3_GET_BUSINESS_CARD);
 
-    MPIDI_FUNC_ENTER(MPIDI_STATE_MPIDI_CH3_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_ENTER(MPIDI_STATE_MPIDI_CH3_GET_BUSINESS_CARD);
 
     mpi_errno = MPID_nem_get_business_card(myRank, value, length);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPIDI_STATE_MPIDI_CH3_GET_BUSINESS_CARD);
+    MPIR_FUNC_VERBOSE_EXIT(MPIDI_STATE_MPIDI_CH3_GET_BUSINESS_CARD);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -155,9 +155,9 @@ fn_fail:
 int MPIDI_CH3_VC_Init( MPIDI_VC_t *vc )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_VC_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_VC_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_VC_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_VC_INIT);
 
     /* FIXME: Circular dependency.  Before calling MPIDI_CH3_Init,
        MPID_Init calls InitPG which calls MPIDI_PG_Create which calls
@@ -190,7 +190,7 @@ int MPIDI_CH3_VC_Init( MPIDI_VC_t *vc )
 
  fn_exit:
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_VC_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_VC_INIT);
     return mpi_errno;
 }
 
@@ -201,9 +201,9 @@ int MPIDI_CH3_VC_Init( MPIDI_VC_t *vc )
 int MPIDI_CH3_VC_Destroy(MPIDI_VC_t *vc )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_VC_DESTROY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_VC_DESTROY);
 
     /* no need to destroy vc to self, this corresponds to the optimization above
      * in MPIDI_CH3_VC_Init */
@@ -215,7 +215,7 @@ int MPIDI_CH3_VC_Destroy(MPIDI_VC_t *vc )
     mpi_errno = MPID_nem_vc_destroy(vc);
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_VC_DESTROY);
     return mpi_errno;
 }
 
@@ -228,14 +228,14 @@ int MPIDI_CH3_Connect_to_root (const char *port_name, MPIDI_VC_t **new_vc)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_VC_t * vc;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);
 
     *new_vc = NULL; /* so that the err handling knows to cleanup */
 
-    MPIU_CHKPMEM_MALLOC (vc, MPIDI_VC_t *, sizeof(MPIDI_VC_t), mpi_errno, "vc");
+    MPIR_CHKPMEM_MALLOC (vc, MPIDI_VC_t *, sizeof(MPIDI_VC_t), mpi_errno, "vc");
     /* FIXME - where does this vc get freed?
        ANSWER (goodell@) - ch3u_port.c FreeNewVC
                            (but the VC_Destroy is in this file) */
@@ -253,16 +253,16 @@ int MPIDI_CH3_Connect_to_root (const char *port_name, MPIDI_VC_t **new_vc)
     mpi_errno = MPID_nem_connect_to_root (port_name, vc);
     if (mpi_errno) MPIR_ERR_POP (mpi_errno);
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);
     return mpi_errno;
  fn_fail:
     /* freeing without giving the lower layer a chance to cleanup can lead to
        leaks on error */
     if (*new_vc)
         MPIDI_CH3_VC_Destroy(*new_vc);
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -279,23 +279,23 @@ const char * MPIDI_CH3_VC_GetStateString( struct MPIDI_VC *vc )
 /* We don't initialize before calling MPIDI_CH3_VC_Init */
 int MPIDI_CH3_PG_Init(MPIDI_PG_t *pg_p)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PG_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PG_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PG_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PG_INIT);
     MPL_UNREFERENCED_ARG(pg_p);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PG_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PG_INIT);
     return MPI_SUCCESS;
 }
 
 int MPIDI_CH3_PG_Destroy(MPIDI_PG_t *pg_p)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PG_DESTROY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PG_DESTROY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PG_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PG_DESTROY);
     MPL_UNREFERENCED_ARG(pg_p);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PG_DESTROY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PG_DESTROY);
     return MPI_SUCCESS;
 }
 
@@ -320,21 +320,21 @@ int MPID_nem_register_initcomp_cb(int (* callback)(void))
 {
     int mpi_errno = MPI_SUCCESS;
     initcomp_cb_t *ep;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_REGISTER_INITCOMP_CB);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_REGISTER_INITCOMP_CB);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_REGISTER_INITCOMP_CB);
-    MPIU_CHKPMEM_MALLOC(ep, initcomp_cb_t *, sizeof(*ep), mpi_errno, "initcomp callback element");
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_REGISTER_INITCOMP_CB);
+    MPIR_CHKPMEM_MALLOC(ep, initcomp_cb_t *, sizeof(*ep), mpi_errno, "initcomp callback element");
 
     ep->callback = callback;
     INITCOMP_S_PUSH(ep);
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_REGISTER_INITCOMP_CB);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_REGISTER_INITCOMP_CB);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -347,9 +347,9 @@ int MPIDI_CH3_InitCompleted(void)
     int mpi_errno = MPI_SUCCESS;
     initcomp_cb_t *ep;
     initcomp_cb_t *ep_tmp;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_INITCOMPLETED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_INITCOMPLETED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_INITCOMPLETED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_INITCOMPLETED);
     ep = INITCOMP_S_TOP();
     while (ep)
     {
@@ -361,7 +361,7 @@ int MPIDI_CH3_InitCompleted(void)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_INITCOMPLETED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_INITCOMPLETED);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_isend.c b/src/mpid/ch3/channels/nemesis/src/ch3_isend.c
index 5192ead..0e40fd5 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_isend.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_isend.c
@@ -20,9 +20,9 @@ int MPIDI_CH3_iSend (MPIDI_VC_t *vc, MPIR_Request *sreq, void * hdr, intptr_t hd
     int mpi_errno = MPI_SUCCESS;
     int again = 0;
     int in_cs = FALSE;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISEND);
 
     if (vc->state == MPIDI_VC_STATE_MORIBUND) {
         sreq->status.MPI_ERROR = MPI_SUCCESS;
@@ -38,8 +38,8 @@ int MPIDI_CH3_iSend (MPIDI_VC_t *vc, MPIR_Request *sreq, void * hdr, intptr_t hd
         goto fn_exit;
     }
 
-    /* MPIU_Assert(vc->ch.is_local); */
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    /* MPIR_Assert(vc->ch.is_local); */
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
 
     /* This channel uses a fixed length header, the size of which
      * is the maximum of all possible packet headers */
@@ -51,7 +51,7 @@ int MPIDI_CH3_iSend (MPIDI_VC_t *vc, MPIR_Request *sreq, void * hdr, intptr_t hd
 
     if (MPIDI_CH3I_Sendq_empty(MPIDI_CH3I_shm_sendq))
     {
-        MPIU_Assert(hdr_sz <= INT_MAX);
+        MPIR_Assert(hdr_sz <= INT_MAX);
 	MPL_DBG_MSG_D (MPIDI_CH3_DBG_CHANNEL, VERBOSE, "iSend %d", (int) hdr_sz);
 	mpi_errno = MPID_nem_mpich_send_header (hdr, (int)hdr_sz, vc, &again);
         if (mpi_errno) MPIR_ERR_POP (mpi_errno);
@@ -66,7 +66,7 @@ int MPIDI_CH3_iSend (MPIDI_VC_t *vc, MPIR_Request *sreq, void * hdr, intptr_t hd
             reqFn = sreq->dev.OnDataAvail;
             if (!reqFn)
             {
-                MPIU_Assert (MPIDI_Request_get_type (sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+                MPIR_Assert (MPIDI_Request_get_type (sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
                 mpi_errno = MPID_Request_complete(sreq);
                 if (mpi_errno != MPI_SUCCESS) {
                     MPIR_ERR_POP(mpi_errno);
@@ -91,7 +91,7 @@ int MPIDI_CH3_iSend (MPIDI_VC_t *vc, MPIR_Request *sreq, void * hdr, intptr_t hd
         MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISEND);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_isendv.c b/src/mpid/ch3/channels/nemesis/src/ch3_isendv.c
index 3bc9762..d87365f 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_isendv.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_isendv.c
@@ -24,9 +24,9 @@ int MPIDI_CH3_iSendv (MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV *iov, int n_io
     int j;
     int in_cs = FALSE;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISENDV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISENDV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISENDV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISENDV);
 
     if (vc->state == MPIDI_VC_STATE_MORIBUND) {
         sreq->status.MPI_ERROR = MPI_SUCCESS;
@@ -38,7 +38,7 @@ int MPIDI_CH3_iSendv (MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV *iov, int n_io
 
     if (vc_ch->iSendContig)
     {
-        MPIU_Assert(n_iov > 0);
+        MPIR_Assert(n_iov > 0);
         switch (n_iov)
         {
         case 1:
@@ -54,9 +54,9 @@ int MPIDI_CH3_iSendv (MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV *iov, int n_io
         goto fn_exit;
     }
 
-    /*MPIU_Assert(vc_ch->is_local); */
-    MPIU_Assert(n_iov <= MPL_IOV_LIMIT);
-    MPIU_Assert(iov[0].MPL_IOV_LEN <= sizeof(MPIDI_CH3_Pkt_t));
+    /*MPIR_Assert(vc_ch->is_local); */
+    MPIR_Assert(n_iov <= MPL_IOV_LIMIT);
+    MPIR_Assert(iov[0].MPL_IOV_LEN <= sizeof(MPIDI_CH3_Pkt_t));
 
     /* The channel uses a fixed length header, the size of which is
      * the maximum of all possible packet headers */
@@ -109,7 +109,7 @@ int MPIDI_CH3_iSendv (MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV *iov, int n_io
             sreq->ch.noncontig = FALSE;
 	    sreq->ch.vc = vc;
 	    MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
-	    MPIU_Assert (MPIDI_CH3I_shm_active_send == NULL);
+	    MPIR_Assert (MPIDI_CH3I_shm_active_send == NULL);
 
             if (remaining_iov != iov) {
                 /* headers are sent, mark current sreq as active_send req */
@@ -125,7 +125,7 @@ int MPIDI_CH3_iSendv (MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV *iov, int n_io
             reqFn = sreq->dev.OnDataAvail;
             if (!reqFn)
             {
-                MPIU_Assert (MPIDI_Request_get_type (sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+                MPIR_Assert (MPIDI_Request_get_type (sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
                 mpi_errno = MPID_Request_complete (sreq);
                 if (mpi_errno != MPI_SUCCESS) {
                     MPIR_ERR_POP(mpi_errno);
@@ -145,7 +145,7 @@ int MPIDI_CH3_iSendv (MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV *iov, int n_io
                     sreq->ch.noncontig = FALSE;
                     sreq->ch.vc = vc;
                     MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
-                    MPIU_Assert (MPIDI_CH3I_shm_active_send == NULL);
+                    MPIR_Assert (MPIDI_CH3I_shm_active_send == NULL);
                     MPIDI_CH3I_shm_active_send = sreq;
                     MPL_DBG_MSG (MPIDI_CH3_DBG_CHANNEL, VERBOSE, ".... reloaded and enqueued");
                 }
@@ -188,7 +188,7 @@ int MPIDI_CH3_iSendv (MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV *iov, int n_io
         MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISENDV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISENDV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_istartmsg.c b/src/mpid/ch3/channels/nemesis/src/ch3_istartmsg.c
index 810f8a5..40350cd 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_istartmsg.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_istartmsg.c
@@ -29,9 +29,9 @@ int MPIDI_CH3_iStartMsg (MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, MPIR_Reques
     int mpi_errno = MPI_SUCCESS;
     int again = 0;
     int in_cs = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);
 
     MPIR_ERR_CHKANDJUMP1(vc->state == MPIDI_VC_STATE_MORIBUND, mpi_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);
 
@@ -41,8 +41,8 @@ int MPIDI_CH3_iStartMsg (MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, MPIR_Reques
         goto fn_exit;
     }
 
-    /*MPIU_Assert(vc->ch.is_local);*/
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    /*MPIR_Assert(vc->ch.is_local);*/
+    MPIR_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
 
     /* This channel uses a fixed length header, the size of which is
      * the maximum of all possible packet headers */
@@ -55,7 +55,7 @@ int MPIDI_CH3_iStartMsg (MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, MPIR_Reques
     if (MPIDI_CH3I_Sendq_empty(MPIDI_CH3I_shm_sendq))
        /* MT */
     {
-        MPIU_Assert(hdr_sz <= INT_MAX);
+        MPIR_Assert(hdr_sz <= INT_MAX);
 	MPL_DBG_MSG_D (MPIDI_CH3_DBG_CHANNEL, VERBOSE, "iStartMsg %d", (int) hdr_sz);
 	mpi_errno = MPID_nem_mpich_send_header (hdr, (int)hdr_sz, vc, &again);
         if (mpi_errno) MPIR_ERR_POP (mpi_errno);
@@ -77,7 +77,7 @@ int MPIDI_CH3_iStartMsg (MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, MPIR_Reques
     if (in_cs) {
         MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -90,8 +90,8 @@ int MPIDI_CH3_iStartMsg (MPIDI_VC_t *vc, void *hdr, intptr_t hdr_sz, MPIR_Reques
 
 	/* create a request */
 	sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-	MPIU_Assert (sreq != NULL);
-	MPIU_Object_set_ref (sreq, 2);
+	MPIR_Assert (sreq != NULL);
+	MPIR_Object_set_ref (sreq, 2);
 	sreq->kind = MPIR_REQUEST_KIND__SEND;
 
 	sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_istartmsgv.c b/src/mpid/ch3/channels/nemesis/src/ch3_istartmsgv.c
index ea5942b..a21edd2 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_istartmsgv.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_istartmsgv.c
@@ -37,15 +37,15 @@ int MPIDI_CH3_iStartMsgv (MPIDI_VC_t *vc, MPL_IOV *iov, int n_iov, MPIR_Request
     int in_cs = FALSE;
     int again = 0;
     int j;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
 
     MPIR_ERR_CHKANDJUMP1(vc->state == MPIDI_VC_STATE_MORIBUND, mpi_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);
 
     if (vc->ch.iStartContigMsg)
     {
-        MPIU_Assert (n_iov > 0);
+        MPIR_Assert (n_iov > 0);
         switch (n_iov)
         {
         case 1:
@@ -63,8 +63,8 @@ int MPIDI_CH3_iStartMsgv (MPIDI_VC_t *vc, MPL_IOV *iov, int n_iov, MPIR_Request
         goto fn_exit;
     }
 
-    MPIU_Assert (n_iov <= MPL_IOV_LIMIT);
-    MPIU_Assert (iov[0].MPL_IOV_LEN <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert (n_iov <= MPL_IOV_LIMIT);
+    MPIR_Assert (iov[0].MPL_IOV_LEN <= sizeof(MPIDI_CH3_Pkt_t));
 
     /* The channel uses a fixed length header, the size of which is
      * the maximum of all possible packet headers */
@@ -118,8 +118,8 @@ int MPIDI_CH3_iStartMsgv (MPIDI_VC_t *vc, MPL_IOV *iov, int n_iov, MPIR_Request
             /* Create a new request and save remaining portions of the
 	     * iov in it. */
             sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-	    MPIU_Assert(sreq != NULL);
-	    MPIU_Object_set_ref(sreq, 2);
+	    MPIR_Assert(sreq != NULL);
+	    MPIR_Object_set_ref(sreq, 2);
 	    sreq->kind = MPIR_REQUEST_KIND__SEND;
 	    for (j = 0; j < remaining_n_iov; ++j)
 	    {
@@ -138,7 +138,7 @@ int MPIDI_CH3_iStartMsgv (MPIDI_VC_t *vc, MPL_IOV *iov, int n_iov, MPIR_Request
 		sreq->dev.iov[0].MPL_IOV_LEN = iov[0].MPL_IOV_LEN;
 	    }
 	    MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
-	    MPIU_Assert (MPIDI_CH3I_shm_active_send == NULL);
+	    MPIR_Assert (MPIDI_CH3I_shm_active_send == NULL);
 	    MPIDI_CH3I_shm_active_send = sreq;
 	}
     }
@@ -149,8 +149,8 @@ int MPIDI_CH3_iStartMsgv (MPIDI_VC_t *vc, MPL_IOV *iov, int n_iov, MPIR_Request
 	MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, TERSE, "request enqueued");
 	/* create a request */
 	sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-	MPIU_Assert(sreq != NULL);
-	MPIU_Object_set_ref(sreq, 2);
+	MPIR_Assert(sreq != NULL);
+	MPIR_Object_set_ref(sreq, 2);
 	sreq->kind = MPIR_REQUEST_KIND__SEND;
 
 	sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) iov[0].MPL_IOV_BUF;
@@ -186,7 +186,7 @@ int MPIDI_CH3_iStartMsgv (MPIDI_VC_t *vc, MPL_IOV *iov, int n_iov, MPIR_Request
     if (in_cs) {
         MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_progress.c b/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
index f38211b..35b03b3 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
@@ -112,9 +112,9 @@ static void sigusr1_handler(int sig)
 static int check_terminating_vcs(void)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_CHECK_TERMINATING_VCS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CHECK_TERMINATING_VCS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CHECK_TERMINATING_VCS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CHECK_TERMINATING_VCS);
 
     while (!TERMQ_EMPTY() && MPIR_Request_is_complete(TERMQ_HEAD()->req)) {
         vc_term_element_t *ep;
@@ -126,7 +126,7 @@ static int check_terminating_vcs(void)
     }
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_CHECK_TERMINATING_VCS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CHECK_TERMINATING_VCS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -148,9 +148,9 @@ int MPIDI_CH3I_Shm_send_progress(void)
     MPIR_Request *sreq;
     int again = 0;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_SEND_PROGRESS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_SEND_PROGRESS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SHM_SEND_PROGRESS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SHM_SEND_PROGRESS);
 
     sreq = MPIDI_CH3I_shm_active_send;
     MPL_DBG_STMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, {if (sreq) MPL_DBG_MSG (MPIDI_CH3_DBG_CHANNEL, VERBOSE, "Send: cont sreq");});
@@ -158,7 +158,7 @@ int MPIDI_CH3I_Shm_send_progress(void)
     {
         if (!sreq->ch.noncontig)
         {
-            MPIU_Assert(sreq->dev.iov_count > 0 && sreq->dev.iov[sreq->dev.iov_offset].MPL_IOV_LEN > 0);
+            MPIR_Assert(sreq->dev.iov_count > 0 && sreq->dev.iov[sreq->dev.iov_offset].MPL_IOV_LEN > 0);
 
             iov = &sreq->dev.iov[sreq->dev.iov_offset];
             n_iov = sreq->dev.iov_count;
@@ -199,7 +199,7 @@ int MPIDI_CH3I_Shm_send_progress(void)
 
         if (!sreq->ch.noncontig)
         {
-            MPIU_Assert(sreq->dev.iov_count > 0 && sreq->dev.iov[sreq->dev.iov_offset].MPL_IOV_LEN > 0);
+            MPIR_Assert(sreq->dev.iov_count > 0 && sreq->dev.iov[sreq->dev.iov_offset].MPL_IOV_LEN > 0);
 
             iov = &sreq->dev.iov[sreq->dev.iov_offset];
             n_iov = sreq->dev.iov_count;
@@ -247,14 +247,14 @@ int MPIDI_CH3I_Shm_send_progress(void)
     }
 
     /* finished sending sreq */
-    MPIU_Assert(!again);
+    MPIR_Assert(!again);
 
     if (!sreq->dev.OnDataAvail)
     {
         /* MT FIXME-N1 race under per-object, harmless to disable here but
          * its a symptom of a bigger problem... */
-#if !(defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT))
-        MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+#if !(defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ))
+        MPIR_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
 #endif
 
         mpi_errno = MPID_Request_complete(sreq);
@@ -286,7 +286,7 @@ int MPIDI_CH3I_Shm_send_progress(void)
     }
         
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SHM_SEND_PROGRESS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SHM_SEND_PROGRESS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -300,9 +300,9 @@ int MPIDI_CH3I_Progress_register_hook(int (*progress_fn)(int*), int *id)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
     for (i = 0; i < MAX_PROGRESS_HOOKS; i++) {
@@ -323,7 +323,7 @@ int MPIDI_CH3I_Progress_register_hook(int (*progress_fn)(int*), int *id)
 
   fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -337,19 +337,19 @@ int MPIDI_CH3I_Progress_register_hook(int (*progress_fn)(int*), int *id)
 int MPIDI_CH3I_Progress_deregister_hook(int id)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
-    MPIU_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS && progress_hooks[id].func_ptr != NULL);
+    MPIR_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS && progress_hooks[id].func_ptr != NULL);
 
     progress_hooks[id].func_ptr = NULL;
     progress_hooks[id].active = FALSE;
 
   fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -364,18 +364,18 @@ int MPIDI_CH3I_Progress_deregister_hook(int id)
 int MPIDI_CH3I_Progress_activate_hook(int id)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
-    MPIU_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
+    MPIR_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
                 progress_hooks[id].active == FALSE && progress_hooks[id].func_ptr != NULL);
     progress_hooks[id].active = TRUE;
 
   fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -390,18 +390,18 @@ int MPIDI_CH3I_Progress_activate_hook(int id)
 int MPIDI_CH3I_Progress_deactivate_hook(int id)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
-    MPIU_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
+    MPIR_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
                 progress_hooks[id].active == TRUE && progress_hooks[id].func_ptr != NULL);
     progress_hooks[id].active = FALSE;
 
   fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -419,15 +419,15 @@ int MPIDI_CH3I_Progress (MPID_Progress_state *progress_state, int is_blocking)
 {
     int mpi_errno = MPI_SUCCESS;
     int made_progress = FALSE;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS);
 
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
     /* sanity: if this doesn't hold, we can't track our local view of completion safely */
     if (is_blocking) {
-        MPIU_Assert(progress_state != NULL);
+        MPIR_Assert(progress_state != NULL);
     }
 
     if (sigusr1_count > my_sigusr1_count) {
@@ -518,7 +518,7 @@ int MPIDI_CH3I_Progress (MPID_Progress_state *progress_state, int is_blocking)
                 MPIDI_CH3_Pkt_t *pkt         = (MPIDI_CH3_Pkt_t *)cell_buf;
 
                 /* Empty packets are not allowed */
-                MPIU_Assert(payload_len >= 0);
+                MPIR_Assert(payload_len >= 0);
 
                 if (in_fbox)
                 {
@@ -527,16 +527,16 @@ int MPIDI_CH3I_Progress (MPID_Progress_state *progress_state, int is_blocking)
 
                     /* This packet must be the first packet of a new message */
                     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "Recv pkt from fbox");
-                    MPIU_Assert(payload_len >= sizeof (MPIDI_CH3_Pkt_t));
+                    MPIR_Assert(payload_len >= sizeof (MPIDI_CH3_Pkt_t));
 
                     MPIDI_PG_Get_vc_set_active(MPIDI_Process.my_pg, MPID_NEM_FBOX_SOURCE(cell), &vc);
 		   
-		    MPIU_Assert(vc->ch.recv_active == NULL &&
+		    MPIR_Assert(vc->ch.recv_active == NULL &&
                                 vc->ch.pending_pkt_len == 0);
                     vc_ch = &vc->ch;
 
                     /* invalid pkt data will result in unpredictable behavior */
-                    MPIU_Assert(pkt->type >= 0 && pkt->type < MPIDI_CH3_PKT_END_ALL);
+                    MPIR_Assert(pkt->type >= 0 && pkt->type < MPIDI_CH3_PKT_END_ALL);
 
                     mpi_errno = pktArray[pkt->type](vc, pkt, &buflen, &rreq);
                     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -557,7 +557,7 @@ int MPIDI_CH3I_Progress (MPID_Progress_state *progress_state, int is_blocking)
                     MPID_nem_mpich_release_fbox(cell);
 
                     /* the whole message should have been handled */
-                    MPIU_Assert(!vc_ch->recv_active);
+                    MPIR_Assert(!vc_ch->recv_active);
 
                     break; /* break out of recv progress block */
                 }
@@ -593,7 +593,7 @@ int MPIDI_CH3I_Progress (MPID_Progress_state *progress_state, int is_blocking)
 
         for (i = 0; i < MAX_PROGRESS_HOOKS; i++) {
             if (progress_hooks[i].active == TRUE) {
-                MPIU_Assert(progress_hooks[i].func_ptr != NULL);
+                MPIR_Assert(progress_hooks[i].func_ptr != NULL);
                 mpi_errno = progress_hooks[i].func_ptr(&made_progress);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 if (made_progress)
@@ -656,7 +656,7 @@ int MPIDI_CH3I_Progress (MPID_Progress_state *progress_state, int is_blocking)
 
  fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -672,17 +672,17 @@ int MPIDI_CH3I_Progress (MPID_Progress_state *progress_state, int is_blocking)
 static int MPIDI_CH3I_Progress_delay(unsigned int completion_count)
 {
     int mpi_errno = MPI_SUCCESS, err;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DELAY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DELAY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DELAY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DELAY);
     /* FIXME should be appropriately abstracted somehow */
-#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
     {
         MPID_Thread_cond_wait(&MPIDI_CH3I_progress_completion_cond, &MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX/*MPIDCOMM*/, &err);
     }
 #   endif
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DELAY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DELAY);
     return mpi_errno;
 }
 /* end MPIDI_CH3I_Progress_delay() */
@@ -695,18 +695,18 @@ static int MPIDI_CH3I_Progress_delay(unsigned int completion_count)
 static int MPIDI_CH3I_Progress_continue(unsigned int completion_count/*unused*/)
 {
     int mpi_errno = MPI_SUCCESS,err;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_CONTINUE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_CONTINUE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_CONTINUE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_CONTINUE);
     /* FIXME should be appropriately abstracted somehow */
-#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
     {
         /* we currently hold the MPIDCOMM CS */
         MPID_Thread_cond_broadcast(&MPIDI_CH3I_progress_completion_cond, &err);
     }
 #   endif
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_CONTINUE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_CONTINUE);
     return mpi_errno;
 }
 /* end MPIDI_CH3I_Progress_continue() */
@@ -718,12 +718,12 @@ static int MPIDI_CH3I_Progress_continue(unsigned int completion_count/*unused*/)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 void MPIDI_CH3I_Progress_wakeup(void)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_WAKEUP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_WAKEUP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_WAKEUP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_WAKEUP);
 
     /* no processes sleep in nemesis progress */
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_WAKEUP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_WAKEUP);
     return;
 }
 #endif /* MPICH_IS_THREADED */
@@ -738,9 +738,9 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
     MPIR_Request *rreq = NULL;
     int complete;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_HANDLE_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_HANDLE_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_HANDLE_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_HANDLE_PKT);
 
     do
     {
@@ -755,7 +755,7 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
                 MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "received new message");
 
                 /* invalid pkt data will result in unpredictable behavior */
-                MPIU_Assert(pkt->type >= 0 && pkt->type < MPIDI_CH3_PKT_END_ALL);
+                MPIR_Assert(pkt->type >= 0 && pkt->type < MPIDI_CH3_PKT_END_ALL);
 
                 mpi_errno = pktArray[pkt->type](vc, pkt, &len, &rreq);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -774,7 +774,7 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
         }
         else if (vc_ch->recv_active)
         {
-            MPIU_Assert(vc_ch->pending_pkt_len == 0);
+            MPIR_Assert(vc_ch->pending_pkt_len == 0);
             MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "continuing recv");
             rreq = vc_ch->recv_active;
         }
@@ -790,25 +790,25 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
             copylen = ((vc_ch->pending_pkt_len + buflen <= sizeof(MPIDI_CH3_Pkt_t))
                        ? buflen
                        : sizeof(MPIDI_CH3_Pkt_t) - vc_ch->pending_pkt_len);
-            MPIU_Memcpy((char *)vc_ch->pending_pkt + vc_ch->pending_pkt_len, buf, copylen);
+            MPIR_Memcpy((char *)vc_ch->pending_pkt + vc_ch->pending_pkt_len, buf, copylen);
             vc_ch->pending_pkt_len += copylen;
             if (vc_ch->pending_pkt_len < sizeof(MPIDI_CH3_Pkt_t))
                 goto fn_exit;
 
             /* we have a whole header */
             MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "    completed header");
-            MPIU_Assert(vc_ch->pending_pkt_len == sizeof(MPIDI_CH3_Pkt_t));
+            MPIR_Assert(vc_ch->pending_pkt_len == sizeof(MPIDI_CH3_Pkt_t));
 
             buflen -= copylen;
             buf    += copylen;
 
             /* invalid pkt data will result in unpredictable behavior */
-            MPIU_Assert(pkt->type >= 0 && pkt->type < MPIDI_CH3_PKT_END_ALL);
+            MPIR_Assert(pkt->type >= 0 && pkt->type < MPIDI_CH3_PKT_END_ALL);
 
             pktlen = sizeof(MPIDI_CH3_Pkt_t);
             mpi_errno = pktArray[pkt->type](vc, pkt, &pktlen, &rreq);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPIU_Assert(pktlen == sizeof(MPIDI_CH3_Pkt_t));
+            MPIR_Assert(pktlen == sizeof(MPIDI_CH3_Pkt_t));
 
             vc_ch->pending_pkt_len = 0;
 
@@ -822,8 +822,8 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
         }
 
         /* copy data into user buffer described by iov in rreq */
-        MPIU_Assert(rreq);
-        MPIU_Assert(rreq->dev.iov_count > 0 && rreq->dev.iov[rreq->dev.iov_offset].MPL_IOV_LEN > 0);
+        MPIR_Assert(rreq);
+        MPIR_Assert(rreq->dev.iov_count > 0 && rreq->dev.iov[rreq->dev.iov_offset].MPL_IOV_LEN > 0);
 
         MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "    copying into user buffer from IOV");
 
@@ -848,7 +848,7 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
                 size_t iov_len = iov->MPL_IOV_LEN;
 		MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "        %d", (int)iov_len);
                 if (rreq->dev.drop_data == FALSE) {
-                    MPIU_Memcpy (iov->MPL_IOV_BUF, buf, iov_len);
+                    MPIR_Memcpy (iov->MPL_IOV_BUF, buf, iov_len);
                 }
 
                 buflen -= iov_len;
@@ -863,7 +863,7 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
                 {
 		    MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "        %" PRIdPTR, buflen);
                     if (rreq->dev.drop_data == FALSE) {
-                        MPIU_Memcpy (iov->MPL_IOV_BUF, buf, buflen);
+                        MPIR_Memcpy (iov->MPL_IOV_BUF, buf, buflen);
                     }
                     iov->MPL_IOV_BUF = (void *)((char *)iov->MPL_IOV_BUF + buflen);
                     iov->MPL_IOV_LEN -= buflen;
@@ -883,8 +883,8 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
                 if (!reqFn)
                 {
                     /* MT FIXME-N1 */
-#if !(defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT))
-                    MPIU_Assert(MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+#if !(defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ))
+                    MPIR_Assert(MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_RESP);
 #endif
                     mpi_errno = MPID_Request_complete(rreq);
                     if (mpi_errno != MPI_SUCCESS) {
@@ -902,7 +902,7 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
                 if (!complete)
                 {
                     rreq->dev.iov_offset = 0;
-                    MPIU_Assert(rreq->dev.iov_count > 0 && rreq->dev.iov[rreq->dev.iov_offset].MPL_IOV_LEN > 0);
+                    MPIR_Assert(rreq->dev.iov_count > 0 && rreq->dev.iov[rreq->dev.iov_offset].MPL_IOV_LEN > 0);
                     vc_ch->recv_active = rreq;
                     MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "...not complete");
                 }
@@ -917,7 +917,7 @@ int MPID_nem_handle_pkt(MPIDI_VC_t *vc, char *buf, intptr_t buflen)
     while (buflen);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_HANDLE_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_HANDLE_PKT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -943,17 +943,17 @@ int MPIDI_CH3I_Progress_init(void)
 {
     int i;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
 
     MPIR_THREAD_CHECK_BEGIN;
     /* FIXME should be appropriately abstracted somehow */
-#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
     {
         int err;
 	MPID_Thread_cond_create(&MPIDI_CH3I_progress_completion_cond, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
     }
 #   endif
     MPIR_THREAD_CHECK_END;
@@ -985,7 +985,7 @@ int MPIDI_CH3I_Progress_init(void)
 #ifdef HAVE_SIGNAL
     /* install signal handler for process failure notifications from hydra */
     prev_sighandler = signal(SIGUSR1, sigusr1_handler);
-    MPIR_ERR_CHKANDJUMP1(prev_sighandler == SIG_ERR, mpi_errno, MPI_ERR_OTHER, "**signal", "**signal %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(prev_sighandler == SIG_ERR, mpi_errno, MPI_ERR_OTHER, "**signal", "**signal %s", MPIR_Strerror(errno));
     if (prev_sighandler == SIG_IGN || prev_sighandler == SIG_DFL)
         prev_sighandler = NULL;
 #endif
@@ -997,7 +997,7 @@ int MPIDI_CH3I_Progress_init(void)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1011,9 +1011,9 @@ int MPIDI_CH3I_Progress_finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
     qn_ent_t *ent;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
 
     while(qn_head) {
         ent = qn_head->next;
@@ -1022,7 +1022,7 @@ int MPIDI_CH3I_Progress_finalize(void)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1036,9 +1036,9 @@ static int shm_connection_terminated(MPIDI_VC_t * vc)
 {
     /* This function is called after all sends have completed */
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_SHM_CONNECTION_TERMINATED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SHM_CONNECTION_TERMINATED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SHM_CONNECTION_TERMINATED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SHM_CONNECTION_TERMINATED);
 
     if (vc->ch.lmt_vc_terminated) {
         mpi_errno = vc->ch.lmt_vc_terminated(vc);
@@ -1055,7 +1055,7 @@ static int shm_connection_terminated(MPIDI_VC_t * vc)
 
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT, TYPICAL, "Terminated VC %d", vc->pg_rank);
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SHM_CONNECTION_TERMINATED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SHM_CONNECTION_TERMINATED);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1069,10 +1069,10 @@ static int shm_connection_terminated(MPIDI_VC_t * vc)
 int MPIDI_CH3_Connection_terminate(MPIDI_VC_t * vc)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_CONNECTION_TERMINATE);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_CONNECTION_TERMINATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_CONNECTION_TERMINATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_CONNECTION_TERMINATE);
 
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT, TYPICAL, "Terminating VC %d", vc->pg_rank);
 
@@ -1115,7 +1115,7 @@ int MPIDI_CH3_Connection_terminate(MPIDI_VC_t * vc)
                    have completed.  */
                 vc_term_element_t *ep;
                 MPL_DBG_MSG(MPIDI_CH3_DBG_DISCONNECT, TYPICAL, "Shm send queue not empty, waiting to terminate");
-                MPIU_CHKPMEM_MALLOC(ep, vc_term_element_t *, sizeof(vc_term_element_t), mpi_errno, "vc_term_element");
+                MPIR_CHKPMEM_MALLOC(ep, vc_term_element_t *, sizeof(vc_term_element_t), mpi_errno, "vc_term_element");
                 ep->vc = vc;
                 ep->req = MPIDI_CH3I_shm_sendq.tail;
                 MPIR_Request_add_ref(ep->req); /* make sure this doesn't get released before we can check it */
@@ -1130,11 +1130,11 @@ int MPIDI_CH3_Connection_terminate(MPIDI_VC_t * vc)
     }
     
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_CONNECTION_TERMINATE);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_CONNECTION_TERMINATE);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 /* end MPIDI_CH3_Connection_terminate() */
@@ -1147,9 +1147,9 @@ int MPIDI_CH3I_Complete_sendq_with_error(MPIDI_VC_t * vc)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *req, *prev;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMPLETE_SENDQ_WITH_ERROR);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMPLETE_SENDQ_WITH_ERROR);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_COMPLETE_SENDQ_WITH_ERROR);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_COMPLETE_SENDQ_WITH_ERROR);
 
     req = MPIDI_CH3I_shm_sendq.head;
     prev = NULL;
@@ -1179,7 +1179,7 @@ int MPIDI_CH3I_Complete_sendq_with_error(MPIDI_VC_t * vc)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_COMPLETE_SENDQ_WITH_ERROR);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_COMPLETE_SENDQ_WITH_ERROR);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1196,16 +1196,16 @@ static int pkt_NETMOD_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bu
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_pkt_netmod_t * const netmod_pkt = (MPID_nem_pkt_netmod_t *)pkt;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_PKT_NETMOD_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PKT_NETMOD_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_PKT_NETMOD_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PKT_NETMOD_HANDLER);
 
-    MPIU_Assert_fmt_msg(vc_ch->pkt_handler && netmod_pkt->subtype < vc_ch->num_pkt_handlers, ("no handler defined for netmod-local packet"));
+    MPIR_Assert_fmt_msg(vc_ch->pkt_handler && netmod_pkt->subtype < vc_ch->num_pkt_handlers, ("no handler defined for netmod-local packet"));
 
     mpi_errno = vc_ch->pkt_handler[netmod_pkt->subtype](vc, pkt, buflen, rreqp);
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_PKT_NETMOD_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PKT_NETMOD_HANDLER);
     return mpi_errno;
 }
 
@@ -1218,9 +1218,9 @@ int MPIDI_CH3I_Register_anysource_notification(void (*enqueue_fn)(MPIR_Request *
 {
     int mpi_errno = MPI_SUCCESS;
     qn_ent_t *ent;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
-    MPIU_CHKPMEM_MALLOC(ent, qn_ent_t *, sizeof(qn_ent_t), mpi_errno, "queue entry");
+    MPIR_CHKPMEM_MALLOC(ent, qn_ent_t *, sizeof(qn_ent_t), mpi_errno, "queue entry");
 
     ent->enqueue_fn = enqueue_fn;
     ent->dequeue_fn = dequeue_fn;
@@ -1228,10 +1228,10 @@ int MPIDI_CH3I_Register_anysource_notification(void (*enqueue_fn)(MPIR_Request *
     qn_head = ent;
 
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -1272,10 +1272,10 @@ static int anysource_matched(MPIR_Request *rreq)
             m = ent->dequeue_fn(rreq);
             
             /* this is a crude check to check if the req has been
-               matched by more than one netmod.  When MPIU_Assert() is
+               matched by more than one netmod.  When MPIR_Assert() is
                defined to empty, the extra matched=m is optimized
                away. */
-            MPIU_Assert(!m || !matched);
+            MPIR_Assert(!m || !matched);
             matched = m;
         }
         ent = ent->next;
@@ -1290,9 +1290,9 @@ static int anysource_matched(MPIR_Request *rreq)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 void MPIDI_CH3I_Posted_recv_enqueued(MPIR_Request *rreq)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_POSTED_RECV_ENQUEUED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_POSTED_RECV_ENQUEUED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_POSTED_RECV_ENQUEUED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_POSTED_RECV_ENQUEUED);
 
     /* MT FIXME acquiring MPIDCOMM here violates lock ordering rules,
      * easily causes deadlock */
@@ -1324,11 +1324,11 @@ void MPIDI_CH3I_Posted_recv_enqueued(MPIR_Request *rreq)
          * ways to do this that don't require a hook on every request post, but
          * instead do some sort of caching or something analogous to branch
          * prediction. */
-#if !(defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT))
+#if !(defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ))
         /* enqueue fastbox */
 
         /* don't enqueue a fastbox for yourself */
-        MPIU_Assert(rreq->comm != NULL);
+        MPIR_Assert(rreq->comm != NULL);
         if (rreq->dev.match.parts.rank == rreq->comm->rank)
             goto fn_exit;
 
@@ -1346,7 +1346,7 @@ void MPIDI_CH3I_Posted_recv_enqueued(MPIR_Request *rreq)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_POSTED_RECV_ENQUEUED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_POSTED_RECV_ENQUEUED);
 }
 
 /* returns non-zero when req has been matched by channel */
@@ -1359,9 +1359,9 @@ int MPIDI_CH3I_Posted_recv_dequeued(MPIR_Request *rreq)
     int local_rank = -1;
     MPIDI_VC_t *vc;
     int matched = FALSE;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_POSTED_RECV_DEQUEUED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_POSTED_RECV_DEQUEUED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_POSTED_RECV_DEQUEUED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_POSTED_RECV_DEQUEUED);
 
     if (rreq->dev.match.parts.rank == MPI_ANY_SOURCE)
     {
@@ -1370,7 +1370,7 @@ int MPIDI_CH3I_Posted_recv_dequeued(MPIR_Request *rreq)
     /* MT FIXME we unfortunately must disable this optimization for now in
      * per_object mode. There are possibly other ways to synchronize the
      * fboxes that won't cause lock-ordering deadlocks */
-#if !(defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT))
+#if !(defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ))
     else
     {
         if (rreq->dev.match.parts.rank == rreq->comm->rank)
@@ -1378,7 +1378,7 @@ int MPIDI_CH3I_Posted_recv_dequeued(MPIR_Request *rreq)
 
         /* don't use MPID_NEM_IS_LOCAL, it doesn't handle dynamic processes */
         MPIDI_Comm_get_vc(rreq->comm, rreq->dev.match.parts.rank, &vc);
-        MPIU_Assert(vc != NULL);
+        MPIR_Assert(vc != NULL);
         if (!vc->ch.is_local)
             goto fn_exit;
 
@@ -1392,7 +1392,7 @@ int MPIDI_CH3I_Posted_recv_dequeued(MPIR_Request *rreq)
 #endif
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_POSTED_RECV_DEQUEUED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_POSTED_RECV_DEQUEUED);
     return matched;
 }
 
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c b/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c
index 95a2ee3..ff5eeed 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c
@@ -17,9 +17,9 @@ int MPIDI_CH3_SHM_Win_shared_query(MPIR_Win * win_ptr, int target_rank, MPI_Aint
 {
     int comm_size;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_SHARED_QUERY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_SHARED_QUERY);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_SHARED_QUERY);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3_WIN_SHARED_QUERY);
 
     if (win_ptr->comm_ptr->node_comm == NULL) {
         mpi_errno = MPIDI_CH3U_Win_shared_query(win_ptr, target_rank, size, disp_unit, baseptr);
@@ -44,7 +44,7 @@ int MPIDI_CH3_SHM_Win_shared_query(MPIR_Win * win_ptr, int target_rank, MPI_Aint
         for (i = 0; i < comm_size; i++) {
             if (win_ptr->basic_info_table[i].size > 0) {
                 int local_i = win_ptr->comm_ptr->intranode_table[i];
-                MPIU_Assert(local_i >= 0 && local_i < win_ptr->comm_ptr->node_comm->local_size);
+                MPIR_Assert(local_i >= 0 && local_i < win_ptr->comm_ptr->node_comm->local_size);
                 *size = win_ptr->basic_info_table[i].size;
                 *disp_unit = win_ptr->basic_info_table[i].disp_unit;
                 *((void **) baseptr) = win_ptr->shm_base_addrs[local_i];
@@ -55,7 +55,7 @@ int MPIDI_CH3_SHM_Win_shared_query(MPIR_Win * win_ptr, int target_rank, MPI_Aint
     }
     else {
         int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
-        MPIU_Assert(local_target_rank >= 0 &&
+        MPIR_Assert(local_target_rank >= 0 &&
                     local_target_rank < win_ptr->comm_ptr->node_comm->local_size);
         *size = win_ptr->basic_info_table[target_rank].size;
         *disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
@@ -63,7 +63,7 @@ int MPIDI_CH3_SHM_Win_shared_query(MPIR_Win * win_ptr, int target_rank, MPI_Aint
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_SHARED_QUERY);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3_WIN_SHARED_QUERY);
     return mpi_errno;
 
   fn_fail:
@@ -78,9 +78,9 @@ int MPIDI_CH3_SHM_Win_shared_query(MPIR_Win * win_ptr, int target_rank, MPI_Aint
 int MPIDI_CH3_SHM_Win_free(MPIR_Win ** win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SHM_WIN_FREE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SHM_WIN_FREE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SHM_WIN_FREE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3_SHM_WIN_FREE);
 
     if ((*win_ptr)->comm_ptr->node_comm == NULL) {
         goto fn_exit;
@@ -120,7 +120,7 @@ int MPIDI_CH3_SHM_Win_free(MPIR_Win ** win_ptr)
          * If node_comm == NULL, this process is the only one on this node, therefore
          * we use comm_self as node comm. */
         node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
-        MPIU_Assert(node_comm_ptr != NULL);
+        MPIR_Assert(node_comm_ptr != NULL);
 
         if (node_comm_ptr->rank == 0) {
             MPIDI_CH3I_SHM_MUTEX_DESTROY(*win_ptr);
@@ -156,7 +156,7 @@ int MPIDI_CH3_SHM_Win_free(MPIR_Win ** win_ptr)
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SHM_WIN_FREE);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3_SHM_WIN_FREE);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
index 2f378db..6e5937a 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
@@ -36,9 +36,9 @@ static int MPIDI_CH3I_Win_gather_info(void *base, MPI_Aint size, int disp_unit,
 int MPIDI_CH3_Win_fns_init(MPIDI_CH3U_Win_fns_t * win_fns)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
 
     if (MPIDI_CH3I_Shm_supported()) {
         win_fns->allocate_shm = MPIDI_CH3I_Win_allocate_shm;
@@ -47,7 +47,7 @@ int MPIDI_CH3_Win_fns_init(MPIDI_CH3U_Win_fns_t * win_fns)
         win_fns->shared_query = MPIDI_CH3_SHM_Win_shared_query;
     }
 
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
 
     return mpi_errno;
 }
@@ -59,16 +59,16 @@ int MPIDI_CH3_Win_fns_init(MPIDI_CH3U_Win_fns_t * win_fns)
 int MPIDI_CH3_Win_hooks_init(MPIDI_CH3U_Win_hooks_t * win_hooks)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
 
     if (MPIDI_CH3I_Shm_supported()) {
         win_hooks->win_init = MPIDI_CH3I_Win_init;
         win_hooks->win_free = MPIDI_CH3_SHM_Win_free;
     }
 
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
 
     return mpi_errno;
 }
@@ -83,8 +83,8 @@ int MPIDI_CH3_Win_pkt_orderings_init(MPIDI_CH3U_Win_pkt_ordering_t * win_pkt_ord
     int mpi_errno = MPI_SUCCESS;
     int netmod_ordering = 0;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
 
     win_pkt_orderings->am_flush_ordered = 0;
 
@@ -103,7 +103,7 @@ int MPIDI_CH3_Win_pkt_orderings_init(MPIDI_CH3U_Win_pkt_ordering_t * win_pkt_ord
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -117,9 +117,9 @@ static int MPIDI_CH3I_Win_init(MPI_Aint size, int disp_unit, int create_flavor,
                                MPIR_Info * info, MPIR_Comm * comm_ptr, MPIR_Win ** win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_WIN_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_WIN_INIT);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_WIN_INIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_WIN_INIT);
 
     (*win_ptr)->shm_base_addr = NULL;
     (*win_ptr)->shm_segment_len = 0;
@@ -132,7 +132,7 @@ static int MPIDI_CH3I_Win_init(MPI_Aint size, int disp_unit, int create_flavor,
     (*win_ptr)->info_shm_segment_handle = 0;
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_WIN_INIT);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_WIN_INIT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -161,21 +161,21 @@ static int MPIDI_CH3I_SHM_Wins_match(MPIR_Win ** win_ptr, MPIR_Win ** matched_wi
 
     MPIDI_SHM_Win_t *elem = shm_wins_list;
 
-    MPIU_CHKLMEM_DECL(2);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
+    MPIR_CHKLMEM_DECL(2);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
 
     *matched_win = NULL;
     base_shm_offs = *base_shm_offs_ptr;
     node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
-    MPIU_Assert(node_comm_ptr != NULL);
+    MPIR_Assert(node_comm_ptr != NULL);
     node_size = node_comm_ptr->local_size;
     node_rank = node_comm_ptr->rank;
 
     comm_size = (*win_ptr)->comm_ptr->local_size;
 
-    MPIU_CHKLMEM_MALLOC(node_ranks, int *, node_size * sizeof(int), mpi_errno, "node_ranks");
-    MPIU_CHKLMEM_MALLOC(node_ranks_in_shm_node, int *, node_size * sizeof(int),
+    MPIR_CHKLMEM_MALLOC(node_ranks, int *, node_size * sizeof(int), mpi_errno, "node_ranks");
+    MPIR_CHKLMEM_MALLOC(node_ranks_in_shm_node, int *, node_size * sizeof(int),
                         mpi_errno, "node_ranks_in_shm_comm");
 
     for (i = 0; i < node_size; i++) {
@@ -247,7 +247,7 @@ static int MPIDI_CH3I_SHM_Wins_match(MPIR_Win ** win_ptr, MPIR_Win ** matched_wi
         for (i = 0; i < comm_size; ++i) {
             int i_node_rank = (*win_ptr)->comm_ptr->intranode_table[i];
             if (i_node_rank >= 0) {
-                MPIU_Assert(i_node_rank < node_size);
+                MPIR_Assert(i_node_rank < node_size);
 
                 if (base_shm_offs[i_node_rank] < 0 ||
                     base_shm_offs[i_node_rank] + (*win_ptr)->basic_info_table[i].size >
@@ -273,8 +273,8 @@ static int MPIDI_CH3I_SHM_Wins_match(MPIR_Win ** win_ptr, MPIR_Win ** matched_wi
     if (shm_node_group_ptr != NULL)
         mpi_errno = MPIR_Group_free_impl(shm_node_group_ptr);
 
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -293,10 +293,10 @@ static int MPIDI_CH3I_Win_detect_shm(MPIR_Win ** win_ptr)
     int i, node_size;
     MPI_Aint *base_shm_offs;
 
-    MPIU_CHKPMEM_DECL(1);
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_WIN_DETECT_SHM);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_WIN_DETECT_SHM);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_WIN_DETECT_SHM);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_WIN_DETECT_SHM);
 
     if ((*win_ptr)->comm_ptr->node_comm == NULL) {
         goto fn_exit;
@@ -304,7 +304,7 @@ static int MPIDI_CH3I_Win_detect_shm(MPIR_Win ** win_ptr)
 
     node_size = (*win_ptr)->comm_ptr->node_comm->local_size;
 
-    MPIU_CHKLMEM_MALLOC(base_shm_offs, MPI_Aint *, node_size * sizeof(MPI_Aint),
+    MPIR_CHKLMEM_MALLOC(base_shm_offs, MPI_Aint *, node_size * sizeof(MPI_Aint),
                         mpi_errno, "base_shm_offs");
 
     /* Return the first matched shared window.
@@ -318,7 +318,7 @@ static int MPIDI_CH3I_Win_detect_shm(MPIR_Win ** win_ptr)
         goto fn_exit;
 
     (*win_ptr)->shm_allocated = TRUE;
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->shm_base_addrs, void **,
+    MPIR_CHKPMEM_MALLOC((*win_ptr)->shm_base_addrs, void **,
                         node_size * sizeof(void *), mpi_errno, "(*win_ptr)->shm_base_addrs");
 
     /* Compute the base address of shm buffer on each process.
@@ -333,12 +333,12 @@ static int MPIDI_CH3I_Win_detect_shm(MPIR_Win ** win_ptr)
     (*win_ptr)->shm_mutex = shm_win_ptr->shm_mutex;
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_WIN_DETECT_SHM);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_WIN_DETECT_SHM);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -357,10 +357,10 @@ static int MPIDI_CH3I_Win_gather_info(void *base, MPI_Aint size, int disp_unit,
     int i, k;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_WIN_GATHER_INFO);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_WIN_GATHER_INFO);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_WIN_GATHER_INFO);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_WIN_GATHER_INFO);
 
     if ((*win_ptr)->comm_ptr->node_comm == NULL) {
         mpi_errno = MPIDI_CH3U_Win_gather_info(base, size, disp_unit, info, comm_ptr, win_ptr);
@@ -371,7 +371,7 @@ static int MPIDI_CH3I_Win_gather_info(void *base, MPI_Aint size, int disp_unit,
     comm_rank = (*win_ptr)->comm_ptr->rank;
 
     node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
-    MPIU_Assert(node_comm_ptr != NULL);
+    MPIR_Assert(node_comm_ptr != NULL);
     node_rank = node_comm_ptr->rank;
 
     (*win_ptr)->info_shm_segment_len = comm_size * sizeof(MPIDI_Win_basic_info_t);
@@ -447,10 +447,10 @@ static int MPIDI_CH3I_Win_gather_info(void *base, MPI_Aint size, int disp_unit,
 
     (*win_ptr)->basic_info_table = (MPIDI_Win_basic_info_t *) ((*win_ptr)->info_shm_base_addr);
 
-    MPIU_CHKLMEM_MALLOC(tmp_buf, MPI_Aint *, 4 * comm_size * sizeof(MPI_Aint),
+    MPIR_CHKLMEM_MALLOC(tmp_buf, MPI_Aint *, 4 * comm_size * sizeof(MPI_Aint),
                         mpi_errno, "tmp_buf");
 
-    tmp_buf[4 * comm_rank] = MPIU_PtrToAint(base);
+    tmp_buf[4 * comm_rank] = MPIR_Ptr_to_aint(base);
     tmp_buf[4 * comm_rank + 1] = size;
     tmp_buf[4 * comm_rank + 2] = (MPI_Aint) disp_unit;
     tmp_buf[4 * comm_rank + 3] = (MPI_Aint) (*win_ptr)->handle;
@@ -464,7 +464,7 @@ static int MPIDI_CH3I_Win_gather_info(void *base, MPI_Aint size, int disp_unit,
         /* only node_rank == 0 writes results to basic_info_table on shared memory region. */
         k = 0;
         for (i = 0; i < comm_size; i++) {
-            (*win_ptr)->basic_info_table[i].base_addr = MPIU_AintToPtr(tmp_buf[k++]);
+            (*win_ptr)->basic_info_table[i].base_addr = MPIR_Aint_to_ptr(tmp_buf[k++]);
             (*win_ptr)->basic_info_table[i].size = tmp_buf[k++];
             (*win_ptr)->basic_info_table[i].disp_unit = (int) tmp_buf[k++];
             (*win_ptr)->basic_info_table[i].win_handle = (MPI_Win) tmp_buf[k++];
@@ -477,8 +477,8 @@ static int MPIDI_CH3I_Win_gather_info(void *base, MPI_Aint size, int disp_unit,
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_WIN_GATHER_INFO);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_WIN_GATHER_INFO);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -500,11 +500,11 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPIR_Info *
     MPI_Aint *node_sizes;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
     int noncontig = FALSE;
-    MPIU_CHKPMEM_DECL(1);
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_WIN_ALLOCATE_SHM);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_WIN_ALLOCATE_SHM);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_WIN_ALLOCATE_SHM);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_WIN_ALLOCATE_SHM);
 
     if ((*win_ptr)->comm_ptr->node_comm == NULL) {
         mpi_errno =
@@ -522,19 +522,19 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPIR_Info *
      * If node_comm == NULL, this process is the only one on this node, therefore
      * we use comm_self as node comm. */
     node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
-    MPIU_Assert(node_comm_ptr != NULL);
+    MPIR_Assert(node_comm_ptr != NULL);
     node_size = node_comm_ptr->local_size;
     node_rank = node_comm_ptr->rank;
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_wincreate_allgather);
     /* allocate memory for the base addresses, disp_units, and
      * completion counters of all processes */
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->shm_base_addrs, void **,
+    MPIR_CHKPMEM_MALLOC((*win_ptr)->shm_base_addrs, void **,
                         node_size * sizeof(void *), mpi_errno, "(*win_ptr)->shm_base_addrs");
 
     /* get the sizes of the windows and window objectsof
      * all processes.  allocate temp. buffer for communication */
-    MPIU_CHKLMEM_MALLOC(node_sizes, MPI_Aint *, node_size * sizeof(MPI_Aint), mpi_errno,
+    MPIR_CHKLMEM_MALLOC(node_sizes, MPI_Aint *, node_size * sizeof(MPI_Aint), mpi_errno,
                         "node_sizes");
 
     /* FIXME: This needs to be fixed for heterogeneous systems */
@@ -753,12 +753,12 @@ static int MPIDI_CH3I_Win_allocate_shm(MPI_Aint size, int disp_unit, MPIR_Info *
     MPIDI_CH3I_SHM_Wins_append(&shm_wins_list, (*win_ptr));
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_WIN_ALLOCATE_SHM);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_WIN_ALLOCATE_SHM);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c b/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
index f0ce8d2..87b1d35 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
@@ -23,10 +23,10 @@ UT_array *coll_fns_array = NULL;
 int MPIDI_CH3I_comm_create(MPIR_Comm *comm, void *param)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMM_CREATE);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMM_CREATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_COMM_CREATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_COMM_CREATE);
 
 #ifndef ENABLED_SHM_COLLECTIVES
     goto fn_exit;
@@ -60,7 +60,7 @@ int MPIDI_CH3I_comm_create(MPIR_Comm *comm, void *param)
         }
 
         /* allocate and init new coll_fns table */
-        MPIU_CHKPMEM_MALLOC(cf, MPIR_Collops *, sizeof(*cf), mpi_errno, "cf");
+        MPIR_CHKPMEM_MALLOC(cf, MPIR_Collops *, sizeof(*cf), mpi_errno, "cf");
         *cf = *comm->coll_fns;
         cf->ref_count = 1;
         cf->Barrier = barrier;
@@ -72,13 +72,13 @@ int MPIDI_CH3I_comm_create(MPIR_Comm *comm, void *param)
     }
     
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_COMM_CREATE);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_COMM_CREATE);
     return mpi_errno;
  fn_oom: /* out-of-memory handler for utarray operations */
     MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray");
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -89,9 +89,9 @@ int MPIDI_CH3I_comm_create(MPIR_Comm *comm, void *param)
 int MPIDI_CH3I_comm_destroy(MPIR_Comm *comm, void *param)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMM_DESTROY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMM_DESTROY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_COMM_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_COMM_DESTROY);
 #ifndef ENABLED_SHM_COLLECTIVES
     goto fn_exit;
 #endif
@@ -116,7 +116,7 @@ int MPIDI_CH3I_comm_destroy(MPIR_Comm *comm, void *param)
     }
     
  fn_exit: ATTRIBUTE((unused))
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_COMM_DESTROY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_COMM_DESTROY);
     return mpi_errno;
 }
 
@@ -136,7 +136,7 @@ static int alloc_barrier_vars (MPIR_Comm *comm, MPID_nem_barrier_vars_t **vars)
        This may result in two different communicators using the same
        barier_vars.  This code is being left in for now as an example of how to
        override collective operations. */
-    MPIU_Assert(0);
+    MPIR_Assert(0);
 
     for (i = 0; i < MPID_NEM_NUM_BARRIER_VARS; ++i)
     {
@@ -168,7 +168,7 @@ static int barrier(MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag)
     int prev;
     int sense;
     
-    MPIU_Assert(comm_ptr->hierarchy_kind == MPIR_COMM_HIERARCHY_KIND__NODE);
+    MPIR_Assert(comm_ptr->hierarchy_kind == MPIR_COMM_HIERARCHY_KIND__NODE);
     
     /* Trivial barriers return immediately */
     if (comm_ptr->local_size == 1)
@@ -230,9 +230,9 @@ int MPID_nem_barrier_vars_init (MPID_nem_barrier_vars_t *barrier_region)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_BARRIER_VARS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_BARRIER_VARS_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_BARRIER_VARS_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_BARRIER_VARS_INIT);
     if (MPID_nem_mem_region.local_rank == 0)
         for (i = 0; i < MPID_NEM_NUM_BARRIER_VARS; ++i)
         {
@@ -243,7 +243,7 @@ int MPID_nem_barrier_vars_init (MPID_nem_barrier_vars_t *barrier_region)
             OPA_store_int(&barrier_region[i].sig, 0);
         }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_BARRIER_VARS_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_BARRIER_VARS_INIT);
     return mpi_errno;
 }
 
@@ -254,14 +254,14 @@ int MPID_nem_barrier_vars_init (MPID_nem_barrier_vars_t *barrier_region)
 static int nem_coll_finalize(void *param ATTRIBUTE((unused)))
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_NEM_COLL_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_NEM_COLL_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_NEM_COLL_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_NEM_COLL_FINALIZE);
 
     utarray_free(coll_fns_array);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_NEM_COLL_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_NEM_COLL_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -275,15 +275,15 @@ static int nem_coll_finalize(void *param ATTRIBUTE((unused)))
 int MPID_nem_coll_init(void)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_COLL_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_COLL_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_COLL_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_COLL_INIT);
 
     utarray_new(coll_fns_array, &ut_ptr_icd);
     MPIR_Add_finalize(nem_coll_finalize, NULL, MPIR_FINALIZE_CALLBACK_PRIO-1);
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_COLL_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_COLL_INIT);
     return mpi_errno;
  fn_oom: /* out-of-memory handler for utarray operations */
     MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray");
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3i_eagernoncontig.c b/src/mpid/ch3/channels/nemesis/src/ch3i_eagernoncontig.c
index fe31782..bca1df2 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3i_eagernoncontig.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3i_eagernoncontig.c
@@ -23,9 +23,9 @@ int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPIR_Request *sreq, void *header,
     int mpi_errno = MPI_SUCCESS;
     int again = 0;
     intptr_t orig_segment_first = sreq->dev.segment_first;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
 
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)header);
 
@@ -68,7 +68,7 @@ int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPIR_Request *sreq, void *header,
         else
         {
             /* part of message was sent, make this req an active send */
-            MPIU_Assert(MPIDI_CH3I_shm_active_send == NULL);
+            MPIR_Assert(MPIDI_CH3I_shm_active_send == NULL);
             MPIDI_CH3I_shm_active_send = sreq;
         }
         MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
@@ -78,7 +78,7 @@ int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPIR_Request *sreq, void *header,
     /* finished sending all data, complete the request */
     if (!sreq->dev.OnDataAvail)
     {
-        MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+        MPIR_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
         mpi_errno = MPID_Request_complete(sreq);
         if (mpi_errno != MPI_SUCCESS) {
             MPIR_ERR_POP(mpi_errno);
@@ -90,14 +90,14 @@ int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPIR_Request *sreq, void *header,
         int complete = 0;
         mpi_errno = sreq->dev.OnDataAvail(vc, sreq, &complete);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-        MPIU_Assert(complete); /* all data has been sent, we should always complete */
+        MPIR_Assert(complete); /* all data has been sent, we should always complete */
 
         MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));
     }
 
  fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_alloc.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_alloc.c
index c3fd575..f093850 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_alloc.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_alloc.c
@@ -74,19 +74,19 @@ int MPIDI_CH3I_Seg_alloc(size_t len, void **ptr_p)
 {
     int mpi_errno = MPI_SUCCESS;
     alloc_elem_t *ep;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEG_ALLOC);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEG_ALLOC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEG_ALLOC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SEG_ALLOC);
 
     /* round up to multiple of 8 to ensure the start of the next
        region is 64-bit aligned. */
     len = ROUND_UP_8(len);
 
-    MPIU_Assert(len);
-    MPIU_Assert(ptr_p);
+    MPIR_Assert(len);
+    MPIR_Assert(ptr_p);
 
-    MPIU_CHKPMEM_MALLOC(ep, alloc_elem_t *, sizeof(alloc_elem_t), mpi_errno, "el");
+    MPIR_CHKPMEM_MALLOC(ep, alloc_elem_t *, sizeof(alloc_elem_t), mpi_errno, "el");
     
     ep->ptr_p = ptr_p;
     ep->len = len;
@@ -96,11 +96,11 @@ int MPIDI_CH3I_Seg_alloc(size_t len, void **ptr_p)
     segment_len += len;
     
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEG_ALLOC);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SEG_ALLOC);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -141,15 +141,15 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
     void *current_addr;
     void *start_addr ATTRIBUTE((unused));
     size_t size_left;
-    MPIU_CHKPMEM_DECL (1);
-    MPIU_CHKLMEM_DECL (2);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEG_COMMIT);
+    MPIR_CHKPMEM_DECL (1);
+    MPIR_CHKLMEM_DECL (2);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEG_COMMIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEG_COMMIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SEG_COMMIT);
 
     /* MPIDI_CH3I_Seg_alloc() needs to have been called before this function */
-    MPIU_Assert(!ALLOCQ_EMPTY());
-    MPIU_Assert(segment_len > 0);
+    MPIR_Assert(!ALLOCQ_EMPTY());
+    MPIR_Assert(segment_len > 0);
 
     /* allocate an area to check if the segment was allocated symmetrically */
     mpi_errno = MPIDI_CH3I_Seg_alloc(sizeof(asym_check_region), (void **)&asym_check_region_p);
@@ -169,7 +169,7 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
        region containing the barrier vars. */
     
     /* add space for local barrier region.  Use a whole cacheline. */
-    MPIU_Assert(MPID_NEM_CACHE_LINE_LEN >= sizeof(MPID_nem_barrier_t));
+    MPIR_Assert(MPID_NEM_CACHE_LINE_LEN >= sizeof(MPID_nem_barrier_t));
     segment_len += MPID_NEM_CACHE_LINE_LEN;
 
 #ifdef OPA_USE_LOCK_BASED_PRIMITIVES
@@ -182,7 +182,7 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
     /* offset from memory->base_addr to the start of ipc_lock */
     ipc_lock_offset = MPID_NEM_CACHE_LINE_LEN;
 
-    MPIU_Assert(ipc_lock_offset >= sizeof(OPA_emulation_ipl_t));
+    MPIR_Assert(ipc_lock_offset >= sizeof(OPA_emulation_ipl_t));
     segment_len += MPID_NEM_CACHE_LINE_LEN;
 #endif
 
@@ -194,7 +194,7 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
     {
         char *addr;
 
-        MPIU_CHKPMEM_MALLOC (addr, char *, segment_len + MPID_NEM_CACHE_LINE_LEN, mpi_errno, "segment");
+        MPIR_CHKPMEM_MALLOC (addr, char *, segment_len + MPID_NEM_CACHE_LINE_LEN, mpi_errno, "segment");
 
         memory->base_addr = addr;
         current_addr = (char *)(((uintptr_t)addr + (uintptr_t)MPID_NEM_CACHE_LINE_LEN-1) & (~((uintptr_t)MPID_NEM_CACHE_LINE_LEN-1)));
@@ -217,7 +217,7 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
             /* post name of shared file */
-            MPIU_Assert (MPID_nem_mem_region.local_procs[0] == MPID_nem_mem_region.rank);
+            MPIR_Assert (MPID_nem_mem_region.local_procs[0] == MPID_nem_mem_region.rank);
 
             mpi_errno = MPIU_SHMW_Hnd_get_serialized_by_ref(memory->hnd, &serialized_hnd);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -243,7 +243,7 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
             int found = FALSE;
 
             /* Allocate space for pmi key and val */
-            MPIU_CHKLMEM_MALLOC(val, char *, PMI2_MAX_VALLEN, mpi_errno, "val");
+            MPIR_CHKLMEM_MALLOC(val, char *, PMI2_MAX_VALLEN, mpi_errno, "val");
 
             /* get name of shared file */
             mpi_errno = PMI2_Info_GetNodeAttr("sharedFilename", val, PMI2_MAX_VALLEN, &found, TRUE);
@@ -290,7 +290,7 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
     {
         char *addr;
 
-        MPIU_CHKPMEM_MALLOC (addr, char *, segment_len + MPID_NEM_CACHE_LINE_LEN, mpi_errno, "segment");
+        MPIR_CHKPMEM_MALLOC (addr, char *, segment_len + MPID_NEM_CACHE_LINE_LEN, mpi_errno, "segment");
 
         memory->base_addr = addr;
         current_addr = (char *)(((uintptr_t)addr + (uintptr_t)MPID_NEM_CACHE_LINE_LEN-1) & (~((uintptr_t)MPID_NEM_CACHE_LINE_LEN-1)));
@@ -313,11 +313,11 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
         /* Allocate space for pmi key and val */
         pmi_errno = PMI_KVS_Get_key_length_max(&key_max_sz);
         MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
-        MPIU_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");
+        MPIR_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");
 
         pmi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
         MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
-        MPIU_CHKLMEM_MALLOC(val, char *, val_max_sz, mpi_errno, "val");
+        MPIR_CHKLMEM_MALLOC(val, char *, val_max_sz, mpi_errno, "val");
 
         mpi_errno = MPIDI_PG_GetConnKVSname (&kvs_name);
         if (mpi_errno) MPIR_ERR_POP (mpi_errno);
@@ -327,7 +327,7 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
             if (mpi_errno != MPI_SUCCESS) MPIR_ERR_POP (mpi_errno);
 
             /* post name of shared file */
-            MPIU_Assert (MPID_nem_mem_region.local_procs[0] == MPID_nem_mem_region.rank);
+            MPIR_Assert (MPID_nem_mem_region.local_procs[0] == MPID_nem_mem_region.rank);
             MPL_snprintf (key, key_max_sz, "sharedFilename[%i]", MPID_nem_mem_region.rank);
 
             mpi_errno = MPIU_SHMW_Hnd_get_serialized_by_ref(memory->hnd, &serialized_hnd);
@@ -398,13 +398,13 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
 
     /* reserve room for shared mem barrier (We used a whole cacheline) */
     current_addr = (char *)current_addr + MPID_NEM_CACHE_LINE_LEN;
-    MPIU_Assert(size_left >= MPID_NEM_CACHE_LINE_LEN);
+    MPIR_Assert(size_left >= MPID_NEM_CACHE_LINE_LEN);
     size_left -= MPID_NEM_CACHE_LINE_LEN;
 
 #ifdef OPA_USE_LOCK_BASED_PRIMITIVES
     /* reserve room for the opa emulation lock */
     current_addr = (char *)current_addr + MPID_NEM_CACHE_LINE_LEN;
-    MPIU_Assert(size_left >= MPID_NEM_CACHE_LINE_LEN);
+    MPIR_Assert(size_left >= MPID_NEM_CACHE_LINE_LEN);
     size_left -= MPID_NEM_CACHE_LINE_LEN;
 #endif
 
@@ -415,29 +415,29 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
         ALLOCQ_DEQUEUE(&ep);
 
         *(ep->ptr_p) = current_addr;
-        MPIU_Assert(size_left >= ep->len);
+        MPIR_Assert(size_left >= ep->len);
         size_left -= ep->len;
         current_addr = (char *)current_addr + ep->len;
 
         MPL_free(ep);
 
-        MPIU_Assert((char *)current_addr <= (char *)start_addr + segment_len);
+        MPIR_Assert((char *)current_addr <= (char *)start_addr + segment_len);
     }
     while (!ALLOCQ_EMPTY());
 
     mpi_errno = check_alloc(num_local, local_rank);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEG_COMMIT);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SEG_COMMIT);
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
     MPIU_SHMW_Seg_remove(memory->hnd);
     MPIU_SHMW_Hnd_finalize(&(memory->hnd));
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -450,9 +450,9 @@ int MPIDI_CH3I_Seg_commit(MPID_nem_seg_ptr_t memory, int num_local, int local_ra
 int MPIDI_CH3I_Seg_destroy(void)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEG_DESTROY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEG_DESTROY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEG_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SEG_DESTROY);
 
     if (MPID_nem_mem_region.num_local == 1)
         MPL_free(MPID_nem_mem_region.memory.base_addr);
@@ -465,7 +465,7 @@ int MPIDI_CH3I_Seg_destroy(void)
 
  fn_exit:
     MPIU_SHMW_Hnd_finalize(&(MPID_nem_mem_region.memory.hnd));
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEG_DESTROY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SEG_DESTROY);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -481,9 +481,9 @@ int MPIDI_CH3I_Seg_destroy(void)
 static int check_alloc(int num_local, int local_rank)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_CHECK_ALLOC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CHECK_ALLOC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CHECK_ALLOC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CHECK_ALLOC);
 
     if (local_rank == 0) {
         asym_check_region_p->base_ptr = MPID_nem_mem_region.memory.base_addr;
@@ -516,7 +516,7 @@ static int check_alloc(int num_local, int local_rank)
     }
       
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_CHECK_ALLOC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CHECK_ALLOC);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_barrier.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_barrier.c
index e2c7910..7de3eb8 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_barrier.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_barrier.c
@@ -16,9 +16,9 @@ static int barrier_init = 0;
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_barrier_init(MPID_nem_barrier_t *barrier_region, int init_values)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_BARRIER_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_BARRIER_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_BARRIER_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_BARRIER_INIT);
     
     MPID_nem_mem_region.barrier = barrier_region;
     if (init_values) {
@@ -29,7 +29,7 @@ int MPID_nem_barrier_init(MPID_nem_barrier_t *barrier_region, int init_values)
     sense = 0;
     barrier_init = 1;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_BARRIER_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_BARRIER_INIT);
 
     return MPI_SUCCESS;
 }
@@ -42,9 +42,9 @@ int MPID_nem_barrier_init(MPID_nem_barrier_t *barrier_region, int init_values)
 int MPID_nem_barrier(void)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_BARRIER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_BARRIER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_BARRIER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_BARRIER);
 
     if (MPID_nem_mem_region.num_local == 1)
         goto fn_exit;
@@ -67,6 +67,6 @@ int MPID_nem_barrier(void)
 
  fn_fail:
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_BARRIER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_BARRIER);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_ckpt.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_ckpt.c
index 6ab78a0..2c4a19c 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_ckpt.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_ckpt.c
@@ -159,9 +159,9 @@ int MPIDI_nem_ckpt_init(void)
     cr_callback_id_t cb_id;
     cr_client_id_t client_id;
     int ret;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_NEM_CKPT_INIT);
 
     if (!MPIR_CVAR_NEMESIS_ENABLE_CKPOINT)
         goto fn_exit;
@@ -170,18 +170,18 @@ int MPIDI_nem_ckpt_init(void)
     MPIR_ERR_CHKANDJUMP(client_id < 0 && errno == ENOSYS, mpi_errno, MPI_ERR_OTHER, "**blcr_mod");
 
     cb_id = cr_register_callback(ckpt_cb, NULL, CR_THREAD_CONTEXT);
-    MPIR_ERR_CHKANDJUMP1(cb_id == -1, mpi_errno, MPI_ERR_OTHER, "**intern", "**intern %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(cb_id == -1, mpi_errno, MPI_ERR_OTHER, "**intern", "**intern %s", MPIR_Strerror(errno));
     
     checkpointing = FALSE;
     current_wave = 0;
 
     ret = sem_init(&ckpt_sem, 0, 0);
-    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_init", "**sem_init %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_init", "**sem_init %s", MPIR_Strerror(errno));
     ret = sem_init(&cont_sem, 0, 0);
-    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_init", "**sem_init %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_init", "**sem_init %s", MPIR_Strerror(errno));
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_CKPT_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_NEM_CKPT_INIT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -195,17 +195,17 @@ int MPIDI_nem_ckpt_finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int ret;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_NEM_CKPT_FINALIZE);
 
     ret = sem_destroy(&ckpt_sem);
-    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_destroy", "**sem_destroy %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_destroy", "**sem_destroy %s", MPIR_Strerror(errno));
     ret = sem_destroy(&cont_sem);
-    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_destroy", "**sem_destroy %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_destroy", "**sem_destroy %s", MPIR_Strerror(errno));
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_CKPT_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_NEM_CKPT_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -222,9 +222,9 @@ static int reinit_pmi(void)
     int pg_rank, pg_size;
     int kvs_name_sz, pg_id_sz;
     
-    MPIDI_STATE_DECL(MPID_STATE_REINIT_PMI);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_REINIT_PMI);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_REINIT_PMI);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_REINIT_PMI);
 
     /* Init pmi and do some sanity checks */
     ret = PMI_Init(&has_parent);
@@ -264,7 +264,7 @@ static int reinit_pmi(void)
     CHECK_ERR(ret, "PMI_Get_my_name");
 
     
-    MPIDI_FUNC_EXIT(MPID_STATE_REINIT_PMI);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_REINIT_PMI);
     return 0;
 }
 
@@ -284,10 +284,10 @@ static int restore_env(pid_t parent_pid, int rank)
     MPL_snprintf(env_filename, MAX_STR_LEN, "/tmp/hydra-env-file-%d:%d", parent_pid, rank); 
 
     f = fopen(env_filename, "r");
-    CHECK_ERR(!f, MPIU_Strerror (errno));
+    CHECK_ERR(!f, MPIR_Strerror (errno));
 
     ret = unlink(env_filename);
-    CHECK_ERR(ret, MPIU_Strerror (errno));
+    CHECK_ERR(ret, MPIR_Strerror (errno));
 
     while (fgets(var_val, MAX_STR_LEN, f)) {
         size_t len = strlen(var_val);
@@ -295,11 +295,11 @@ static int restore_env(pid_t parent_pid, int rank)
         if (var_val[len-1] == '\n')
             var_val[len-1] = '\0';
         ret = MPL_putenv(MPL_strdup(var_val));
-        CHECK_ERR(ret != 0, MPIU_Strerror (errno));
+        CHECK_ERR(ret != 0, MPIR_Strerror (errno));
     }
 
     ret = fclose(f);
-    CHECK_ERR(ret, MPIU_Strerror (errno));
+    CHECK_ERR(ret, MPIR_Strerror (errno));
 
     return 0;
 }
@@ -327,9 +327,9 @@ static int open_io_socket(socktype_t socktype, int rank, int dupfd)
     int port;
     int len;
     char *id_p;
-    MPIDI_STATE_DECL(MPID_STATE_OPEN_IO_SOCKET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_OPEN_IO_SOCKET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_OPEN_IO_SOCKET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_OPEN_IO_SOCKET);
 
     memset(&sock_addr, 0, sizeof(sock_addr));
     memset(&addr, 0, sizeof(addr));
@@ -371,7 +371,7 @@ static int open_io_socket(socktype_t socktype, int rank, int dupfd)
     ret = close(fd);
     CHECK_ERR_ERRNO(ret, "close socket");
     
-    MPIDI_FUNC_EXIT(MPID_STATE_OPEN_IO_SOCKET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_OPEN_IO_SOCKET);
 fn_exit:
     return 0;
 }
@@ -383,9 +383,9 @@ fn_exit:
 static int restore_stdinouterr(int rank)
 {
     int ret;
-    MPIDI_STATE_DECL(MPID_STATE_RESTORE_STDINOUTERR);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_RESTORE_STDINOUTERR);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_RESTORE_STDINOUTERR);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_RESTORE_STDINOUTERR);
 
     if (rank == 0) {
         ret = open_io_socket(IN_SOCK,  rank, 0);
@@ -396,7 +396,7 @@ static int restore_stdinouterr(int rank)
     ret = open_io_socket(ERR_SOCK, rank, 2);
     CHECK_ERR(ret, "open stdin socket");
 
-    MPIDI_FUNC_EXIT(MPID_STATE_RESTORE_STDINOUTERR);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_RESTORE_STDINOUTERR);
     return 0;
 }
 
@@ -409,9 +409,9 @@ int MPIDI_nem_ckpt_start(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_START);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_START);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_START);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_NEM_CKPT_START);
 
     if (checkpointing)
         goto fn_exit;
@@ -456,7 +456,7 @@ int MPIDI_nem_ckpt_start(void)
     
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_CKPT_START);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_NEM_CKPT_START);
     return mpi_errno;
 fn_fail:
 
@@ -472,9 +472,9 @@ int MPIDI_nem_ckpt_finish(void)
     int mpi_errno = MPI_SUCCESS;
     int i;
     int ret;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_FINISH);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_FINISH);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_FINISH);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_NEM_CKPT_FINISH);
 
     /* Since we're checkpointing the shared memory region (i.e., the
        channels between local procs), we don't have to flush those
@@ -486,12 +486,12 @@ int MPIDI_nem_ckpt_finish(void)
     do {
         ret = sem_post(&ckpt_sem);
     } while (ret == -1 && errno == EINTR);
-    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_post", "**sem_post %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_post", "**sem_post %s", MPIR_Strerror(errno));
 
     do {
         ret = sem_wait(&cont_sem);
     } while (ret == -1 && errno == EINTR);
-    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_wait", "**sem_wait %s", MPIU_Strerror(errno));
+    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_wait", "**sem_wait %s", MPIR_Strerror(errno));
 
     mpi_errno = MPID_nem_barrier();
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -516,7 +516,7 @@ int MPIDI_nem_ckpt_finish(void)
     checkpointing = FALSE;
     
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_CKPT_FINISH);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_NEM_CKPT_FINISH);
     return mpi_errno;
 fn_fail:
 
@@ -532,16 +532,16 @@ static int pkt_ckpt_marker_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_pkt_ckpt_marker_t * const ckpt_pkt = (MPID_nem_pkt_ckpt_marker_t *)pkt;
-    MPIDI_STATE_DECL(MPID_STATE_PKT_CKPT_MARKER_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PKT_CKPT_MARKER_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_PKT_CKPT_MARKER_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PKT_CKPT_MARKER_HANDLER);
 
     if (!checkpointing) {
         mpi_errno = MPIDI_nem_ckpt_start();
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     }
 
-    MPIU_Assert(current_wave == ckpt_pkt->wave);
+    MPIR_Assert(current_wave == ckpt_pkt->wave);
 
     --marker_count;
 
@@ -559,7 +559,7 @@ static int pkt_ckpt_marker_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_
     *req = NULL;
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_PKT_CKPT_MARKER_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PKT_CKPT_MARKER_HANDLER);
     return mpi_errno;
 fn_fail:
 
@@ -574,9 +574,9 @@ fn_fail:
 int MPIDI_nem_ckpt_pkthandler_init(MPIDI_CH3_PktHandler_Fcn *pktArray[], int arraySize)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_CKPT_PKTHANDLER_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_CKPT_PKTHANDLER_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_CKPT_PKTHANDLER_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_CKPT_PKTHANDLER_INIT);
 
     /* Check that the array is large enough */
     if (arraySize <= MPIDI_CH3_PKT_END_ALL) {
@@ -586,7 +586,7 @@ int MPIDI_nem_ckpt_pkthandler_init(MPIDI_CH3_PktHandler_Fcn *pktArray[], int arr
     pktArray[MPIDI_NEM_PKT_CKPT_MARKER] = pkt_ckpt_marker_handler;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_CKPT_PKTHANDLER_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_CKPT_PKTHANDLER_INIT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_debug.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_debug.c
index 7ff1d69..342b175 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_debug.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_debug.c
@@ -14,9 +14,9 @@
 #define FCNAME MPL_QUOTE(FUNCNAME)
 void MPID_nem_dbg_dump_cell (volatile struct MPID_nem_cell *cell)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DBG_DUMP_CELL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_DBG_DUMP_CELL);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DBG_DUMP_CELL);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_DBG_DUMP_CELL);
 
     MPL_DBG_MSG_D (MPIR_DBG_OTHER, TERSE, "  src = %6d", cell->pkt.header.source);
     MPL_DBG_MSG_D (MPIR_DBG_OTHER, TERSE, "  dst = %6d", cell->pkt.header.dest);
@@ -24,7 +24,7 @@ void MPID_nem_dbg_dump_cell (volatile struct MPID_nem_cell *cell)
     MPL_DBG_MSG_D (MPIR_DBG_OTHER, TERSE, "  sqn = %6d", cell->pkt.header.seqno);
     MPL_DBG_MSG_D (MPIR_DBG_OTHER, TERSE, "  typ = %6d", cell->pkt.header.type);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DBG_DUMP_CELL);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_DBG_DUMP_CELL);
 }
 
 #define state_case(suffix)             \
@@ -127,7 +127,7 @@ void MPID_nem_dbg_print_all_sendq(FILE *stream)
     MPIDI_PG_Get_iterator(&iter);
     while (MPIDI_PG_Has_next(&iter)) {
         MPIDI_PG_Get_next(&iter, &pg);
-        fprintf(stream, "PG ptr=%p size=%d id=%s refcount=%d\n", pg, pg->size, (const char*)pg->id, MPIU_Object_get_ref(pg));
+        fprintf(stream, "PG ptr=%p size=%d id=%s refcount=%d\n", pg, pg->size, (const char*)pg->id, MPIR_Object_get_ref(pg));
         for (i = 0; i < MPIDI_PG_Get_size(pg); ++i) {
             MPIDI_PG_Get_vc(pg, i, &vc);
             MPID_nem_dbg_print_vc_sendq(stream, vc);
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_finalize.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_finalize.c
index 51e0f6e..dd50ac3 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_finalize.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_finalize.c
@@ -19,12 +19,12 @@
 int MPID_nem_finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_FINALIZE);
 
     /* this test is not the right one */
-/*     MPIU_Assert(MPID_nem_queue_empty( MPID_nem_mem_region.RecvQ[MPID_nem_mem_region.rank])); */
+/*     MPIR_Assert(MPID_nem_queue_empty( MPID_nem_mem_region.RecvQ[MPID_nem_mem_region.rank])); */
 
     /* these are allocated in MPID_nem_mpich_init, not MPID_nem_init */
     MPL_free(MPID_nem_recv_seqno);
@@ -62,7 +62,7 @@ int MPID_nem_finalize(void)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_init.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_init.c
index ba4a20c..cf08b32 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_init.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_init.c
@@ -127,7 +127,7 @@ MPID_nem_init(int pg_rank, MPIDI_PG_t *pg_p, int has_parent ATTRIBUTE((unused)))
     MPID_nem_queue_t *recv_queues_p = NULL;
     MPID_nem_queue_t *free_queues_p = NULL;
 
-    MPIU_CHKPMEM_DECL(9);
+    MPIR_CHKPMEM_DECL(9);
 
     /* TODO add compile-time asserts (rather than run-time) and convert most of these */
 
@@ -135,19 +135,19 @@ MPID_nem_init(int pg_rank, MPIDI_PG_t *pg_p, int has_parent ATTRIBUTE((unused)))
        packet.  This is needed because we no longer include channel
        packet types in the CH3 packet types to allow dynamic channel
        loading. */
-    MPIU_Assert(sizeof(MPIDI_CH3_nem_pkt_t) <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(sizeof(MPIDI_CH3_nem_pkt_t) <= sizeof(MPIDI_CH3_Pkt_t));
 
     /* The MPID_nem_cell_rel_ptr_t defined in mpid_nem_datatypes.h
        should only contain a OPA_ptr_t.  This is to check that
        absolute pointers are exactly the same size as relative
        pointers. */
-    MPIU_Assert(sizeof(MPID_nem_cell_rel_ptr_t) == sizeof(OPA_ptr_t));
+    MPIR_Assert(sizeof(MPID_nem_cell_rel_ptr_t) == sizeof(OPA_ptr_t));
 
     /* Make sure the cell structure looks like it should */
-    MPIU_Assert(MPID_NEM_CELL_PAYLOAD_LEN + MPID_NEM_CELL_HEAD_LEN == sizeof(MPID_nem_cell_t));
-    MPIU_Assert(sizeof(MPID_nem_cell_t) == sizeof(MPID_nem_abs_cell_t));
+    MPIR_Assert(MPID_NEM_CELL_PAYLOAD_LEN + MPID_NEM_CELL_HEAD_LEN == sizeof(MPID_nem_cell_t));
+    MPIR_Assert(sizeof(MPID_nem_cell_t) == sizeof(MPID_nem_abs_cell_t));
     /* Make sure payload is aligned on a double */
-    MPIU_Assert(MPID_NEM_ALIGNED(&((MPID_nem_cell_t*)0)->pkt.p.payload[0], sizeof(double)));
+    MPIR_Assert(MPID_NEM_ALIGNED(&((MPID_nem_cell_t*)0)->pkt.p.payload[0], sizeof(double)));
 
     /* Initialize the business card */
     mpi_errno = MPIDI_CH3I_BCInit( &bc_val, &val_max_remaining );
@@ -155,7 +155,7 @@ MPID_nem_init(int pg_rank, MPIDI_PG_t *pg_p, int has_parent ATTRIBUTE((unused)))
     publish_bc_orig = bc_val;
 
     ret = gethostname (MPID_nem_hostname, MAX_HOSTNAME_LEN);
-    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock_gethost", "**sock_gethost %s %d", MPIU_Strerror (errno), errno);
+    MPIR_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock_gethost", "**sock_gethost %s %d", MPIR_Strerror (errno), errno);
 
     MPID_nem_hostname[MAX_HOSTNAME_LEN-1] = '\0';
 
@@ -163,20 +163,20 @@ MPID_nem_init(int pg_rank, MPIDI_PG_t *pg_p, int has_parent ATTRIBUTE((unused)))
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
 #ifdef MEM_REGION_IN_HEAP
-    MPIU_CHKPMEM_MALLOC (MPID_nem_mem_region_ptr, MPID_nem_mem_region_t *, sizeof(MPID_nem_mem_region_t), mpi_errno, "mem_region");
+    MPIR_CHKPMEM_MALLOC (MPID_nem_mem_region_ptr, MPID_nem_mem_region_t *, sizeof(MPID_nem_mem_region_t), mpi_errno, "mem_region");
 #endif /* MEM_REGION_IN_HEAP */
 
     MPID_nem_mem_region.num_seg        = 7;
-    MPIU_CHKPMEM_MALLOC (MPID_nem_mem_region.seg, MPID_nem_seg_info_ptr_t, MPID_nem_mem_region.num_seg * sizeof(MPID_nem_seg_info_t), mpi_errno, "mem_region segments");
+    MPIR_CHKPMEM_MALLOC (MPID_nem_mem_region.seg, MPID_nem_seg_info_ptr_t, MPID_nem_mem_region.num_seg * sizeof(MPID_nem_seg_info_t), mpi_errno, "mem_region segments");
     MPID_nem_mem_region.rank           = pg_rank;
     MPID_nem_mem_region.num_local      = num_local;
     MPID_nem_mem_region.num_procs      = num_procs;
     MPID_nem_mem_region.local_procs    = local_procs;
     MPID_nem_mem_region.local_rank     = local_rank;
-    MPIU_CHKPMEM_MALLOC (MPID_nem_mem_region.local_ranks, int *, num_procs * sizeof(int), mpi_errno, "mem_region local ranks");
+    MPIR_CHKPMEM_MALLOC (MPID_nem_mem_region.local_ranks, int *, num_procs * sizeof(int), mpi_errno, "mem_region local ranks");
     MPID_nem_mem_region.ext_procs      = num_procs - num_local ;
     if (MPID_nem_mem_region.ext_procs > 0)
-        MPIU_CHKPMEM_MALLOC (MPID_nem_mem_region.ext_ranks, int *, MPID_nem_mem_region.ext_procs * sizeof(int), mpi_errno, "mem_region ext ranks");
+        MPIR_CHKPMEM_MALLOC (MPID_nem_mem_region.ext_ranks, int *, MPID_nem_mem_region.ext_procs * sizeof(int), mpi_errno, "mem_region ext ranks");
     MPID_nem_mem_region.next           = NULL;
 
     for (idx = 0 ; idx < num_procs; idx++)
@@ -275,8 +275,8 @@ MPID_nem_init(int pg_rank, MPIDI_PG_t *pg_p, int has_parent ATTRIBUTE((unused)))
     MPID_nem_mem_region.Elements = cells_p[local_rank];
 
     /* Tables of pointers to shared memory Qs */
-    MPIU_CHKPMEM_MALLOC(MPID_nem_mem_region.FreeQ, MPID_nem_queue_ptr_t *, num_procs * sizeof(MPID_nem_queue_ptr_t), mpi_errno, "FreeQ");
-    MPIU_CHKPMEM_MALLOC(MPID_nem_mem_region.RecvQ, MPID_nem_queue_ptr_t *, num_procs * sizeof(MPID_nem_queue_ptr_t), mpi_errno, "RecvQ");
+    MPIR_CHKPMEM_MALLOC(MPID_nem_mem_region.FreeQ, MPID_nem_queue_ptr_t *, num_procs * sizeof(MPID_nem_queue_ptr_t), mpi_errno, "FreeQ");
+    MPIR_CHKPMEM_MALLOC(MPID_nem_mem_region.RecvQ, MPID_nem_queue_ptr_t *, num_procs * sizeof(MPID_nem_queue_ptr_t), mpi_errno, "RecvQ");
 
     /* Init table entry for our Qs */
     MPID_nem_mem_region.FreeQ[pg_rank] = &free_queues_p[local_rank];
@@ -333,8 +333,8 @@ MPID_nem_init(int pg_rank, MPIDI_PG_t *pg_p, int has_parent ATTRIBUTE((unused)))
 	MPID_nem_mem_region.FreeQ[grank] = &free_queues_p[idx];
 	MPID_nem_mem_region.RecvQ[grank] = &recv_queues_p[idx];
 
-	MPIU_Assert(MPID_NEM_ALIGNED(MPID_nem_mem_region.FreeQ[grank], MPID_NEM_CACHE_LINE_LEN));
-	MPIU_Assert(MPID_NEM_ALIGNED(MPID_nem_mem_region.RecvQ[grank], MPID_NEM_CACHE_LINE_LEN));
+	MPIR_Assert(MPID_NEM_ALIGNED(MPID_nem_mem_region.FreeQ[grank], MPID_NEM_CACHE_LINE_LEN));
+	MPIR_Assert(MPID_NEM_ALIGNED(MPID_nem_mem_region.RecvQ[grank], MPID_NEM_CACHE_LINE_LEN));
     }
 
     /* make pointers to our queues global so we don't have to dereference the array */
@@ -348,10 +348,10 @@ MPID_nem_init(int pg_rank, MPIDI_PG_t *pg_p, int has_parent ATTRIBUTE((unused)))
 
     
     /* Allocate table of pointers to fastboxes */
-    MPIU_CHKPMEM_MALLOC(MPID_nem_mem_region.mailboxes.in,  MPID_nem_fastbox_t **, num_local * sizeof(MPID_nem_fastbox_t *), mpi_errno, "fastboxes");
-    MPIU_CHKPMEM_MALLOC(MPID_nem_mem_region.mailboxes.out, MPID_nem_fastbox_t **, num_local * sizeof(MPID_nem_fastbox_t *), mpi_errno, "fastboxes");
+    MPIR_CHKPMEM_MALLOC(MPID_nem_mem_region.mailboxes.in,  MPID_nem_fastbox_t **, num_local * sizeof(MPID_nem_fastbox_t *), mpi_errno, "fastboxes");
+    MPIR_CHKPMEM_MALLOC(MPID_nem_mem_region.mailboxes.out, MPID_nem_fastbox_t **, num_local * sizeof(MPID_nem_fastbox_t *), mpi_errno, "fastboxes");
 
-    MPIU_Assert(num_local > 0);
+    MPIR_Assert(num_local > 0);
 
 #define MAILBOX_INDEX(sender, receiver) ( ((sender) > (receiver)) ? ((num_local-1) * (sender) + (receiver)) :		\
                                           (((sender) < (receiver)) ? ((num_local-1) * (sender) + ((receiver)-1)) : 0) )
@@ -411,12 +411,12 @@ MPID_nem_init(int pg_rank, MPIDI_PG_t *pg_p, int has_parent ATTRIBUTE((unused)))
 
     MPID_nem_init_stats(num_local);
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 
@@ -432,10 +432,10 @@ MPID_nem_vc_init (MPIDI_VC_t *vc)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_VC_INIT);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_VC_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_VC_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_VC_INIT);
     
     vc_ch->pkt_handler = NULL;
     vc_ch->num_pkt_handlers = 0;
@@ -449,7 +449,7 @@ MPID_nem_vc_init (MPIDI_VC_t *vc)
     vc_ch->ckpt_restart_vc    = NULL;
 #endif
     vc_ch->pending_pkt_len    = 0;
-    MPIU_CHKPMEM_MALLOC (vc_ch->pending_pkt, MPIDI_CH3_Pkt_t *, sizeof (MPIDI_CH3_Pkt_t), mpi_errno, "pending_pkt");
+    MPIR_CHKPMEM_MALLOC (vc_ch->pending_pkt, MPIDI_CH3_Pkt_t *, sizeof (MPIDI_CH3_Pkt_t), mpi_errno, "pending_pkt");
 
     /* We do different things for vcs in the COMM_WORLD pg vs other pgs
        COMM_WORLD vcs may use shared memory, and already have queues allocated
@@ -587,7 +587,7 @@ MPID_nem_vc_init (MPIDI_VC_t *vc)
 /*         /\* iStartContigMsg iSendContig and sendNoncontig_fn must */
 /*            be set for nonlocal processes.  Default functions only */
 /*            support shared-memory communication. *\/ */
-/*         MPIU_Assert(vc_ch->iStartContigMsg && vc_ch->iSendContig && vc->sendNoncontig_fn); */
+/*         MPIR_Assert(vc_ch->iStartContigMsg && vc_ch->iSendContig && vc->sendNoncontig_fn); */
 
     }
 
@@ -598,12 +598,12 @@ MPID_nem_vc_init (MPIDI_VC_t *vc)
        to NULL */
     vc_ch->sendq_head = NULL;
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_VC_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_VC_INIT);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -616,9 +616,9 @@ MPID_nem_vc_destroy(MPIDI_VC_t *vc)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_VC_DESTROY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_VC_DESTROY);
 
     MPL_free(vc_ch->pending_pkt);
 
@@ -626,7 +626,7 @@ MPID_nem_vc_destroy(MPIDI_VC_t *vc)
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
     fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_VC_DESTROY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_VC_DESTROY);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -666,12 +666,12 @@ static int get_local_procs(MPIDI_PG_t *pg, int our_pg_rank, int *num_local_p,
     int i;
     int num_local = 0;
     MPID_Node_id_t our_node_id;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
-    MPIU_Assert(our_pg_rank < pg->size);
+    MPIR_Assert(our_pg_rank < pg->size);
     our_node_id = pg->vct[our_pg_rank].node_id;
 
-    MPIU_CHKPMEM_MALLOC(procs, int *, pg->size * sizeof(int), mpi_errno, "local process index array");
+    MPIR_CHKPMEM_MALLOC(procs, int *, pg->size * sizeof(int), mpi_errno, "local process index array");
 
     for (i = 0; i < pg->size; ++i) {
         if (our_node_id == pg->vct[i].node_id) {
@@ -683,7 +683,7 @@ static int get_local_procs(MPIDI_PG_t *pg, int our_pg_rank, int *num_local_p,
         }
     }
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 
     if (num_local_p != NULL)
         *num_local_p = num_local;
@@ -693,7 +693,7 @@ fn_exit:
     return mpi_errno;
 fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c
index d49da80..d8a5aa9 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c
@@ -56,9 +56,9 @@ static int pkt_COOKIE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bu
 int MPID_nem_lmt_pkthandler_init(MPIDI_CH3_PktHandler_Fcn *pktArray[], int arraySize)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_PKTHANDLER_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_PKTHANDLER_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_PKTHANDLER_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_PKTHANDLER_INIT);
 
     /* Check that the array is large enough */
     if (arraySize <= MPIDI_CH3_PKT_END_ALL) {
@@ -71,7 +71,7 @@ int MPID_nem_lmt_pkthandler_init(MPIDI_CH3_PktHandler_Fcn *pktArray[], int array
     pktArray[MPIDI_NEM_PKT_LMT_COOKIE] = pkt_COOKIE_handler;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_PKTHANDLER_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_PKTHANDLER_INIT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -91,9 +91,9 @@ int MPID_nem_lmt_RndvSend(MPIR_Request **sreq_p, const void * buf, MPI_Aint coun
     MPID_PKT_DECL_CAST(upkt, MPID_nem_pkt_lmt_rts_t, rts_pkt);
     MPIDI_VC_t *vc;
     MPIR_Request *sreq =*sreq_p;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_RNDVSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_RNDVSEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_RNDVSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_RNDVSEND);
 
     MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
 
@@ -132,7 +132,7 @@ int MPID_nem_lmt_RndvSend(MPIR_Request **sreq_p, const void * buf, MPI_Aint coun
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_RNDVSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_RNDVSEND);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -150,9 +150,9 @@ int MPID_nem_lmt_RndvRecv(MPIDI_VC_t *vc, MPIR_Request *rreq)
 {
     int mpi_errno = MPI_SUCCESS;
     int complete = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_RNDVRECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_RNDVRECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_RNDVRECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_RNDVRECV);
 
     /* if the lmt functions are not set, fall back to the default rendezvous code */
     if (vc->ch.lmt_initiate_lmt == NULL)
@@ -167,10 +167,10 @@ int MPID_nem_lmt_RndvRecv(MPIDI_VC_t *vc, MPIR_Request *rreq)
     mpi_errno = do_cts(vc, rreq, &complete);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Assert(complete);
+    MPIR_Assert(complete);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_RNDVRECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_RNDVRECV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -188,10 +188,10 @@ static int pkt_RTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufle
     MPID_nem_pkt_lmt_rts_t * const rts_pkt = (MPID_nem_pkt_lmt_rts_t *)pkt;
     char *data_buf;
     intptr_t data_len;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_PKT_RTS_HANDLER);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PKT_RTS_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_PKT_RTS_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PKT_RTS_HANDLER);
 
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
 
@@ -224,7 +224,7 @@ static int pkt_RTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufle
     {
         /* set for the cookie to be received into the tmp_cookie in the request */
         MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"haven't received entire cookie");
-        MPIU_CHKPMEM_MALLOC(rreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, char *, rts_pkt->cookie_len, mpi_errno, "tmp cookie buf");
+        MPIR_CHKPMEM_MALLOC(rreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, char *, rts_pkt->cookie_len, mpi_errno, "tmp cookie buf");
         rreq->ch.lmt_tmp_cookie.MPL_IOV_LEN = rts_pkt->cookie_len;
 
         rreq->dev.iov[0] = rreq->ch.lmt_tmp_cookie;
@@ -260,10 +260,10 @@ static int pkt_RTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufle
         {
             /* receive cookie into tmp_cookie in the request */
             MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"received entire cookie");
-            MPIU_CHKPMEM_MALLOC(rreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, char *, rts_pkt->cookie_len, mpi_errno, "tmp cookie buf");
+            MPIR_CHKPMEM_MALLOC(rreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, char *, rts_pkt->cookie_len, mpi_errno, "tmp cookie buf");
             rreq->ch.lmt_tmp_cookie.MPL_IOV_LEN = rts_pkt->cookie_len;
         
-            MPIU_Memcpy(rreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, data_buf, rts_pkt->cookie_len);
+            MPIR_Memcpy(rreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, data_buf, rts_pkt->cookie_len);
             *buflen = sizeof(MPIDI_CH3_Pkt_t) + rts_pkt->cookie_len;
             *rreqp = NULL;
         }
@@ -275,7 +275,7 @@ static int pkt_RTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufle
             MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");
             mpi_errno = do_cts(vc, rreq, &complete);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
-            MPIU_Assert(complete);
+            MPIR_Assert(complete);
         }
         else
         {
@@ -287,13 +287,13 @@ static int pkt_RTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufle
     }
 
     
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_PKT_RTS_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PKT_RTS_HANDLER);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -309,10 +309,10 @@ static int pkt_CTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufle
     char *data_buf;
     intptr_t data_len;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_PKT_CTS_HANDLER);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PKT_CTS_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_PKT_CTS_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PKT_CTS_HANDLER);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"received rndv CTS pkt");
 
@@ -361,7 +361,7 @@ static int pkt_CTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufle
             /* create a recv req and set up to receive the cookie into the sreq's tmp_cookie */
             MPIR_Request *rreq;
 
-            MPIU_CHKPMEM_MALLOC(sreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, char *, cts_pkt->cookie_len, mpi_errno, "tmp cookie buf");
+            MPIR_CHKPMEM_MALLOC(sreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, char *, cts_pkt->cookie_len, mpi_errno, "tmp cookie buf");
             sreq->ch.lmt_tmp_cookie.MPL_IOV_LEN = cts_pkt->cookie_len;
 
             MPIDI_Request_create_rreq(rreq, mpi_errno, goto fn_fail);
@@ -387,11 +387,11 @@ static int pkt_CTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufle
     }
 
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_PKT_CTS_HANDLER);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PKT_CTS_HANDLER);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -404,9 +404,9 @@ static int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufl
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_pkt_lmt_done_t * const done_pkt = (MPID_nem_pkt_lmt_done_t *)pkt;
     MPIR_Request *req;
-    MPIDI_STATE_DECL(MPID_STATE_PKT_DONE_HANDLER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PKT_DONE_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_PKT_DONE_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PKT_DONE_HANDLER);
 
     *buflen = sizeof(MPIDI_CH3_Pkt_t);
     MPIR_Request_get_ptr(done_pkt->req_id, req);
@@ -435,7 +435,7 @@ static int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bufl
 
  fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_PKT_DONE_HANDLER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PKT_DONE_HANDLER);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -452,22 +452,22 @@ static int pkt_COOKIE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bu
     MPIR_Request *req;
     char *data_buf;
     intptr_t data_len;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_PKT_COOKIE_HANDLER);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PKT_COOKIE_HANDLER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_PKT_COOKIE_HANDLER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PKT_COOKIE_HANDLER);
 
     data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
     data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);
 
     if (cookie_pkt->from_sender) {
         MPIR_Request_get_ptr(cookie_pkt->receiver_req_id, req);
-        MPIU_Assert(req != NULL);
+        MPIR_Assert(req != NULL);
         req->ch.lmt_req_id = cookie_pkt->sender_req_id;
     }
     else {
         MPIR_Request_get_ptr(cookie_pkt->sender_req_id, req);
-        MPIU_Assert(req != NULL);
+        MPIR_Assert(req != NULL);
         req->ch.lmt_req_id = cookie_pkt->receiver_req_id;
     }
 
@@ -494,7 +494,7 @@ static int pkt_COOKIE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bu
             MPIR_Request *rreq;
 
             MPIDI_Request_create_rreq(rreq, mpi_errno, goto fn_fail);
-            MPIU_CHKPMEM_MALLOC(rreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, char *, cookie_pkt->cookie_len, mpi_errno, "tmp cookie buf");
+            MPIR_CHKPMEM_MALLOC(rreq->ch.lmt_tmp_cookie.MPL_IOV_BUF, char *, cookie_pkt->cookie_len, mpi_errno, "tmp cookie buf");
             /* FIXME:  where does this request get freed? */
             rreq->ch.lmt_tmp_cookie.MPL_IOV_LEN = cookie_pkt->cookie_len;
 
@@ -519,11 +519,11 @@ static int pkt_COOKIE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *bu
     }
 
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_PKT_COOKIE_HANDLER);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PKT_COOKIE_HANDLER);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -540,9 +540,9 @@ static int do_cts(MPIDI_VC_t *vc, MPIR_Request *rreq, int *complete)
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     MPIDU_Datatype* dt_ptr;
     MPL_IOV s_cookie;
-    MPIDI_STATE_DECL(MPID_STATE_DO_CTS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_DO_CTS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_DO_CTS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_DO_CTS);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");
 
@@ -571,7 +571,7 @@ static int do_cts(MPIDI_VC_t *vc, MPIR_Request *rreq, int *complete)
     *complete = TRUE;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_DO_CTS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_DO_CTS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -586,9 +586,9 @@ static int do_send(MPIDI_VC_t *vc, MPIR_Request *rreq, int *complete)
     int mpi_errno = MPI_SUCCESS;
     MPL_IOV r_cookie;
     MPIR_Request * const sreq = rreq->ch.lmt_req;
-    MPIDI_STATE_DECL(MPID_STATE_DO_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_DO_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_DO_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_DO_SEND);
 
     r_cookie = sreq->ch.lmt_tmp_cookie;
 
@@ -604,7 +604,7 @@ static int do_send(MPIDI_VC_t *vc, MPIR_Request *rreq, int *complete)
     *complete = TRUE;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_DO_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_DO_SEND);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -619,9 +619,9 @@ static int do_cookie(MPIDI_VC_t *vc, MPIR_Request *rreq, int *complete)
     int mpi_errno = MPI_SUCCESS;
     MPL_IOV cookie;
     MPIR_Request *req = rreq->ch.lmt_req;
-    MPIDI_STATE_DECL(MPID_STATE_DO_COOKIE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_DO_COOKIE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_DO_COOKIE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_DO_COOKIE);
 
     cookie = req->ch.lmt_tmp_cookie;
 
@@ -637,7 +637,7 @@ static int do_cookie(MPIDI_VC_t *vc, MPIR_Request *rreq, int *complete)
     *complete = TRUE;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_DO_COOKIE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_DO_COOKIE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_dma.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_dma.c
index 4952eec..12b63cb 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_dma.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_dma.c
@@ -90,7 +90,7 @@ static int open_knem_dev(void)
                          "**shm_open %s %d", KNEM_DEVICE_FILENAME, errno);
     err = ioctl(knem_fd, KNEM_CMD_GET_INFO, &info);
     MPIR_ERR_CHKANDJUMP2(err < 0, mpi_errno, MPI_ERR_OTHER, "**ioctl",
-                         "**ioctl %d %s", errno, MPIU_Strerror(errno));
+                         "**ioctl %d %s", errno, MPIR_Strerror(errno));
     MPIR_ERR_CHKANDJUMP2(info.abi != KNEM_ABI_VERSION, mpi_errno, MPI_ERR_OTHER,
                          "**abi_version_mismatch", "**abi_version_mismatch %D %D",
                          (unsigned long)KNEM_ABI_VERSION, (unsigned long)info.abi);
@@ -147,7 +147,7 @@ static int do_dma_send(MPIDI_VC_t *vc,  MPIR_Request *sreq, int send_iov_n,
     err = ioctl(knem_fd, KNEM_CMD_CREATE_REGION, &cr);
 #endif
     MPIR_ERR_CHKANDJUMP2(err < 0, mpi_errno, MPI_ERR_OTHER, "**ioctl",
-                         "**ioctl %d %s", errno, MPIU_Strerror(errno));
+                         "**ioctl %d %s", errno, MPIR_Strerror(errno));
 #if KNEM_ABI_VERSION < MPICH_NEW_KNEM_ABI_VERSION
     *s_cookiep = sendcmd.send_cookie;
 #else
@@ -203,7 +203,7 @@ static int do_dma_recv(int iov_n, MPL_IOV iov[], knem_cookie_t s_cookie, int nod
     err = ioctl(knem_fd, KNEM_CMD_INLINE_COPY, &icopy);
 #endif
     MPIR_ERR_CHKANDJUMP2(err < 0, mpi_errno, MPI_ERR_OTHER, "**ioctl",
-                         "**ioctl %d %s", errno, MPIU_Strerror(errno));
+                         "**ioctl %d %s", errno, MPIR_Strerror(errno));
 
 #if KNEM_ABI_VERSION < MPICH_NEW_KNEM_ABI_VERSION
     *status_p_p = &knem_status[recvcmd.status_index];
@@ -324,23 +324,23 @@ int MPID_nem_lmt_dma_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIR_Req
 {
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_pkt_lmt_rts_t * const rts_pkt = (MPID_nem_pkt_lmt_rts_t *)pkt;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);
 
-    MPIU_CHKPMEM_MALLOC(sreq->ch.s_cookie, knem_cookie_t *, sizeof(knem_cookie_t), mpi_errno, "s_cookie");
+    MPIR_CHKPMEM_MALLOC(sreq->ch.s_cookie, knem_cookie_t *, sizeof(knem_cookie_t), mpi_errno, "s_cookie");
 
     mpi_errno = send_sreq_data(vc, sreq, sreq->ch.s_cookie);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     MPID_nem_lmt_send_RTS(vc, rts_pkt, sreq->ch.s_cookie, sizeof(knem_cookie_t));
 
 fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);
     return mpi_errno;
 fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -362,9 +362,9 @@ int MPID_nem_lmt_dma_start_recv(MPIDI_VC_t *vc, MPIR_Request *rreq, MPL_IOV s_co
     volatile knem_status_t *status;
     knem_status_t current_status;
     struct lmt_dma_node *node = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_START_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_START_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_START_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_START_RECV);
 
     /* MT: this code assumes only one thread can be at this point at a time */
     if (knem_fd < 0) {
@@ -389,7 +389,7 @@ int MPID_nem_lmt_dma_start_recv(MPIDI_VC_t *vc, MPIR_Request *rreq, MPL_IOV s_co
             /* segment_ptr may be non-null when this is a continuation of a
                many-part message that we couldn't fit in one single flight of
                iovs. */
-            MPIU_Assert(rreq->dev.segment_ptr == NULL);
+            MPIR_Assert(rreq->dev.segment_ptr == NULL);
             rreq->dev.segment_ptr = MPIDU_Segment_alloc();
             MPIR_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno,
                                  MPI_ERR_OTHER, "**nomem",
@@ -405,8 +405,8 @@ int MPID_nem_lmt_dma_start_recv(MPIDI_VC_t *vc, MPIR_Request *rreq, MPL_IOV s_co
         }
     }
 
-    MPIU_Assert(s_cookie.MPL_IOV_LEN == sizeof(knem_cookie_t));
-    MPIU_Assert(s_cookie.MPL_IOV_BUF != NULL);
+    MPIR_Assert(s_cookie.MPL_IOV_LEN == sizeof(knem_cookie_t));
+    MPIR_Assert(s_cookie.MPL_IOV_BUF != NULL);
     mpi_errno = do_dma_recv(rreq->dev.iov_count, rreq->dev.iov,
                             *((knem_cookie_t *)s_cookie.MPL_IOV_BUF), nodma,
                             &status, &current_status);
@@ -452,7 +452,7 @@ int MPID_nem_lmt_dma_start_recv(MPIDI_VC_t *vc, MPIR_Request *rreq, MPL_IOV s_co
     ++MPID_nem_local_lmt_pending;
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_START_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_START_RECV);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -467,9 +467,9 @@ int MPID_nem_lmt_dma_done_send(MPIDI_VC_t *vc, MPIR_Request *sreq)
     int mpi_errno = MPI_SUCCESS;
     int complete = 0;
     int (*reqFn)(MPIDI_VC_t *, MPIR_Request *, int *);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);
 
     /* free cookie from RTS packet */
     MPL_free(sreq->ch.s_cookie);
@@ -500,10 +500,10 @@ int MPID_nem_lmt_dma_done_send(MPIDI_VC_t *vc, MPIR_Request *sreq)
     }
     else {
         /* There is more data to send. */
-        MPIU_Assert(("should never be incomplete!", 0));
+        MPIR_Assert(("should never be incomplete!", 0));
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);
 fn_exit:
     return MPI_SUCCESS;
 fn_fail:
@@ -518,9 +518,9 @@ fn_fail:
 int MPID_nem_lmt_dma_handle_cookie(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV cookie)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);
 
     if (cookie.MPL_IOV_LEN == 0 && cookie.MPL_IOV_BUF == NULL) {
         /* req is a send request, we need to initiate another knem request and
@@ -535,7 +535,7 @@ int MPID_nem_lmt_dma_handle_cookie(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV co
 
         /* If we were complete we should have received a DONE message instead
            of a COOKIE message. */
-        MPIU_Assert(!complete);
+        MPIR_Assert(!complete);
 
         mpi_errno = do_dma_send(vc, req, req->dev.iov_count, &req->dev.iov[0], &s_cookie);
         if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -549,7 +549,7 @@ int MPID_nem_lmt_dma_handle_cookie(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV co
     }
 
 fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);
     return MPI_SUCCESS;
 }
 
@@ -563,9 +563,9 @@ int MPID_nem_lmt_dma_progress(void)
     struct lmt_dma_node *prev = NULL;
     struct lmt_dma_node *free_me = NULL;
     struct lmt_dma_node *cur = outstanding_head;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
     
     /* Iterate over a linked-list of (req,status_idx)-tuples looking for
        completed/failed requests.  Currently knem only provides status to the
@@ -653,7 +653,7 @@ int MPID_nem_lmt_dma_progress(void)
     }
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -666,14 +666,14 @@ fn_fail:
 int MPID_nem_lmt_dma_vc_terminated(MPIDI_VC_t *vc)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_VC_TERMINATED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_VC_TERMINATED);
 
     /* Do nothing.  KNEM should abort any ops with dead processes. */
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_VC_TERMINATED);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -692,11 +692,11 @@ int MPID_nem_lmt_dma_vc_terminated(MPIDI_VC_t *vc)
 int MPID_nem_lmt_dma_start_send(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV r_cookie)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_START_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_START_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_START_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_START_SEND);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_START_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_START_SEND);
     return mpi_errno;
 }
 
@@ -707,11 +707,11 @@ int MPID_nem_lmt_dma_start_send(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV r_coo
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_lmt_dma_done_recv(MPIDI_VC_t *vc, MPIR_Request *rreq)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_DONE_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_DONE_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_DONE_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_DONE_RECV);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_DONE_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_DONE_RECV);
     return MPI_SUCCESS;
 }
 
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_shm.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_shm.c
index 47bd392..acba48f 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_shm.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_shm.c
@@ -128,9 +128,9 @@ int MPID_nem_lmt_shm_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIR_Req
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     MPIDU_Datatype* dt_ptr;
     MPID_nem_pkt_lmt_rts_t * const rts_pkt = (MPID_nem_pkt_lmt_rts_t *)pkt;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_INITIATE_LMT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_INITIATE_LMT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_INITIATE_LMT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_INITIATE_LMT);
 
     /* MT FIXME deadlock: we hold the LMT CS right now, the following macro
      * unexpectedly acquires the MPIDCOMM CS.  The normal locking order is
@@ -151,7 +151,7 @@ int MPID_nem_lmt_shm_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIR_Req
     req->ch.lmt_data_sz = data_sz;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_INITIATE_LMT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_INITIATE_LMT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -165,14 +165,14 @@ int MPID_nem_lmt_shm_start_recv(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV s_coo
 {
     int mpi_errno = MPI_SUCCESS;
     int done = FALSE;
-    MPIU_CHKPMEM_DECL(2);
+    MPIR_CHKPMEM_DECL(2);
     MPID_nem_lmt_shm_wait_element_t *e;
     int queue_initially_empty;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
     char *ser_lmt_copy_buf_handle=NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_START_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_START_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_START_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_START_RECV);
 
     if (vc_ch->lmt_copy_buf == NULL)
     {
@@ -199,7 +199,7 @@ int MPID_nem_lmt_shm_start_recv(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV s_coo
 
     queue_initially_empty = LMT_SHM_Q_EMPTY(vc_ch->lmt_queue) && vc_ch->lmt_active_lmt == NULL;
 
-    MPIU_CHKPMEM_MALLOC (e, MPID_nem_lmt_shm_wait_element_t *, sizeof (MPID_nem_lmt_shm_wait_element_t), mpi_errno, "lmt wait queue element");
+    MPIR_CHKPMEM_MALLOC (e, MPID_nem_lmt_shm_wait_element_t *, sizeof (MPID_nem_lmt_shm_wait_element_t), mpi_errno, "lmt wait queue element");
     e->progress = lmt_shm_recv_progress;
     e->req = req;
     LMT_SHM_Q_ENQUEUE(&vc_ch->lmt_queue, e); /* MT: not thread safe */
@@ -218,22 +218,22 @@ int MPID_nem_lmt_shm_start_recv(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV s_coo
 
         MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "lmt recv not finished:  enqueue");
 
-        MPIU_CHKPMEM_MALLOC (pe, lmt_shm_prog_element_t *, sizeof (lmt_shm_prog_element_t), mpi_errno, "lmt progress queue element");
+        MPIR_CHKPMEM_MALLOC (pe, lmt_shm_prog_element_t *, sizeof (lmt_shm_prog_element_t), mpi_errno, "lmt progress queue element");
         pe->vc = vc;
         LMT_SHM_L_ADD(pe);
         MPID_nem_local_lmt_pending = TRUE;
-        MPIU_Assert(!vc_ch->lmt_enqueued);
+        MPIR_Assert(!vc_ch->lmt_enqueued);
         vc_ch->lmt_enqueued = TRUE;
     }
 
-    MPIU_Assert(LMT_SHM_Q_EMPTY(vc_ch->lmt_queue) || !LMT_SHM_L_EMPTY());
+    MPIR_Assert(LMT_SHM_Q_EMPTY(vc_ch->lmt_queue) || !LMT_SHM_L_EMPTY());
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_START_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_START_RECV);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -248,10 +248,10 @@ int MPID_nem_lmt_shm_start_send(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV r_coo
     int queue_initially_empty;
     MPID_nem_lmt_shm_wait_element_t *e;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
-    MPIU_CHKPMEM_DECL(3);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_START_SEND);
+    MPIR_CHKPMEM_DECL(3);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_START_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_START_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_START_SEND);
 
     if (vc_ch->lmt_copy_buf == NULL){
         mpi_errno = MPIU_SHMW_Hnd_deserialize(vc_ch->lmt_copy_buf_handle, r_cookie.MPL_IOV_BUF, strlen(r_cookie.MPL_IOV_BUF));
@@ -291,7 +291,7 @@ int MPID_nem_lmt_shm_start_send(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV r_coo
 
     queue_initially_empty = LMT_SHM_Q_EMPTY(vc_ch->lmt_queue) && vc_ch->lmt_active_lmt == NULL;
 
-    MPIU_CHKPMEM_MALLOC (e, MPID_nem_lmt_shm_wait_element_t *, sizeof (MPID_nem_lmt_shm_wait_element_t), mpi_errno, "lmt wait queue element");
+    MPIR_CHKPMEM_MALLOC (e, MPID_nem_lmt_shm_wait_element_t *, sizeof (MPID_nem_lmt_shm_wait_element_t), mpi_errno, "lmt wait queue element");
     e->progress = lmt_shm_send_progress;
     e->req = req;
     LMT_SHM_Q_ENQUEUE(&vc_ch->lmt_queue, e); /* MT: not thread safe */
@@ -308,23 +308,23 @@ int MPID_nem_lmt_shm_start_send(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV r_coo
         /* lmt send didn't finish, enqueue it to be completed later */
         lmt_shm_prog_element_t *pe;
 
-        MPIU_CHKPMEM_MALLOC (pe, lmt_shm_prog_element_t *, sizeof (lmt_shm_prog_element_t), mpi_errno, "lmt progress queue element");
+        MPIR_CHKPMEM_MALLOC (pe, lmt_shm_prog_element_t *, sizeof (lmt_shm_prog_element_t), mpi_errno, "lmt progress queue element");
         pe->vc = vc;
         LMT_SHM_L_ADD(pe);
         MPID_nem_local_lmt_pending = TRUE;
-        MPIU_Assert(!vc_ch->lmt_enqueued);
+        MPIR_Assert(!vc_ch->lmt_enqueued);
         vc_ch->lmt_enqueued = TRUE;
         MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "lmt send not finished:  enqueue");
    }
 
-    MPIU_Assert(LMT_SHM_Q_EMPTY(vc_ch->lmt_queue) || !LMT_SHM_L_EMPTY());
+    MPIR_Assert(LMT_SHM_Q_EMPTY(vc_ch->lmt_queue) || !LMT_SHM_L_EMPTY());
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_return:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_START_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_START_SEND);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_return;
 }
 
@@ -339,9 +339,9 @@ static int get_next_req(MPIDI_VC_t *vc)
     MPID_nem_copy_buf_t * const copy_buf = vc_ch->lmt_copy_buf;
     int prev_owner_rank;
     MPIR_Request *req;
-    MPIDI_STATE_DECL(MPID_STATE_GET_NEXT_REQ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_GET_NEXT_REQ);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_GET_NEXT_REQ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_GET_NEXT_REQ);
 
     prev_owner_rank = OPA_cas_int(&copy_buf->owner_info.val.rank, NO_OWNER, MPIDI_Process.my_pg_rank);
 
@@ -423,7 +423,7 @@ static int get_next_req(MPIDI_VC_t *vc)
     vc_ch->lmt_surfeit = 0;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_GET_NEXT_REQ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GET_NEXT_REQ);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -449,16 +449,16 @@ static int lmt_shm_send_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
     intptr_t last;
     int buf_num;
     intptr_t data_sz, copy_limit;
-    MPIDI_STATE_DECL(MPID_STATE_LMT_SHM_SEND_PROGRESS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_LMT_SHM_SEND_PROGRESS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_LMT_SHM_SEND_PROGRESS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_LMT_SHM_SEND_PROGRESS);
 
     DBG_LMT(MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, (MPL_DBG_FDEST, "ctr=%d rank=%d", copy_buf->owner_info.val.ctr, vc->pg_rank)));
 
     copy_buf->sender_present.val = TRUE;
 
-    MPIU_Assert(req == vc_ch->lmt_active_lmt->req);
-/*     MPIU_Assert(MPIDI_Request_get_type(req) == MPIDI_REQUEST_TYPE_SEND); */
+    MPIR_Assert(req == vc_ch->lmt_active_lmt->req);
+/*     MPIR_Assert(MPIDI_Request_get_type(req) == MPIDI_REQUEST_TYPE_SEND); */
 
     data_sz = req->ch.lmt_data_sz;
     buf_num = vc_ch->lmt_buf_num;
@@ -495,7 +495,7 @@ static int lmt_shm_send_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
         last = (data_sz - first <= copy_limit) ? data_sz : first + copy_limit;
 	MPIDU_Segment_pack(req->dev.segment_ptr, first, &last, (void *)copy_buf->buf[buf_num]); /* cast away volatile */
         OPA_write_barrier();
-        MPIU_Assign_trunc(copy_buf->len[buf_num].val, (last - first), volatile int);
+        MPIR_Assign_trunc(copy_buf->len[buf_num].val, (last - first), volatile int);
 
         first = last;
         buf_num = (buf_num+1) % NUM_BUFS;
@@ -514,7 +514,7 @@ static int lmt_shm_send_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
 
  fn_exit:
     copy_buf->sender_present.val = FALSE;
-    MPIDI_FUNC_EXIT(MPID_STATE_LMT_SHM_SEND_PROGRESS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_LMT_SHM_SEND_PROGRESS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -545,9 +545,9 @@ static int lmt_shm_recv_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
     intptr_t surfeit;
     char *src_buf;
     char tmpbuf[MPID_NEM_CACHE_LINE_LEN];
-    MPIDI_STATE_DECL(MPID_STATE_LMT_SHM_RECV_PROGRESS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_LMT_SHM_RECV_PROGRESS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_LMT_SHM_RECV_PROGRESS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_LMT_SHM_RECV_PROGRESS);
 
     DBG_LMT(MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, (MPL_DBG_FDEST, "ctr=%d rank=%d", copy_buf->owner_info.val.ctr, vc->pg_rank)));
 
@@ -597,7 +597,7 @@ static int lmt_shm_recv_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
             OPA_read_write_barrier();
             copy_buf->len[(buf_num-1)].val = 0;
             /* Make sure we copied at least the leftover data from last time */
-            MPIU_Assert(last - first > surfeit);
+            MPIR_Assert(last - first > surfeit);
 
             MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "freed previous buffer");
         }
@@ -613,7 +613,7 @@ static int lmt_shm_recv_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
             if (buf_num == NUM_BUFS-1)
             {
                 /* if we're wrapping back to buf 0, then we can copy it directly */
-                MPIU_Memcpy(((char *)copy_buf->buf[0]) - surfeit, surfeit_ptr, surfeit);
+                MPIR_Memcpy(((char *)copy_buf->buf[0]) - surfeit, surfeit_ptr, surfeit);
 
                 OPA_read_write_barrier();
                 copy_buf->len[buf_num].val = 0;
@@ -621,8 +621,8 @@ static int lmt_shm_recv_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
             else
             {
                 /* otherwise, we need to copy to a tmpbuf first to make sure the src and dest addresses don't overlap */
-                MPIU_Memcpy(tmpbuf, surfeit_ptr, surfeit);
-                MPIU_Memcpy(((char *)copy_buf->buf[buf_num+1]) - surfeit, tmpbuf, surfeit);
+                MPIR_Memcpy(tmpbuf, surfeit_ptr, surfeit);
+                MPIR_Memcpy(((char *)copy_buf->buf[buf_num+1]) - surfeit, tmpbuf, surfeit);
             }
 
             MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "copied leftover data");
@@ -656,7 +656,7 @@ static int lmt_shm_recv_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
 
  fn_exit:
     copy_buf->receiver_present.val = FALSE;
-    MPIDI_FUNC_EXIT(MPID_STATE_LMT_SHM_RECV_PROGRESS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_LMT_SHM_RECV_PROGRESS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -668,11 +668,11 @@ static int lmt_shm_recv_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_lmt_shm_handle_cookie(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV cookie)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_HANDLE_COOKIE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_HANDLE_COOKIE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_HANDLE_COOKIE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_HANDLE_COOKIE);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_HANDLE_COOKIE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_HANDLE_COOKIE);
     return MPI_SUCCESS;
 }
 
@@ -682,11 +682,11 @@ int MPID_nem_lmt_shm_handle_cookie(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV co
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_lmt_shm_done_send(MPIDI_VC_t *vc, MPIR_Request *req)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_DONE_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_DONE_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_DONE_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_DONE_SEND);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_DONE_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_DONE_SEND);
     return MPI_SUCCESS;
 }
 
@@ -696,11 +696,11 @@ int MPID_nem_lmt_shm_done_send(MPIDI_VC_t *vc, MPIR_Request *req)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_lmt_shm_done_recv(MPIDI_VC_t *vc, MPIR_Request *req)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_DONE_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_DONE_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_DONE_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_DONE_RECV);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_DONE_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_DONE_RECV);
     return MPI_SUCCESS;
 }
 
@@ -714,9 +714,9 @@ static inline int lmt_shm_progress_vc(MPIDI_VC_t *vc, int *done)
     int done_req = FALSE;
     MPID_nem_lmt_shm_wait_element_t *we;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_LMT_SHM_PROGRESS_VC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_LMT_SHM_PROGRESS_VC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_LMT_SHM_PROGRESS_VC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_LMT_SHM_PROGRESS_VC);
 
     *done = FALSE;
 
@@ -750,7 +750,7 @@ static inline int lmt_shm_progress_vc(MPIDI_VC_t *vc, int *done)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_LMT_SHM_PROGRESS_VC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_LMT_SHM_PROGRESS_VC);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -764,9 +764,9 @@ int MPID_nem_lmt_shm_progress(void)
 {
     int mpi_errno = MPI_SUCCESS;
     lmt_shm_prog_element_t *pe;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_PROGRESS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_PROGRESS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_PROGRESS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_PROGRESS);
 
     pe = LMT_SHM_L_HEAD();
 
@@ -780,9 +780,9 @@ int MPID_nem_lmt_shm_progress(void)
         if (done)
         {
             lmt_shm_prog_element_t *f;
-            MPIU_Assert(LMT_SHM_Q_EMPTY(pe->vc->ch.lmt_queue));
-            MPIU_Assert(pe->vc->ch.lmt_active_lmt == NULL);
-            MPIU_Assert(pe->vc->ch.lmt_enqueued);
+            MPIR_Assert(LMT_SHM_Q_EMPTY(pe->vc->ch.lmt_queue));
+            MPIR_Assert(pe->vc->ch.lmt_active_lmt == NULL);
+            MPIR_Assert(pe->vc->ch.lmt_enqueued);
             pe->vc->ch.lmt_enqueued = FALSE;
 
             f = pe;
@@ -798,7 +798,7 @@ int MPID_nem_lmt_shm_progress(void)
         MPID_nem_local_lmt_pending = FALSE;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_PROGRESS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_PROGRESS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -815,9 +815,9 @@ int MPID_nem_lmt_shm_vc_terminated(MPIDI_VC_t *vc)
     MPID_nem_lmt_shm_wait_element_t *we;
     int req_errno = MPI_SUCCESS;
     MPIR_Request *req = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_SHM_VC_TERMINATED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_SHM_VC_TERMINATED);
 
     if (vc->state != MPIDI_VC_STATE_CLOSED) {
         MPIR_ERR_SET1(req_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);
@@ -858,7 +858,7 @@ int MPID_nem_lmt_shm_vc_terminated(MPIDI_VC_t *vc)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_SHM_VC_TERMINATED);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -872,9 +872,9 @@ int MPID_nem_lmt_shm_vc_terminated(MPIDI_VC_t *vc)
 static int MPID_nem_allocate_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_Hnd_t handle)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_ALLOCATE_SHM_REGION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_ALLOCATE_SHM_REGION);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_ALLOCATE_SHM_REGION);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_ALLOCATE_SHM_REGION);
 
     if (*buf_p)
     {
@@ -886,7 +886,7 @@ static int MPID_nem_allocate_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_H
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_ALLOCATE_SHM_REGION);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_ALLOCATE_SHM_REGION);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -899,9 +899,9 @@ static int MPID_nem_allocate_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_H
 static int MPID_nem_attach_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_Hnd_t handle)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_ATTACH_SHM_REGION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_ATTACH_SHM_REGION);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_ATTACH_SHM_REGION);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_ATTACH_SHM_REGION);
 
     if(*buf_p)
     {
@@ -916,7 +916,7 @@ static int MPID_nem_attach_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_Hnd
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_ATTACH_SHM_REGION);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_ATTACH_SHM_REGION);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -929,15 +929,15 @@ static int MPID_nem_attach_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_Hnd
 static int MPID_nem_detach_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_Hnd_t handle)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DETACH_SHM_REGION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_DETACH_SHM_REGION);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DETACH_SHM_REGION);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_DETACH_SHM_REGION);
 
     mpi_errno = MPIU_SHMW_Seg_detach(handle, (char **)buf_p, sizeof(MPID_nem_copy_buf_t));
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DETACH_SHM_REGION);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_DETACH_SHM_REGION);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -950,9 +950,9 @@ static int MPID_nem_detach_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_Hnd
 static int MPID_nem_delete_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_Hnd_t *handle_p)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DELETE_SHM_REGION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_DELETE_SHM_REGION);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DELETE_SHM_REGION);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_DELETE_SHM_REGION);
 
     mpi_errno = MPIU_SHMW_Seg_remove(*handle_p);
     if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); }
@@ -964,7 +964,7 @@ static int MPID_nem_delete_shm_region(MPID_nem_copy_buf_t **buf_p, MPIU_SHMW_Hnd
     if(mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DELETE_SHM_REGION);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_DELETE_SHM_REGION);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_vmsplice.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_vmsplice.c
index a8b21c6..204b68c 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_vmsplice.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_vmsplice.c
@@ -107,7 +107,7 @@ static int populate_iov_from_req(MPIR_Request *req)
     }
     else {
         /* use the segment routines to handle the iovec creation */
-        MPIU_Assert(req->dev.segment_ptr == NULL);
+        MPIR_Assert(req->dev.segment_ptr == NULL);
 
         req->dev.iov_count = MPL_IOV_LIMIT;
         req->dev.iov_offset = 0;
@@ -150,7 +150,7 @@ static int do_vmsplice(MPIR_Request *sreq, int pipe_fd, MPL_IOV iov[],
     if (err < 0) {
         if (errno == EAGAIN) goto fn_exit;
         MPIR_ERR_CHKANDJUMP2(errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**vmsplice",
-                             "**vmsplice %d %s", errno, MPIU_Strerror(errno));
+                             "**vmsplice %d %s", errno, MPIR_Strerror(errno));
     }
 
     *complete = adjust_partially_xferred_iov(iov, iov_offset, iov_count, err);
@@ -181,9 +181,9 @@ int MPID_nem_lmt_vmsplice_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPI
     MPID_nem_pkt_lmt_rts_t * const rts_pkt = (MPID_nem_pkt_lmt_rts_t *)pkt;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
     int complete = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_INITIATE_LMT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_INITIATE_LMT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_INITIATE_LMT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_INITIATE_LMT);
 
     /* re-use the same pipe per-pair,per-sender */
     if (vc_ch->lmt_copy_buf_handle == NULL) {
@@ -193,7 +193,7 @@ int MPID_nem_lmt_vmsplice_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPI
 
         pipe_name = tempnam(NULL, "lmt_");
         MPIR_ERR_CHKANDJUMP2(!pipe_name, mpi_errno, MPI_ERR_OTHER, "**tempnam",
-                             "**tempnam %d %s", errno, MPIU_Strerror(errno));
+                             "**tempnam %d %s", errno, MPIR_Strerror(errno));
 
         vc_ch->lmt_copy_buf_handle = MPL_strdup(pipe_name);
         /* XXX DJG hack */
@@ -202,7 +202,7 @@ int MPID_nem_lmt_vmsplice_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPI
 
         err = mkfifo(vc_ch->lmt_copy_buf_handle, 0660);
         MPIR_ERR_CHKANDJUMP2(err < 0, mpi_errno, MPI_ERR_OTHER, "**mkfifo",
-                             "**mkfifo %d %s", errno, MPIU_Strerror(errno));
+                             "**mkfifo %d %s", errno, MPIR_Strerror(errno));
     }
 
     /* can't start sending data yet, need full RTS/CTS handshake */
@@ -212,7 +212,7 @@ int MPID_nem_lmt_vmsplice_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPI
 
 fn_fail:
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_INITIATE_LMT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_INITIATE_LMT);
     return mpi_errno;
 }
 
@@ -224,12 +224,12 @@ static int do_readv(MPIR_Request *rreq, int pipe_fd, MPL_IOV iov[],
 
     nread = readv(pipe_fd, &rreq->dev.iov[rreq->dev.iov_offset], rreq->dev.iov_count);
     MPIR_ERR_CHKANDJUMP2(nread < 0 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read",
-                         "**readv %d %s", errno, MPIU_Strerror(errno));
+                         "**readv %d %s", errno, MPIR_Strerror(errno));
 
     if (nread < 0) {
         if (errno == EAGAIN) goto fn_exit;
         MPIR_ERR_CHKANDJUMP2(errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**vmsplice",
-                             "**vmsplice %d %s", errno, MPIU_Strerror(errno));
+                             "**vmsplice %d %s", errno, MPIR_Strerror(errno));
     }
 
     *complete = adjust_partially_xferred_iov(iov, iov_offset, iov_count, nread);
@@ -263,12 +263,12 @@ int MPID_nem_lmt_vmsplice_start_recv(MPIDI_VC_t *vc, MPIR_Request *rreq, MPL_IOV
     struct lmt_vmsplice_node *node = NULL;
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
     int pipe_fd;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_RECV);
 
     if (vc_ch->lmt_recv_copy_buf_handle == NULL) {
-        MPIU_Assert(s_cookie.MPL_IOV_BUF != NULL);
+        MPIR_Assert(s_cookie.MPL_IOV_BUF != NULL);
         vc_ch->lmt_recv_copy_buf_handle = MPL_strdup(s_cookie.MPL_IOV_BUF);
     }
 
@@ -276,7 +276,7 @@ int MPID_nem_lmt_vmsplice_start_recv(MPIDI_VC_t *vc, MPIR_Request *rreq, MPL_IOV
        so that we don't have two open's on the critical path every time. */
     pipe_fd = open(vc_ch->lmt_recv_copy_buf_handle, O_NONBLOCK|O_RDONLY);
     MPIR_ERR_CHKANDJUMP1(pipe_fd < 0, mpi_errno, MPI_ERR_OTHER, "**open",
-                         "**open %s", MPIU_Strerror(errno));
+                         "**open %s", MPIR_Strerror(errno));
 
     MPID_nem_lmt_send_CTS(vc, rreq, NULL, 0);
 
@@ -297,7 +297,7 @@ int MPID_nem_lmt_vmsplice_start_recv(MPIDI_VC_t *vc, MPIR_Request *rreq, MPL_IOV
     }
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_RECV);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -312,9 +312,9 @@ int MPID_nem_lmt_vmsplice_progress(void)
     struct lmt_vmsplice_node *prev = NULL;
     struct lmt_vmsplice_node *free_me = NULL;
     struct lmt_vmsplice_node *cur = outstanding_head;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_PROGRESS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_PROGRESS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_PROGRESS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_PROGRESS);
     
     while (cur) {
         int complete = 0;
@@ -366,7 +366,7 @@ int MPID_nem_lmt_vmsplice_progress(void)
     }
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_PROGRESS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_PROGRESS);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -380,21 +380,21 @@ fn_fail:
 int MPID_nem_lmt_vmsplice_start_send(MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV r_cookie)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_SEND);
     int pipe_fd;
     int complete;
     struct lmt_vmsplice_node *node = NULL;
     int (*reqFn)(MPIDI_VC_t *, MPIR_Request *, int *);
     MPIDI_CH3I_VC *vc_ch = &vc->ch;
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_SEND);
 
     /* Must do this after the other side has opened for reading, otherwise we
        will error out with ENXIO.  This will be indicated by the receipt of a
        CTS message. */
     pipe_fd = open(vc_ch->lmt_copy_buf_handle, O_NONBLOCK|O_WRONLY);
     MPIR_ERR_CHKANDJUMP1(pipe_fd < 0, mpi_errno, MPI_ERR_OTHER, "**open",
-                         "**open %s", MPIU_Strerror(errno));
+                         "**open %s", MPIR_Strerror(errno));
 
     mpi_errno = populate_iov_from_req(sreq);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -418,7 +418,7 @@ int MPID_nem_lmt_vmsplice_start_send(MPIDI_VC_t *vc, MPIR_Request *sreq, MPL_IOV
 
 fn_fail:
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_SEND);
     return mpi_errno;
 }
 
@@ -429,16 +429,16 @@ fn_exit:
 int MPIDI_CH3_MPID_nem_lmt_vmsplice_vc_terminated(MPIDI_VC_t *vc)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_MPID_NEM_LMT_VMSPLICE_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_MPID_NEM_LMT_VMSPLICE_VC_TERMINATED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_MPID_NEM_LMT_VMSPLICE_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_MPID_NEM_LMT_VMSPLICE_VC_TERMINATED);
 
     /* FIXME: need to handle the case where a VC is terminated due to
        a process failure.  We need to remove any outstanding LMT ops
        for this VC. */
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_MPID_NEM_LMT_VMSPLICE_VC_TERMINATED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_MPID_NEM_LMT_VMSPLICE_VC_TERMINATED);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -457,13 +457,13 @@ int MPIDI_CH3_MPID_nem_lmt_vmsplice_vc_terminated(MPIDI_VC_t *vc)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_nem_lmt_vmsplice_done_recv(MPIDI_VC_t *vc, MPIR_Request *rreq)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_RECV);
 
     /* nop */
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_RECV);
     return MPI_SUCCESS;
 }
 
@@ -474,13 +474,13 @@ int MPID_nem_lmt_vmsplice_done_recv(MPIDI_VC_t *vc, MPIR_Request *rreq)
 int MPID_nem_lmt_vmsplice_done_send(MPIDI_VC_t *vc, MPIR_Request *sreq)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_SEND);
 
     /* nop */
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_DONE_SEND);
     return MPI_SUCCESS;
 }
 
@@ -492,13 +492,13 @@ int MPID_nem_lmt_vmsplice_done_send(MPIDI_VC_t *vc, MPIR_Request *sreq)
 int MPID_nem_lmt_vmsplice_handle_cookie(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV cookie)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_HANDLE_COOKIE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_HANDLE_COOKIE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_HANDLE_COOKIE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_HANDLE_COOKIE);
 
     /* nop */
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_HANDLE_COOKIE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_HANDLE_COOKIE);
     return MPI_SUCCESS;
 }
 
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c
index 68acd83..a0635e2 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c
@@ -32,14 +32,14 @@ MPID_nem_mpich_init(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIU_CHKPMEM_DECL (2);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_INIT);
+    MPIR_CHKPMEM_DECL (2);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_INIT);
 
     MPID_nem_prefetched_cell = NULL;
 
-    MPIU_CHKPMEM_MALLOC (MPID_nem_recv_seqno, unsigned short *, sizeof(*MPID_nem_recv_seqno) * MPID_nem_mem_region.num_procs, mpi_errno, "recv seqno");
+    MPIR_CHKPMEM_MALLOC (MPID_nem_recv_seqno, unsigned short *, sizeof(*MPID_nem_recv_seqno) * MPID_nem_mem_region.num_procs, mpi_errno, "recv seqno");
 
     for (i = 0; i < MPID_nem_mem_region.num_procs; ++i)
     {
@@ -47,7 +47,7 @@ MPID_nem_mpich_init(void)
     }
 
     /* set up fbox queue */
-    MPIU_CHKPMEM_MALLOC (MPID_nem_fboxq_elem_list, MPID_nem_fboxq_elem_t *, MPID_nem_mem_region.num_local * sizeof(MPID_nem_fboxq_elem_t), mpi_errno, "fastbox element list");
+    MPIR_CHKPMEM_MALLOC (MPID_nem_fboxq_elem_list, MPID_nem_fboxq_elem_t *, MPID_nem_mem_region.num_local * sizeof(MPID_nem_fboxq_elem_t), mpi_errno, "fastbox element list");
 
     for (i = 0; i < MPID_nem_mem_region.num_local; ++i)
     {
@@ -64,13 +64,13 @@ MPID_nem_mpich_init(void)
     MPID_nem_curr_fbox_all_poll = &MPID_nem_fboxq_elem_list[0];
     MPID_nem_fboxq_elem_list_last = &MPID_nem_fboxq_elem_list[MPID_nem_mem_region.num_local - 1];
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_INIT);
     return mpi_errno;
 fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -89,16 +89,16 @@ int MPID_nem_send_iov(MPIDI_VC_t *vc, MPIR_Request **sreq_ptr, MPL_IOV *iov, int
     MPL_IOV *data_iov = &iov[1]; /* iov of just the data, not the header */
     int data_n_iov = n_iov - 1;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SEND_IOV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_SEND_IOV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SEND_IOV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_SEND_IOV);
 
     if (*sreq_ptr == NULL)
     {
 	/* create a request */
 	sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-	MPIU_Assert(sreq != NULL);
-	MPIU_Object_set_ref(sreq, 2);
+	MPIR_Assert(sreq != NULL);
+	MPIR_Object_set_ref(sreq, 2);
 	sreq->kind = MPIR_REQUEST_KIND__SEND;
         sreq->dev.OnDataAvail = 0;
     }
@@ -123,11 +123,11 @@ int MPID_nem_send_iov(MPIDI_VC_t *vc, MPIR_Request **sreq_ptr, MPL_IOV *iov, int
         /* --END ERROR HANDLING-- */
     }
 
-    MPIU_Assert(sreq->dev.tmpbuf_sz >= data_sz);
+    MPIR_Assert(sreq->dev.tmpbuf_sz >= data_sz);
 
     iov_data_copied = 0;
     for (i = 0; i < data_n_iov; ++i) {
-        MPIU_Memcpy((char*) sreq->dev.tmpbuf + iov_data_copied, data_iov[i].MPL_IOV_BUF, data_iov[i].MPL_IOV_LEN);
+        MPIR_Memcpy((char*) sreq->dev.tmpbuf + iov_data_copied, data_iov[i].MPL_IOV_BUF, data_iov[i].MPL_IOV_LEN);
         iov_data_copied += data_iov[i].MPL_IOV_LEN;
     }
 
@@ -137,7 +137,7 @@ int MPID_nem_send_iov(MPIDI_VC_t *vc, MPIR_Request **sreq_ptr, MPL_IOV *iov, int
     *sreq_ptr = sreq;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SEND_IOV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_SEND_IOV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich_rma.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich_rma.c
index 9451e72..bb34944 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich_rma.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich_rma.c
@@ -19,12 +19,12 @@ int
 MPID_nem_mpich_alloc_win (void **buf, int len, MPID_nem_mpich_win_t **win)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_ALLOC_WIN);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_ALLOC_WIN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_ALLOC_WIN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_ALLOC_WIN);
 
-    MPIU_CHKPMEM_MALLOC (*win, MPID_nem_mpich_win_t *, sizeof (MPID_nem_mpich_win_t), mpi_errno, "rma win object");
+    MPIR_CHKPMEM_MALLOC (*win, MPID_nem_mpich_win_t *, sizeof (MPID_nem_mpich_win_t), mpi_errno, "rma win object");
 
     mpi_errno = MPID_nem_allocate_shared_memory ((char **)buf, len, &(*win)->handle);
     if (mpi_errno) MPIR_ERR_POP (mpi_errno);
@@ -34,12 +34,12 @@ MPID_nem_mpich_alloc_win (void **buf, int len, MPID_nem_mpich_win_t **win)
     (*win)->len = len;
     (*win)->local_address = *buf;
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_ALLOC_WIN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_ALLOC_WIN);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -51,11 +51,11 @@ int
 MPID_nem_mpich_free_win (MPID_nem_mpich_win_t *win)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_FREE_WIN);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_FREE_WIN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_FREE_WIN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_FREE_WIN);
 
-    MPIU_Assert (win->proc == MPID_nem_mem_region.rank);
+    MPIR_Assert (win->proc == MPID_nem_mem_region.rank);
 
     mpi_errno = MPID_nem_remove_shared_memory (win->handle);
     if (mpi_errno) MPIR_ERR_POP (mpi_errno);
@@ -67,7 +67,7 @@ MPID_nem_mpich_free_win (MPID_nem_mpich_win_t *win)
     MPL_free (win);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_FREE_WIN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_FREE_WIN);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -81,9 +81,9 @@ int
 MPID_nem_mpich_attach_win (void **buf, MPID_nem_mpich_win_t *remote_win)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_ATTACH_WIN);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_ATTACH_WIN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_ATTACH_WIN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_ATTACH_WIN);
 
     if (remote_win->proc == MPID_nem_mem_region.rank)
     {
@@ -99,7 +99,7 @@ MPID_nem_mpich_attach_win (void **buf, MPID_nem_mpich_win_t *remote_win)
     remote_win->local_address = *buf;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_ATTACH_WIN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_ATTACH_WIN);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -113,9 +113,9 @@ int
 MPID_nem_mpich_detach_win (MPID_nem_mpich_win_t *remote_win)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_DETACH_WIN);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_DETACH_WIN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_DETACH_WIN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_DETACH_WIN);
 
     if (remote_win->proc != MPID_nem_mem_region.rank)
     {
@@ -125,7 +125,7 @@ MPID_nem_mpich_detach_win (MPID_nem_mpich_win_t *remote_win)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_DETACH_WIN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_DETACH_WIN);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -140,19 +140,19 @@ MPID_nem_mpich_win_put (void *s_buf, void *d_buf, int len, MPID_nem_mpich_win_t
 {
     int mpi_errno = MPI_SUCCESS;
     char *_d_buf = d_buf;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_WIN_PUT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_WIN_PUT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_WIN_PUT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_WIN_PUT);
 
     _d_buf += (char *)remote_win->local_address - (char *)remote_win->home_address;
 
     if (_d_buf < (char *)remote_win->local_address || _d_buf + len > (char *)remote_win->local_address + remote_win->len)
         MPIR_ERR_SETANDJUMP (mpi_errno, MPI_ERR_OTHER, "**winput_oob");
 
-    MPIU_Memcpy (_d_buf, s_buf, len);
+    MPIR_Memcpy (_d_buf, s_buf, len);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_WIN_PUT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_WIN_PUT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -168,9 +168,9 @@ MPID_nem_mpich_win_putv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
     int mpi_errno = MPI_SUCCESS;
     size_t diff;
     int len;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_WIN_PUTV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_WIN_PUTV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_WIN_PUTV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_WIN_PUTV);
 
     diff = (char *)remote_win->local_address - (char *)remote_win->home_address;
 
@@ -183,7 +183,7 @@ MPID_nem_mpich_win_putv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
 	if ((*s_iov)->iov_len > (*d_iov)->iov_len)
 	{
 	    len = (*d_iov)->iov_len;
-	    MPIU_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
+	    MPIR_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
 
 	    (*s_iov)->iov_base = (char *)(*s_iov)->iov_base + len;
 	    (*s_iov)->iov_len =- len;
@@ -197,7 +197,7 @@ MPID_nem_mpich_win_putv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
 	else if ((*s_iov)->iov_len > (*d_iov)->iov_len)
 	{
 	    len = (*s_iov)->iov_len;
-	    MPIU_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
+	    MPIR_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
 
 	    ++(*s_iov);
 	    --(*s_niov);
@@ -208,7 +208,7 @@ MPID_nem_mpich_win_putv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
 	else
 	{
 	    len = (*s_iov)->iov_len;
-	    MPIU_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
+	    MPIR_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
 
 	    ++(*s_iov);
 	    --(*s_niov);
@@ -222,7 +222,7 @@ MPID_nem_mpich_win_putv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_WIN_PUTV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_WIN_PUTV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -237,19 +237,19 @@ MPID_nem_mpich_win_get (void *s_buf, void *d_buf, int len, MPID_nem_mpich_win_t
 {
     int mpi_errno = MPI_SUCCESS;
     char *_s_buf = s_buf;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_WIN_GET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_WIN_GET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_WIN_GET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_WIN_GET);
 
     _s_buf += (char *)remote_win->local_address - (char *)remote_win->home_address;
 
     if (_s_buf < (char *)remote_win->local_address || _s_buf + len > (char *)remote_win->local_address + remote_win->len)
         MPIR_ERR_SETANDJUMP (mpi_errno, MPI_ERR_OTHER, "**winget_oob");
 
-    MPIU_Memcpy (d_buf, _s_buf, len);
+    MPIR_Memcpy (d_buf, _s_buf, len);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_WIN_GET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_WIN_GET);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -265,9 +265,9 @@ MPID_nem_mpich_win_getv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
     int mpi_errno = MPI_SUCCESS;
     size_t diff;
     int len;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_WIN_GETV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_WIN_GETV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_WIN_GETV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_WIN_GETV);
 
     diff = (char *)remote_win->local_address - (char *)remote_win->home_address;
 
@@ -280,7 +280,7 @@ MPID_nem_mpich_win_getv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
 	if ((*d_iov)->iov_len > (*s_iov)->iov_len)
 	{
 	    len = (*s_iov)->iov_len;
-	    MPIU_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
+	    MPIR_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
 
 	    (*d_iov)->iov_base = (char *)(*d_iov)->iov_base + len;
 	    (*d_iov)->iov_len =- len;
@@ -294,7 +294,7 @@ MPID_nem_mpich_win_getv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
 	else if ((*d_iov)->iov_len > (*s_iov)->iov_len)
 	{
 	    len = (*d_iov)->iov_len;
-	    MPIU_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
+	    MPIR_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
 
 	    ++(*d_iov);
 	    --(*d_niov);
@@ -305,7 +305,7 @@ MPID_nem_mpich_win_getv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
 	else
 	{
 	    len = (*d_iov)->iov_len;
-	    MPIU_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
+	    MPIR_Memcpy ((char*)((*d_iov)->iov_base) + diff, (*s_iov)->iov_base, len);
 
 	    ++(*d_iov);
 	    --(*d_niov);
@@ -319,7 +319,7 @@ MPID_nem_mpich_win_getv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_WIN_GETV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_WIN_GETV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -343,9 +343,9 @@ MPID_nem_mpich_serialize_win (void *buf, int buf_len, MPID_nem_mpich_win_t *win,
     int bl = buf_len;
     char *b = (char *)buf;
     int handle_len;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_SERIALIZE_WIN);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_SERIALIZE_WIN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_SERIALIZE_WIN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_SERIALIZE_WIN);
 
     handle_len = strlen (win->handle);
     str_errno = MPL_str_add_int_arg (&b, &bl, WIN_HANLEN_KEY, handle_len);
@@ -367,7 +367,7 @@ MPID_nem_mpich_serialize_win (void *buf, int buf_len, MPID_nem_mpich_win_t *win,
     *len = buf_len - bl;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_SERIALIZE_WIN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_SERIALIZE_WIN);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -385,17 +385,17 @@ MPID_nem_mpich_deserialize_win (void *buf, int buf_len, MPID_nem_mpich_win_t **w
     int ol;
     char *b = (char *)buf;
     int handle_len;
-    MPIU_CHKPMEM_DECL(2);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_DESERIALIZE_WIN);
+    MPIR_CHKPMEM_DECL(2);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_DESERIALIZE_WIN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_DESERIALIZE_WIN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_DESERIALIZE_WIN);
 
-    MPIU_CHKPMEM_MALLOC (*win, MPID_nem_mpich_win_t *, sizeof (MPID_nem_mpich_win_t), mpi_errno, "win object");
+    MPIR_CHKPMEM_MALLOC (*win, MPID_nem_mpich_win_t *, sizeof (MPID_nem_mpich_win_t), mpi_errno, "win object");
 
     str_errno = MPL_str_get_int_arg (b, WIN_HANLEN_KEY, &handle_len);
     MPIR_ERR_CHKANDJUMP (str_errno == MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**nomem");
     MPIR_ERR_CHKANDJUMP (str_errno == MPL_STR_FAIL, mpi_errno, MPI_ERR_OTHER, "**windeserialize");
-    MPIU_CHKPMEM_MALLOC ((*win)->handle, char *, handle_len, mpi_errno, "window handle");
+    MPIR_CHKPMEM_MALLOC ((*win)->handle, char *, handle_len, mpi_errno, "window handle");
 
     str_errno = MPL_str_get_string_arg(b, WIN_HANDLE_KEY, (*win)->handle, handle_len);
     MPIR_ERR_CHKANDJUMP (str_errno == MPL_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**nomem");
@@ -412,12 +412,12 @@ MPID_nem_mpich_deserialize_win (void *buf, int buf_len, MPID_nem_mpich_win_t **w
 
     (*win)->local_address = 0;
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_DESERIALIZE_WIN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_DESERIALIZE_WIN);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -429,16 +429,16 @@ int
 MPID_nem_mpich_register_memory (void *buf, int len)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_REGISTER_MEMORY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_REGISTER_MEMORY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_REGISTER_MEMORY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_REGISTER_MEMORY);
 
 /*     if (MPID_NEM_NET_MODULE == MPID_NEM_GM_MODULE) */
 /*     { */
 /* 	/\*return MPID_nem_gm_module_register_mem (buf, len);*\/ */
 /*     } */
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_REGISTER_MEMORY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_REGISTER_MEMORY);
     return mpi_errno;
 }
 
@@ -450,16 +450,16 @@ int
 MPID_nem_mpich_deregister_memory (void *buf, int len)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_DEREGISTER_MEMORY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_DEREGISTER_MEMORY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_DEREGISTER_MEMORY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_DEREGISTER_MEMORY);
 
 /*     if (MPID_NEM_NET_MODULE == MPID_NEM_GM_MODULE) */
 /*     { */
 /* 	/\*return MPID_nem_gm_module_deregister_mem (buf, len);*\/ */
 /*     } */
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_DEREGISTER_MEMORY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_DEREGISTER_MEMORY);
     return mpi_errno;
 }
 
@@ -471,9 +471,9 @@ int
 MPID_nem_mpich_put (void *s_buf, void *d_buf, int len, int proc, int *completion_ctr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_PUT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_PUT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_PUT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_PUT);
 
     if (MPID_NEM_IS_LOCAL (proc))
     {
@@ -495,7 +495,7 @@ MPID_nem_mpich_put (void *s_buf, void *d_buf, int len, int proc, int *completion
       */
     }
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_PUT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_PUT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -511,9 +511,9 @@ MPID_nem_mpich_putv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov, in
 {
     int mpi_errno = MPI_SUCCESS;
     /* int len;*/
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_PUTV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_PUTV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_PUTV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_PUTV);
 
     if (MPID_NEM_IS_LOCAL (proc))
     {
@@ -572,7 +572,7 @@ MPID_nem_mpich_putv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov, in
       */
     }
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_PUTV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_PUTV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -586,9 +586,9 @@ int
 MPID_nem_mpich_get (void *s_buf, void *d_buf, int len, int proc, int *completion_ctr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_GET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_GET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_GET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_GET);
 
     if (MPID_NEM_IS_LOCAL (proc))
     {
@@ -610,7 +610,7 @@ MPID_nem_mpich_get (void *s_buf, void *d_buf, int len, int proc, int *completion
       */
     }
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_GET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_GET);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -626,9 +626,9 @@ MPID_nem_mpich_getv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov, in
 {
     int mpi_errno = MPI_SUCCESS;
     /* int len; */
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_GETV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_MPICH_GETV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MPICH_GETV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_MPICH_GETV);
 
     if (MPID_NEM_IS_LOCAL (proc))
     {
@@ -687,7 +687,7 @@ MPID_nem_mpich_getv (struct iovec **s_iov, int *s_niov, struct iovec **d_iov, in
       */
     }
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MPICH_GETV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_MPICH_GETV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_network.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_network.c
index 1f5cc31..7c89e20 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_network.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_network.c
@@ -41,11 +41,11 @@ int MPID_nem_choose_netmod(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_CHOOSE_NETMOD);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_CHOOSE_NETMOD);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_CHOOSE_NETMOD);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_CHOOSE_NETMOD);
 
-    MPIU_Assert(MPIR_CVAR_NEMESIS_NETMOD != NULL);
+    MPIR_Assert(MPIR_CVAR_NEMESIS_NETMOD != NULL);
     if (strcmp(MPIR_CVAR_NEMESIS_NETMOD, "") == 0)
     {
         /* netmod not specified, using the default */
@@ -59,7 +59,7 @@ int MPID_nem_choose_netmod(void)
 
     for (i = 0; i < MPID_nem_num_netmods; ++i)
     {
-        if (!MPIU_Strncasecmp(MPIR_CVAR_NEMESIS_NETMOD, MPID_nem_netmod_strings[i], MPID_NEM_MAX_NETMOD_STRING_LEN))
+        if (!MPIR_Strncasecmp(MPIR_CVAR_NEMESIS_NETMOD, MPID_nem_netmod_strings[i], MPID_NEM_MAX_NETMOD_STRING_LEN))
         {
             MPID_nem_netmod_func = MPID_nem_netmod_funcs[i];
             MPID_nem_netmod_id = i;
@@ -73,7 +73,7 @@ int MPID_nem_choose_netmod(void)
     MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**invalid_netmod", "**invalid_netmod %s", MPIR_CVAR_NEMESIS_NETMOD);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_CHOOSE_NETMOD);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_CHOOSE_NETMOD);
     return mpi_errno;
  fn_fail:
 
diff --git a/src/mpid/ch3/channels/sock/src/ch3_finalize.c b/src/mpid/ch3/channels/sock/src/ch3_finalize.c
index 0beac57..fe80272 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_finalize.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_finalize.c
@@ -14,14 +14,14 @@
 int MPIDI_CH3_Finalize( void )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_CH3_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CH3_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CH3_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CH3_FINALIZE);
 
     mpi_errno = MPIDI_CH3I_Progress_finalize();
     if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_CH3_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CH3_FINALIZE);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/channels/sock/src/ch3_init.c b/src/mpid/ch3/channels/sock/src/ch3_init.c
index 52db566..8b3db04 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_init.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_init.c
@@ -27,9 +27,9 @@ int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t * pg_p, int pg_rank )
     char *publish_bc_orig = NULL;
     char *bc_val = NULL;
     int val_max_remaining;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_CH3_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CH3_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CH3_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CH3_INIT);
 
     mpi_errno = MPIDI_CH3I_Progress_init();
     if (mpi_errno != MPI_SUCCESS) MPIR_ERR_POP(mpi_errno);
@@ -54,7 +54,7 @@ int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t * pg_p, int pg_rank )
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_CH3_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CH3_INIT);
     return mpi_errno;
  fn_fail:
     if (publish_bc_orig != NULL) {
diff --git a/src/mpid/ch3/channels/sock/src/ch3_isend.c b/src/mpid/ch3/channels/sock/src/ch3_isend.c
index 44aed4f..141b09e 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_isend.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_isend.c
@@ -13,15 +13,15 @@
 static void update_request(MPIR_Request * sreq, void * hdr,
 			   intptr_t hdr_sz, size_t nb)
 {
-    MPIDI_STATE_DECL(MPID_STATE_UPDATE_REQUEST);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_UPDATE_REQUEST);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_UPDATE_REQUEST);
-    MPIU_Assert(hdr_sz == sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_UPDATE_REQUEST);
+    MPIR_Assert(hdr_sz == sizeof(MPIDI_CH3_Pkt_t));
     sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
     sreq->dev.iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)((char *) &sreq->dev.pending_pkt + nb);
     sreq->dev.iov[0].MPL_IOV_LEN = hdr_sz - nb;
     sreq->dev.iov_count = 1;
-    MPIDI_FUNC_EXIT(MPID_STATE_UPDATE_REQUEST);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_UPDATE_REQUEST);
 }
 
 #undef FUNCNAME
@@ -34,11 +34,11 @@ int MPIDI_CH3_iSend(MPIDI_VC_t * vc, MPIR_Request * sreq, void * hdr,
     int mpi_errno = MPI_SUCCESS;
     int (*reqFn)(MPIDI_VC_t *, MPIR_Request *, int *);
     MPIDI_CH3I_VC *vcch = &vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISEND);
 
-    MPIU_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t) );
+    MPIR_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t) );
 
     /* The sock channel uses a fixed length header, the size of which is the 
        maximum of all possible packet headers */
@@ -73,7 +73,7 @@ int MPIDI_CH3_iSend(MPIDI_VC_t * vc, MPIR_Request * sreq, void * hdr,
                      "write complete %" PRIdPTR " bytes, calling OnDataAvail fcn", nb);
 		    reqFn = sreq->dev.OnDataAvail;
 		    if (!reqFn) {
-			MPIU_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
+			MPIR_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                         mpi_errno = MPID_Request_complete(sreq);
                         if (mpi_errno != MPI_SUCCESS) {
                             MPIR_ERR_POP(mpi_errno);
@@ -197,7 +197,7 @@ int MPIDI_CH3_iSend(MPIDI_VC_t * vc, MPIR_Request * sreq, void * hdr,
     /* --END ERROR HANDLING-- */
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISEND);
     return mpi_errno;
 }
 
diff --git a/src/mpid/ch3/channels/sock/src/ch3_isendv.c b/src/mpid/ch3/channels/sock/src/ch3_isendv.c
index e55b87d..80ce60e 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_isendv.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_isendv.c
@@ -14,9 +14,9 @@ static void update_request(MPIR_Request * sreq, MPL_IOV * iov, int iov_count,
 			   int iov_offset, size_t nb)
 {
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_UPDATE_REQUEST);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_UPDATE_REQUEST);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_UPDATE_REQUEST);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_UPDATE_REQUEST);
     
     for (i = 0; i < iov_count; i++)
     {
@@ -24,7 +24,7 @@ static void update_request(MPIR_Request * sreq, MPL_IOV * iov, int iov_count,
     }
     if (iov_offset == 0)
     {
-	MPIU_Assert(iov[0].MPL_IOV_LEN == sizeof(MPIDI_CH3_Pkt_t));
+	MPIR_Assert(iov[0].MPL_IOV_LEN == sizeof(MPIDI_CH3_Pkt_t));
 	sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) iov[0].MPL_IOV_BUF;
 	sreq->dev.iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) &sreq->dev.pending_pkt;
     }
@@ -33,7 +33,7 @@ static void update_request(MPIR_Request * sreq, MPL_IOV * iov, int iov_count,
     sreq->dev.iov[iov_offset].MPL_IOV_LEN -= nb;
     sreq->dev.iov_count = iov_count;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_UPDATE_REQUEST);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_UPDATE_REQUEST);
 }
 
 #undef FUNCNAME
@@ -46,9 +46,9 @@ int MPIDI_CH3_iSendv(MPIDI_VC_t * vc, MPIR_Request * sreq,
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3I_VC *vcch = &vc->ch;
     int (*reqFn)(MPIDI_VC_t *, MPIR_Request *, int *);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISENDV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISENDV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISENDV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISENDV);
 
     if (sreq->dev.ext_hdr_sz > 0) {
         int i;
@@ -61,8 +61,8 @@ int MPIDI_CH3_iSendv(MPIDI_VC_t * vc, MPIR_Request * sreq,
         n_iov++;
     }
 
-    MPIU_Assert(n_iov <= MPL_IOV_LIMIT);
-    MPIU_Assert(iov[0].MPL_IOV_LEN <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(n_iov <= MPL_IOV_LIMIT);
+    MPIR_Assert(iov[0].MPL_IOV_LEN <= sizeof(MPIDI_CH3_Pkt_t));
 
     /* The sock channel uses a fixed length header, the size of which is the 
        maximum of all possible packet headers */
@@ -138,7 +138,7 @@ int MPIDI_CH3_iSendv(MPIDI_VC_t * vc, MPIR_Request * sreq,
 				 "write complete, calling OnDataAvail fcn");
 		    reqFn = sreq->dev.OnDataAvail;
 		    if (!reqFn) {
-			MPIU_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
+			MPIR_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                         mpi_errno = MPID_Request_complete(sreq);
                         if (mpi_errno != MPI_SUCCESS) {
                             MPIR_ERR_POP(mpi_errno);
@@ -241,7 +241,7 @@ int MPIDI_CH3_iSendv(MPIDI_VC_t * vc, MPIR_Request * sreq,
     /* --END ERROR HANDLING-- */
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISENDV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISENDV);
     return mpi_errno;
 }
 
diff --git a/src/mpid/ch3/channels/sock/src/ch3_istartmsg.c b/src/mpid/ch3/channels/sock/src/ch3_istartmsg.c
index 808d0c5..ce68563 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_istartmsg.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_istartmsg.c
@@ -14,18 +14,18 @@ static MPIR_Request * create_request(void * hdr, intptr_t hdr_sz,
 				     size_t nb)
 {
     MPIR_Request * sreq;
-    MPIDI_STATE_DECL(MPID_STATE_CREATE_REQUEST);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CREATE_REQUEST);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CREATE_REQUEST);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CREATE_REQUEST);
 
     sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     /* --BEGIN ERROR HANDLING-- */
     if (sreq == NULL)
 	return NULL;
     /* --END ERROR HANDLING-- */
-    MPIU_Object_set_ref(sreq, 2);
+    MPIR_Object_set_ref(sreq, 2);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
-    MPIU_Assert(hdr_sz == sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert(hdr_sz == sizeof(MPIDI_CH3_Pkt_t));
     sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
     sreq->dev.iov[0].MPL_IOV_BUF = 
 	(MPL_IOV_BUF_CAST)((char *) &sreq->dev.pending_pkt + nb);
@@ -33,7 +33,7 @@ static MPIR_Request * create_request(void * hdr, intptr_t hdr_sz,
     sreq->dev.iov_count = 1;
     sreq->dev.OnDataAvail = 0;
     
-    MPIDI_FUNC_EXIT(MPID_STATE_CREATE_REQUEST);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CREATE_REQUEST);
     return sreq;
 }
 
@@ -55,11 +55,11 @@ int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * hdr, intptr_t hdr_sz,
     MPIR_Request * sreq = NULL;
     MPIDI_CH3I_VC *vcch = &vc->ch;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);
     
-    MPIU_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIR_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
 
     /* The SOCK channel uses a fixed length header, the size of which is the 
        maximum of all possible packet headers */
@@ -211,6 +211,6 @@ int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * hdr, intptr_t hdr_sz,
 
   fn_fail:
     *sreq_ptr = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/channels/sock/src/ch3_istartmsgv.c b/src/mpid/ch3/channels/sock/src/ch3_istartmsgv.c
index 8720914..232ca90 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_istartmsgv.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_istartmsgv.c
@@ -15,16 +15,16 @@ static MPIR_Request * create_request(MPL_IOV * iov, int iov_count,
 {
     MPIR_Request * sreq;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_CREATE_REQUEST);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CREATE_REQUEST);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CREATE_REQUEST);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CREATE_REQUEST);
     
     sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     /* --BEGIN ERROR HANDLING-- */
     if (sreq == NULL)
 	return NULL;
     /* --END ERROR HANDLING-- */
-    MPIU_Object_set_ref(sreq, 2);
+    MPIR_Object_set_ref(sreq, 2);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
     
     for (i = 0; i < iov_count; i++)
@@ -33,7 +33,7 @@ static MPIR_Request * create_request(MPL_IOV * iov, int iov_count,
     }
     if (iov_offset == 0)
     {
-	MPIU_Assert(iov[0].MPL_IOV_LEN == sizeof(MPIDI_CH3_Pkt_t));
+	MPIR_Assert(iov[0].MPL_IOV_LEN == sizeof(MPIDI_CH3_Pkt_t));
 	sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) iov[0].MPL_IOV_BUF;
 	sreq->dev.iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) &sreq->dev.pending_pkt;
     }
@@ -42,7 +42,7 @@ static MPIR_Request * create_request(MPL_IOV * iov, int iov_count,
     sreq->dev.iov_count = iov_count;
     sreq->dev.OnDataAvail = 0;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_CREATE_REQUEST);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CREATE_REQUEST);
     return sreq;
 }
 
@@ -79,11 +79,11 @@ int MPIDI_CH3_iStartMsgv(MPIDI_VC_t * vc, MPL_IOV * iov, int n_iov,
     MPIR_Request * sreq = NULL;
     MPIDI_CH3I_VC *vcch = &vc->ch;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
 
-    MPIU_Assert( n_iov <= MPL_IOV_LIMIT);
+    MPIR_Assert( n_iov <= MPL_IOV_LIMIT);
 
     /* The SOCK channel uses a fixed length header, the size of which is the 
        maximum of all possible packet headers */
@@ -242,6 +242,6 @@ int MPIDI_CH3_iStartMsgv(MPIDI_VC_t * vc, MPL_IOV * iov, int n_iov,
 
   fn_fail:
     *sreq_ptr = sreq;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/channels/sock/src/ch3_progress.c b/src/mpid/ch3/channels/sock/src/ch3_progress.c
index 74583c2..688dbce 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_progress.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_progress.c
@@ -65,9 +65,9 @@ static int MPIDI_CH3i_Progress_test(void)
     int mpi_errno = MPI_SUCCESS;
     int made_progress;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_TEST);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_TEST);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_TEST);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_TEST);
 
 #   ifdef MPICH_IS_THREADED
     {
@@ -98,7 +98,7 @@ static int MPIDI_CH3i_Progress_test(void)
     
     for (i = 0; i < MAX_PROGRESS_HOOKS; i++) {
         if (progress_hooks[i].active == TRUE) {
-            MPIU_Assert(progress_hooks[i].func_ptr != NULL);
+            MPIR_Assert(progress_hooks[i].func_ptr != NULL);
             mpi_errno = progress_hooks[i].func_ptr(&made_progress);
             if (mpi_errno) MPIR_ERR_POP(mpi_errno);
         }
@@ -124,7 +124,7 @@ static int MPIDI_CH3i_Progress_test(void)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_TEST);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_TEST);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -140,9 +140,9 @@ static int MPIDI_CH3i_Progress_wait(MPID_Progress_state * progress_state)
 {
     MPIDU_Sock_event_t event;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_WAIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_WAIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_WAIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_WAIT);
 
     /*
      * MT: the following code will be needed if progress can occur between 
@@ -195,7 +195,7 @@ static int MPIDI_CH3i_Progress_wait(MPID_Progress_state * progress_state)
 
         for (i = 0; i < MAX_PROGRESS_HOOKS; i++) {
             if (progress_hooks[i].active == TRUE) {
-                MPIU_Assert(progress_hooks[i].func_ptr != NULL);
+                MPIR_Assert(progress_hooks[i].func_ptr != NULL);
                 mpi_errno = progress_hooks[i].func_ptr(&made_progress);
                 if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                 if (made_progress) {
@@ -230,7 +230,7 @@ static int MPIDI_CH3i_Progress_wait(MPID_Progress_state * progress_state)
 	/* --BEGIN ERROR HANDLING-- */
 	if (mpi_errno != MPI_SUCCESS)
 	{
-	    MPIU_Assert(MPIR_ERR_GET_CLASS(mpi_errno) != MPIDU_SOCK_ERR_TIMEOUT);
+	    MPIR_Assert(MPIR_ERR_GET_CLASS(mpi_errno) != MPIDU_SOCK_ERR_TIMEOUT);
 	    MPIR_ERR_SET(mpi_errno,MPI_ERR_OTHER,"**progress_sock_wait");
 	    goto fn_fail;
 	}
@@ -268,7 +268,7 @@ static int MPIDI_CH3i_Progress_wait(MPID_Progress_state * progress_state)
      */
     progress_state->ch.completion_count = MPIDI_CH3I_progress_completion_count;
     
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_WAIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_WAIT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -309,17 +309,17 @@ int MPIDI_CH3I_Progress_init(void)
 {
     int i;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
 
     MPIR_THREAD_CHECK_BEGIN;
     /* FIXME should be appropriately abstracted somehow */
-#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
     {
         int err;
 	MPID_Thread_cond_create(&MPIDI_CH3I_progress_completion_cond, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
     }
 #   endif
     MPIR_THREAD_CHECK_END;
@@ -350,7 +350,7 @@ int MPIDI_CH3I_Progress_init(void)
     }
     
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_INIT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -367,9 +367,9 @@ int MPIDI_CH3I_Progress_finalize(void)
     int mpi_errno;
     MPIDI_CH3I_Connection_t *conn = NULL;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
 
     /* Shut down the listener */
     mpi_errno = MPIDU_CH3I_ShutdownListener();
@@ -399,17 +399,17 @@ int MPIDI_CH3I_Progress_finalize(void)
 
     MPIR_THREAD_CHECK_BEGIN;
     /* FIXME should be appropriately abstracted somehow */
-#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
     {
         int err;
 	MPID_Thread_cond_destroy(&MPIDI_CH3I_progress_completion_cond, &err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
     }
 #   endif
     MPIR_THREAD_CHECK_END;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -446,9 +446,9 @@ int MPIDI_CH3_Get_business_card(int myRank, char *value, int length)
 static int MPIDI_CH3I_Progress_handle_sock_event(MPIDU_Sock_event_t * event)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);
 
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER,VERBOSE,"Socket event of type %d", event->op_type );
 
@@ -487,14 +487,14 @@ static int MPIDI_CH3I_Progress_handle_sock_event(MPIDU_Sock_event_t * event)
 		if (conn->recv_active == NULL)
 		{
                     intptr_t buflen = sizeof (MPIDI_CH3_Pkt_t);
-		    MPIU_Assert(conn->pkt.type < MPIDI_CH3_PKT_END_CH3);
+		    MPIR_Assert(conn->pkt.type < MPIDI_CH3_PKT_END_CH3);
                     
 		    mpi_errno = pktArray[conn->pkt.type]( conn->vc, &conn->pkt,
 							  &buflen, &rreq );
 		    if (mpi_errno != MPI_SUCCESS) {
 			MPIR_ERR_POP(mpi_errno);
 		    }
-                    MPIU_Assert(buflen == sizeof (MPIDI_CH3_Pkt_t));
+                    MPIR_Assert(buflen == sizeof (MPIDI_CH3_Pkt_t));
 
 		    if (rreq == NULL)
 		    {
@@ -521,7 +521,7 @@ static int MPIDI_CH3I_Progress_handle_sock_event(MPIDU_Sock_event_t * event)
 
 		    reqFn = rreq->dev.OnDataAvail;
 		    if (!reqFn) {
-			MPIU_Assert(MPIDI_Request_get_type(rreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
+			MPIR_Assert(MPIDI_Request_get_type(rreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                         mpi_errno = MPID_Request_complete(rreq);
                         if (mpi_errno != MPI_SUCCESS) {
                             MPIR_ERR_POP(mpi_errno);
@@ -583,7 +583,7 @@ static int MPIDI_CH3I_Progress_handle_sock_event(MPIDU_Sock_event_t * event)
 
 		reqFn = sreq->dev.OnDataAvail;
 		if (!reqFn) {
-		    MPIU_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
+		    MPIR_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                     mpi_errno = MPID_Request_complete(sreq);
                     if (mpi_errno != MPI_SUCCESS) {
                         MPIR_ERR_POP(mpi_errno);
@@ -631,7 +631,7 @@ static int MPIDI_CH3I_Progress_handle_sock_event(MPIDU_Sock_event_t * event)
 			{
 			    reqFn = sreq->dev.OnDataAvail;
 			    if (!reqFn) {
-				MPIU_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
+				MPIR_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                                 mpi_errno = MPID_Request_complete(sreq);
                                 if (mpi_errno != MPI_SUCCESS) {
                                     MPIR_ERR_POP(mpi_errno);
@@ -715,7 +715,7 @@ static int MPIDI_CH3I_Progress_handle_sock_event(MPIDU_Sock_event_t * event)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -737,7 +737,7 @@ static int MPIDI_CH3I_Progress_delay(unsigned int completion_count)
     int mpi_errno = MPI_SUCCESS, err;
     
     /* FIXME should be appropriately abstracted somehow */
-#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
     {
 	while (completion_count == MPIDI_CH3I_progress_completion_count)
 	{
@@ -762,7 +762,7 @@ static int MPIDI_CH3I_Progress_continue(unsigned int completion_count)
 
     MPIR_THREAD_CHECK_BEGIN;
     /* FIXME should be appropriately abstracted somehow */
-#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#   if defined(MPICH_IS_THREADED) && (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
     {
         MPID_Thread_cond_broadcast(&MPIDI_CH3I_progress_completion_cond,&err);
     }
@@ -797,10 +797,10 @@ static inline int connection_pop_sendq_req(MPIDI_CH3I_Connection_t * conn)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3I_VC *vcch = &conn->vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_CONNECTION_POP_SENDQ_REQ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POP_SENDQ_REQ);
 
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CONNECTION_POP_SENDQ_REQ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POP_SENDQ_REQ);
     /* post send of next request on the send queue */
 
     /* FIXME: Is dequeue/get next the operation we really want? */
@@ -816,7 +816,7 @@ static inline int connection_pop_sendq_req(MPIDI_CH3I_Connection_t * conn)
     }
     
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_CONNECTION_POP_SENDQ_REQ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POP_SENDQ_REQ);
     return mpi_errno;
 }
 
@@ -829,16 +829,16 @@ static inline int connection_pop_sendq_req(MPIDI_CH3I_Connection_t * conn)
 static inline int connection_post_recv_pkt(MPIDI_CH3I_Connection_t * conn)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_CONNECTION_POST_RECV_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POST_RECV_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CONNECTION_POST_RECV_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POST_RECV_PKT);
 
     mpi_errno = MPIDU_Sock_post_read(conn->sock, &conn->pkt, sizeof(conn->pkt), sizeof(conn->pkt), NULL);
     if (mpi_errno != MPI_SUCCESS) {
 	MPIR_ERR_SET(mpi_errno,MPI_ERR_OTHER, "**fail");
     }
     
-    MPIDI_FUNC_EXIT(MPID_STATE_CONNECTION_POST_RECV_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POST_RECV_PKT);
     return mpi_errno;
 }
 
@@ -908,7 +908,7 @@ static int ReadMoreData( MPIDI_CH3I_Connection_t * conn, MPIR_Request *rreq )
 	    
 	    reqFn = rreq->dev.OnDataAvail;
 	    if (!reqFn) {
-		MPIU_Assert(MPIDI_Request_get_type(rreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
+		MPIR_Assert(MPIDI_Request_get_type(rreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                 mpi_errno = MPID_Request_complete(rreq);
                 if (mpi_errno != MPI_SUCCESS) {
                     MPIR_ERR_POP(mpi_errno);
@@ -973,9 +973,9 @@ int MPIDI_CH3I_Progress_register_hook(int (*progress_fn)(int*), int *id)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
     for (i = 0; i < MAX_PROGRESS_HOOKS; i++) {
@@ -996,7 +996,7 @@ int MPIDI_CH3I_Progress_register_hook(int (*progress_fn)(int*), int *id)
 
   fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_REGISTER_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -1010,19 +1010,19 @@ int MPIDI_CH3I_Progress_register_hook(int (*progress_fn)(int*), int *id)
 int MPIDI_CH3I_Progress_deregister_hook(int id)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
-    MPIU_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS && progress_hooks[id].func_ptr != NULL);
+    MPIR_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS && progress_hooks[id].func_ptr != NULL);
 
     progress_hooks[id].func_ptr = NULL;
     progress_hooks[id].active = FALSE;
 
   fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEREGISTER_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -1036,18 +1036,18 @@ int MPIDI_CH3I_Progress_deregister_hook(int id)
 int MPIDI_CH3I_Progress_activate_hook(int id)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
-    MPIU_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
+    MPIR_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
                 progress_hooks[id].active == FALSE && progress_hooks[id].func_ptr != NULL);
     progress_hooks[id].active = TRUE;
 
   fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -1062,18 +1062,18 @@ int MPIDI_CH3I_Progress_activate_hook(int id)
 int MPIDI_CH3I_Progress_deactivate_hook(int id)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
-    MPIU_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
+    MPIR_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
                 progress_hooks[id].active == TRUE && progress_hooks[id].func_ptr != NULL);
     progress_hooks[id].active = FALSE;
 
   fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpid/ch3/channels/sock/src/ch3_win_fns.c b/src/mpid/ch3/channels/sock/src/ch3_win_fns.c
index fe13d30..b9f8da8 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_win_fns.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_win_fns.c
@@ -15,13 +15,13 @@
 int MPIDI_CH3_Win_fns_init(MPIDI_CH3U_Win_fns_t *win_fns)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
 
     /* Sock doesn't override any of the default Window functions */
 
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3_WIN_FNS_INIT);
 
     return mpi_errno;
 }
@@ -33,13 +33,13 @@ int MPIDI_CH3_Win_fns_init(MPIDI_CH3U_Win_fns_t *win_fns)
 int MPIDI_CH3_Win_hooks_init(MPIDI_CH3U_Win_hooks_t *win_hooks)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
 
     /* Sock doesn't implement any of the Window hooks */
 
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3_WIN_HOOKS_INIT);
 
     return mpi_errno;
 }
@@ -52,14 +52,14 @@ int MPIDI_CH3_Win_pkt_orderings_init(MPIDI_CH3U_Win_pkt_ordering_t * win_pkt_ord
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
 
     /* Guarantees ordered AM flush. */
     win_pkt_orderings->am_flush_ordered = 1;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/include/mpid_rma_issue.h b/src/mpid/ch3/include/mpid_rma_issue.h
index e4c855c..1b314c4 100644
--- a/src/mpid/ch3/include/mpid_rma_issue.h
+++ b/src/mpid/ch3/include/mpid_rma_issue.h
@@ -23,9 +23,9 @@
 static inline int immed_copy(void *src, void *dest, size_t len)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_IMMED_COPY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_IMMED_COPY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_IMMED_COPY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_IMMED_COPY);
 
     if (src == NULL || dest == NULL || len == 0)
         goto fn_exit;
@@ -44,11 +44,11 @@ static inline int immed_copy(void *src, void *dest, size_t len)
         *(uint64_t *) dest = *(uint64_t *) src;
         break;
     default:
-        MPIU_Memcpy(dest, (void *) src, len);
+        MPIR_Memcpy(dest, (void *) src, len);
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_IMMED_COPY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_IMMED_COPY);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -66,8 +66,8 @@ static inline int immed_copy(void *src, void *dest, size_t len)
 static inline void fill_in_derived_dtp_info(MPIDI_RMA_dtype_info * dtype_info, void *dataloop,
                                             MPIDU_Datatype* dtp)
 {
-    MPIDI_STATE_DECL(MPID_STATE_FILL_IN_DERIVED_DTP_INFO);
-    MPIDI_FUNC_ENTER(MPID_STATE_FILL_IN_DERIVED_DTP_INFO);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_FILL_IN_DERIVED_DTP_INFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_FILL_IN_DERIVED_DTP_INFO);
 
     /* Derived datatype on target, fill derived datatype info. */
     dtype_info->is_contig = dtp->is_contig;
@@ -85,13 +85,13 @@ static inline void fill_in_derived_dtp_info(MPIDI_RMA_dtype_info * dtype_info, v
     dtype_info->has_sticky_ub = dtp->has_sticky_ub;
     dtype_info->has_sticky_lb = dtp->has_sticky_lb;
 
-    MPIU_Assert(dataloop != NULL);
-    MPIU_Memcpy(dataloop, dtp->dataloop, dtp->dataloop_size);
+    MPIR_Assert(dataloop != NULL);
+    MPIR_Memcpy(dataloop, dtp->dataloop, dtp->dataloop_size);
     /* The dataloop can have undefined padding sections, so we need to let
      * valgrind know that it is OK to pass this data to writev later on. */
     MPL_VG_MAKE_MEM_DEFINED(dataloop, dtp->dataloop_size);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_FILL_IN_DERIVED_DTP_INFO);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_FILL_IN_DERIVED_DTP_INFO);
 }
 
 /* Set extended header for ACC operation and return its real size. */
@@ -107,8 +107,8 @@ static int init_accum_ext_pkt(MPIDI_CH3_Pkt_flags_t flags,
     void *dataloop_ptr = NULL;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_INIT_ACCUM_EXT_PKT);
-    MPIDI_FUNC_ENTER(MPID_STATE_INIT_ACCUM_EXT_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_INIT_ACCUM_EXT_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_INIT_ACCUM_EXT_PKT);
 
     if ((flags & MPIDI_CH3_PKT_FLAG_RMA_STREAM) && target_dtp != NULL) {
         MPIDI_CH3_Ext_pkt_accum_stream_derived_t *_ext_hdr_ptr = NULL;
@@ -171,7 +171,7 @@ static int init_accum_ext_pkt(MPIDI_CH3_Pkt_flags_t flags,
     (*ext_hdr_sz) = _total_sz;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_INIT_ACCUM_EXT_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_INIT_ACCUM_EXT_PKT);
     return mpi_errno;
   fn_fail:
     if ((*ext_hdr_ptr))
@@ -191,20 +191,20 @@ static int init_get_accum_ext_pkt(MPIDI_CH3_Pkt_flags_t flags,
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_INIT_GET_ACCUM_EXT_PKT);
-    MPIDI_FUNC_ENTER(MPID_STATE_INIT_GET_ACCUM_EXT_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_INIT_GET_ACCUM_EXT_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_INIT_GET_ACCUM_EXT_PKT);
 
     /* Check if get_accum still reuses accum' extended packet header. */
-    MPIU_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_stream_derived_t) ==
+    MPIR_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_stream_derived_t) ==
                 sizeof(MPIDI_CH3_Ext_pkt_get_accum_stream_derived_t));
-    MPIU_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_derived_t) ==
+    MPIR_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_derived_t) ==
                 sizeof(MPIDI_CH3_Ext_pkt_get_accum_derived_t));
-    MPIU_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_stream_t) ==
+    MPIR_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_stream_t) ==
                 sizeof(MPIDI_CH3_Ext_pkt_get_accum_stream_t));
 
     mpi_errno = init_accum_ext_pkt(flags, target_dtp, stream_offset, ext_hdr_ptr, ext_hdr_sz);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_INIT_GET_ACCUM_EXT_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_INIT_GET_ACCUM_EXT_PKT);
     return mpi_errno;
 }
 
@@ -233,9 +233,9 @@ static int issue_from_origin_buffer(MPIDI_RMA_Op_t * rma_op, MPIDI_VC_t * vc,
     MPIDI_CH3_Pkt_flags_t flags;
     int is_empty_origin = FALSE;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_ISSUE_FROM_ORIGIN_BUFFER);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ISSUE_FROM_ORIGIN_BUFFER);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_ISSUE_FROM_ORIGIN_BUFFER);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_ISSUE_FROM_ORIGIN_BUFFER);
 
     /* Judge if origin buffer is empty (this can only happens for
      * GACC and FOP when op is MPI_NO_OP). */
@@ -317,7 +317,7 @@ static int issue_from_origin_buffer(MPIDI_RMA_Op_t * rma_op, MPIDI_VC_t * vc,
     req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     MPIR_ERR_CHKANDJUMP(req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
 
-    MPIU_Object_set_ref(req, 2);
+    MPIR_Object_set_ref(req, 2);
     req->kind = MPIR_REQUEST_KIND__SEND;
 
     /* set extended packet header, it is freed when the request is freed.  */
@@ -373,7 +373,7 @@ static int issue_from_origin_buffer(MPIDI_RMA_Op_t * rma_op, MPIDI_VC_t * vc,
         MPIDU_Datatype_release(target_dtp);
     (*req_ptr) = req;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_ISSUE_FROM_ORIGIN_BUFFER);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_ISSUE_FROM_ORIGIN_BUFFER);
     return mpi_errno;
   fn_fail:
     if (req) {
@@ -406,9 +406,9 @@ static int issue_put_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     MPIDI_CH3_Ext_pkt_put_derived_t *ext_hdr_ptr = NULL;
     MPI_Aint ext_hdr_sz = 0;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_ISSUE_PUT_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ISSUE_PUT_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ISSUE_PUT_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_ISSUE_PUT_OP);
 
     put_pkt->flags |= flags;
 
@@ -460,7 +460,7 @@ static int issue_put_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ISSUE_PUT_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_ISSUE_PUT_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -493,9 +493,9 @@ static int issue_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     void *ext_hdr_ptr = NULL;
     MPI_Aint ext_hdr_sz = 0;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_ISSUE_ACC_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ISSUE_ACC_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ISSUE_ACC_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_ISSUE_ACC_OP);
 
     MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
 
@@ -511,7 +511,7 @@ static int issue_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
         MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
 
         if (curr_req != NULL) {
-            MPIU_Assert(rma_op->reqs_size == 0 && rma_op->single_req == NULL);
+            MPIR_Assert(rma_op->reqs_size == 0 && rma_op->single_req == NULL);
 
             rma_op->reqs_size = 1;
             rma_op->single_req = curr_req;
@@ -531,18 +531,18 @@ static int issue_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     }
     else {
         MPIDU_Datatype_get_ptr(rma_op->origin_datatype, origin_dtp_ptr);
-        MPIU_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
+        MPIR_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
         MPIDU_Datatype_get_size_macro(origin_dtp_ptr->basic_type, predefined_dtp_size);
         predefined_dtp_count = total_len / predefined_dtp_size;
         MPIDU_Datatype_get_extent_macro(origin_dtp_ptr->basic_type, predefined_dtp_extent);
     }
-    MPIU_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);
+    MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);
 
     /* Calculate number of predefined elements in each stream unit, and
      * total number of stream units. */
     stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
     stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
-    MPIU_Assert(stream_elem_count > 0 && stream_unit_count > 0);
+    MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);
 
     /* If there are more than one stream unit, mark the current packet
      * as stream packet */
@@ -554,7 +554,7 @@ static int issue_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
         MPIDU_Datatype_get_ptr(accum_pkt->datatype, target_dtp_ptr);
 
     rest_len = total_len;
-    MPIU_Assert(rma_op->issued_stream_count >= 0);
+    MPIR_Assert(rma_op->issued_stream_count >= 0);
     for (j = 0; j < stream_unit_count; j++) {
         intptr_t stream_offset, stream_size;
         MPIR_Request *curr_req = NULL;
@@ -588,7 +588,7 @@ static int issue_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
 
         if (curr_req != NULL) {
             if (rma_op->reqs_size == 0) {
-                MPIU_Assert(rma_op->single_req == NULL && rma_op->multi_reqs == NULL);
+                MPIR_Assert(rma_op->single_req == NULL && rma_op->multi_reqs == NULL);
                 rma_op->reqs_size = stream_unit_count;
 
                 if (stream_unit_count > 1) {
@@ -611,7 +611,7 @@ static int issue_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
             accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE) {
             /* if piggybacked with LOCK flag, we
              * only issue the first streaming unit */
-            MPIU_Assert(j == 0);
+            MPIR_Assert(j == 0);
             break;
         }
     }   /* end of for loop */
@@ -622,7 +622,7 @@ static int issue_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ISSUE_ACC_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_ISSUE_ACC_OP);
     return mpi_errno;
   fn_fail:
     if (rma_op->reqs_size == 1) {
@@ -659,9 +659,9 @@ static int issue_get_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     void *ext_hdr_ptr = NULL;
     MPI_Aint ext_hdr_sz = 0;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_ISSUE_GET_ACC_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ISSUE_GET_ACC_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ISSUE_GET_ACC_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_ISSUE_GET_ACC_OP);
 
     MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
 
@@ -679,7 +679,7 @@ static int issue_get_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
         resp_req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
         MPIR_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
 
-        MPIU_Object_set_ref(resp_req, 2);
+        MPIR_Object_set_ref(resp_req, 2);
 
         resp_req->dev.user_buf = rma_op->result_addr;
         resp_req->dev.user_count = rma_op->result_count;
@@ -717,18 +717,18 @@ static int issue_get_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     }
     else {
         MPIDU_Datatype_get_ptr(get_accum_pkt->datatype, target_dtp_ptr);
-        MPIU_Assert(target_dtp_ptr != NULL && target_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
+        MPIR_Assert(target_dtp_ptr != NULL && target_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
         MPIDU_Datatype_get_size_macro(target_dtp_ptr->basic_type, predefined_dtp_size);
         predefined_dtp_count = total_len / predefined_dtp_size;
         MPIDU_Datatype_get_extent_macro(target_dtp_ptr->basic_type, predefined_dtp_extent);
     }
-    MPIU_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);
+    MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);
 
     /* Calculate number of predefined elements in each stream unit, and
      * total number of stream units. */
     stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
     stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
-    MPIU_Assert(stream_elem_count > 0 && stream_unit_count > 0);
+    MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);
 
     /* If there are more than one stream unit, mark the current packet
      * as stream packet */
@@ -746,7 +746,7 @@ static int issue_get_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
             rma_op->multi_reqs[i] = NULL;
     }
 
-    MPIU_Assert(rma_op->issued_stream_count >= 0);
+    MPIR_Assert(rma_op->issued_stream_count >= 0);
 
     for (j = 0; j < stream_unit_count; j++) {
         intptr_t stream_offset, stream_size;
@@ -774,7 +774,7 @@ static int issue_get_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
         resp_req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
         MPIR_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
 
-        MPIU_Object_set_ref(resp_req, 2);
+        MPIR_Object_set_ref(resp_req, 2);
 
         resp_req->dev.user_buf = rma_op->result_addr;
         resp_req->dev.user_count = rma_op->result_count;
@@ -828,7 +828,7 @@ static int issue_get_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
             get_accum_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE) {
             /* if piggybacked with LOCK flag, we
              * only issue the first streaming unit */
-            MPIU_Assert(j == 0);
+            MPIR_Assert(j == 0);
             break;
         }
     }   /* end of for loop */
@@ -839,7 +839,7 @@ static int issue_get_acc_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ISSUE_GET_ACC_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_ISSUE_GET_ACC_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -889,9 +889,9 @@ static int issue_get_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     MPIDI_CH3_Ext_pkt_get_derived_t *ext_hdr_ptr = NULL;
     MPI_Aint ext_hdr_sz = 0;
     MPL_IOV iov[MPL_IOV_LIMIT];
-    MPIDI_STATE_DECL(MPID_STATE_ISSUE_GET_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ISSUE_GET_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ISSUE_GET_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_ISSUE_GET_OP);
 
     rma_op->reqs_size = 1;
 
@@ -904,7 +904,7 @@ static int issue_get_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomemreq");
     }
 
-    MPIU_Object_set_ref(curr_req, 2);
+    MPIR_Object_set_ref(curr_req, 2);
 
     curr_req->dev.user_buf = rma_op->origin_addr;
     curr_req->dev.user_count = rma_op->origin_count;
@@ -988,7 +988,7 @@ static int issue_get_op(MPIDI_RMA_Op_t * rma_op, MPIR_Win * win_ptr,
     rma_op->single_req = curr_req;
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ISSUE_GET_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_ISSUE_GET_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1013,9 +1013,9 @@ static int issue_cas_op(MPIDI_RMA_Op_t * rma_op,
     MPIR_Request *rmw_req = NULL;
     MPIR_Request *curr_req = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_ISSUE_CAS_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ISSUE_CAS_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ISSUE_CAS_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_ISSUE_CAS_OP);
 
     rma_op->reqs_size = 1;
 
@@ -1027,7 +1027,7 @@ static int issue_cas_op(MPIDI_RMA_Op_t * rma_op,
 
     /* Set refs on the request to 2: one for the response message, and one for
      * the partial completion handler */
-    MPIU_Object_set_ref(curr_req, 2);
+    MPIR_Object_set_ref(curr_req, 2);
 
     curr_req->dev.user_buf = rma_op->result_addr;
     curr_req->dev.datatype = rma_op->result_datatype;
@@ -1051,7 +1051,7 @@ static int issue_cas_op(MPIDI_RMA_Op_t * rma_op,
     rma_op->single_req = curr_req;
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ISSUE_CAS_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_ISSUE_CAS_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1076,9 +1076,9 @@ static int issue_fop_op(MPIDI_RMA_Op_t * rma_op,
     MPIR_Request *resp_req = NULL;
     MPIR_Request *curr_req = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_ISSUE_FOP_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ISSUE_FOP_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ISSUE_FOP_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_ISSUE_FOP_OP);
 
     rma_op->reqs_size = 1;
 
@@ -1088,7 +1088,7 @@ static int issue_fop_op(MPIDI_RMA_Op_t * rma_op,
     resp_req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     MPIR_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
 
-    MPIU_Object_set_ref(resp_req, 2);
+    MPIR_Object_set_ref(resp_req, 2);
 
     resp_req->dev.user_buf = rma_op->result_addr;
     resp_req->dev.datatype = rma_op->result_datatype;
@@ -1124,7 +1124,7 @@ static int issue_fop_op(MPIDI_RMA_Op_t * rma_op,
     rma_op->single_req = resp_req;
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ISSUE_FOP_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_ISSUE_FOP_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1145,9 +1145,9 @@ static inline int issue_rma_op(MPIDI_RMA_Op_t * op_ptr, MPIR_Win * win_ptr,
                                MPIDI_RMA_Target_t * target_ptr, MPIDI_CH3_Pkt_flags_t flags)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_ISSUE_RMA_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ISSUE_RMA_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ISSUE_RMA_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_ISSUE_RMA_OP);
 
     switch (op_ptr->pkt.type) {
     case (MPIDI_CH3_PKT_PUT):
@@ -1180,7 +1180,7 @@ static inline int issue_rma_op(MPIDI_RMA_Op_t * op_ptr, MPIR_Win * win_ptr,
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ISSUE_RMA_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_ISSUE_RMA_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
diff --git a/src/mpid/ch3/include/mpid_rma_lockqueue.h b/src/mpid/ch3/include/mpid_rma_lockqueue.h
index cd3bb22..b7aaab6 100644
--- a/src/mpid/ch3/include/mpid_rma_lockqueue.h
+++ b/src/mpid/ch3/include/mpid_rma_lockqueue.h
@@ -33,7 +33,7 @@ static inline MPIDI_RMA_Target_lock_entry_t *MPIDI_CH3I_Win_target_lock_entry_al
 
     if (new_ptr != NULL) {
         new_ptr->next = NULL;
-        MPIU_Memcpy(&(new_ptr->pkt), pkt, sizeof(*pkt));
+        MPIR_Memcpy(&(new_ptr->pkt), pkt, sizeof(*pkt));
         new_ptr->vc = NULL;
         new_ptr->data = NULL;
         new_ptr->buf_size = 0;
diff --git a/src/mpid/ch3/include/mpid_rma_oplist.h b/src/mpid/ch3/include/mpid_rma_oplist.h
index 8b7ac02..767bcc7 100644
--- a/src/mpid/ch3/include/mpid_rma_oplist.h
+++ b/src/mpid/ch3/include/mpid_rma_oplist.h
@@ -282,8 +282,8 @@ static inline int MPIDI_CH3I_Win_target_free(MPIR_Win * win_ptr, MPIDI_RMA_Targe
     /* We enqueue elements to the right pool, so when they get freed
      * at window free time, they won't conflict with the global pool
      * or other windows */
-    MPIU_Assert(e->pending_net_ops_list_head == NULL);
-    MPIU_Assert(e->pending_user_ops_list_head == NULL);
+    MPIR_Assert(e->pending_net_ops_list_head == NULL);
+    MPIR_Assert(e->pending_user_ops_list_head == NULL);
 
     /* use PREPEND when return objects back to the pool
      * in order to improve cache performance */
@@ -539,7 +539,7 @@ static inline void MPIDI_CH3I_RMA_Ops_free_elem(MPIR_Win * win_ptr, MPIDI_RMA_Op
 {
     MPIDI_RMA_Op_t *tmp_ptr = curr_ptr;
 
-    MPIU_Assert(curr_ptr != NULL);
+    MPIR_Assert(curr_ptr != NULL);
 
     MPL_DL_DELETE(*list, curr_ptr);
 
diff --git a/src/mpid/ch3/include/mpid_rma_shm.h b/src/mpid/ch3/include/mpid_rma_shm.h
index 6679636..a33ddda 100644
--- a/src/mpid/ch3/include/mpid_rma_shm.h
+++ b/src/mpid/ch3/include/mpid_rma_shm.h
@@ -259,13 +259,13 @@ static inline int MPIDI_CH3I_Shm_put_op(const void *origin_addr, int origin_coun
     int mpi_errno = MPI_SUCCESS;
     void *base = NULL;
     int disp_unit;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);
 
     if (win_ptr->shm_allocated == TRUE) {
         int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
-        MPIU_Assert(local_target_rank >= 0);
+        MPIR_Assert(local_target_rank >= 0);
         base = win_ptr->shm_base_addrs[local_target_rank];
         disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
     }
@@ -281,7 +281,7 @@ static inline int MPIDI_CH3I_Shm_put_op(const void *origin_addr, int origin_coun
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_PUT_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -309,13 +309,13 @@ static inline int MPIDI_CH3I_Shm_acc_op(const void *origin_addr, int origin_coun
     MPI_Aint total_len, rest_len;
     MPI_Aint origin_dtp_size;
     MPIDU_Datatype*origin_dtp_ptr = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);
 
     if (win_ptr->shm_allocated == TRUE) {
         int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
-        MPIU_Assert(local_target_rank >= 0);
+        MPIR_Assert(local_target_rank >= 0);
         shm_op = 1;
         base = win_ptr->shm_base_addrs[local_target_rank];
         disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
@@ -347,16 +347,16 @@ static inline int MPIDI_CH3I_Shm_acc_op(const void *origin_addr, int origin_coun
     total_len = origin_dtp_size * origin_count;
 
     MPIDU_Datatype_get_ptr(origin_datatype, origin_dtp_ptr);
-    MPIU_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
+    MPIR_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
     basic_type = origin_dtp_ptr->basic_type;
     MPIDU_Datatype_get_size_macro(basic_type, predefined_dtp_size);
     predefined_dtp_count = total_len / predefined_dtp_size;
     MPIDU_Datatype_get_extent_macro(basic_type, predefined_dtp_extent);
-    MPIU_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);
+    MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);
 
     stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
     stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
-    MPIU_Assert(stream_elem_count > 0 && stream_unit_count > 0);
+    MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);
 
     rest_len = total_len;
     for (i = 0; i < stream_unit_count; i++) {
@@ -399,7 +399,7 @@ static inline int MPIDI_CH3I_Shm_acc_op(const void *origin_addr, int origin_coun
             MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
         }
 
-        MPIU_Assert(stream_count == (int) stream_count);
+        MPIR_Assert(stream_count == (int) stream_count);
         mpi_errno = do_accumulate_op((void *) packed_buf, (int) stream_count, basic_type,
                                      (void *) ((char *) base + disp_unit * target_disp),
                                      target_count, target_datatype, stream_offset, op);
@@ -415,7 +415,7 @@ static inline int MPIDI_CH3I_Shm_acc_op(const void *origin_addr, int origin_coun
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_ACC_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -446,9 +446,9 @@ static inline int MPIDI_CH3I_Shm_get_acc_op(const void *origin_addr, int origin_
     MPIDU_Datatype*origin_dtp_ptr = NULL;
     int is_empty_origin = FALSE;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);
 
     /* Judge if origin buffer is empty */
     if (op == MPI_NO_OP)
@@ -456,7 +456,7 @@ static inline int MPIDI_CH3I_Shm_get_acc_op(const void *origin_addr, int origin_
 
     if (win_ptr->shm_allocated == TRUE) {
         int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
-        MPIU_Assert(local_target_rank >= 0);
+        MPIR_Assert(local_target_rank >= 0);
         base = win_ptr->shm_base_addrs[local_target_rank];
         disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
         MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
@@ -494,16 +494,16 @@ static inline int MPIDI_CH3I_Shm_get_acc_op(const void *origin_addr, int origin_
     total_len = origin_dtp_size * origin_count;
 
     MPIDU_Datatype_get_ptr(origin_datatype, origin_dtp_ptr);
-    MPIU_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
+    MPIR_Assert(origin_dtp_ptr != NULL && origin_dtp_ptr->basic_type != MPI_DATATYPE_NULL);
     basic_type = origin_dtp_ptr->basic_type;
     MPIDU_Datatype_get_size_macro(basic_type, predefined_dtp_size);
     predefined_dtp_count = total_len / predefined_dtp_size;
     MPIDU_Datatype_get_extent_macro(basic_type, predefined_dtp_extent);
-    MPIU_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);
+    MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 && predefined_dtp_extent > 0);
 
     stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
     stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
-    MPIU_Assert(stream_elem_count > 0 && stream_unit_count > 0);
+    MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);
 
     rest_len = total_len;
     for (i = 0; i < stream_unit_count; i++) {
@@ -542,7 +542,7 @@ static inline int MPIDI_CH3I_Shm_get_acc_op(const void *origin_addr, int origin_
             packed_buf = tmpbuf;
         }
 
-        MPIU_Assert(stream_count == (int) stream_count);
+        MPIR_Assert(stream_count == (int) stream_count);
         mpi_errno = do_accumulate_op((void *) packed_buf, (int) stream_count, basic_type,
                                      (void *) ((char *) base + disp_unit * target_disp),
                                      target_count, target_datatype, stream_offset, op);
@@ -559,7 +559,7 @@ static inline int MPIDI_CH3I_Shm_get_acc_op(const void *origin_addr, int origin_
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_GET_ACC_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -583,13 +583,13 @@ static inline int MPIDI_CH3I_Shm_get_op(void *origin_addr, int origin_count,
     void *base = NULL;
     int disp_unit;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);
 
     if (win_ptr->shm_allocated == TRUE) {
         int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
-        MPIU_Assert(local_target_rank >= 0);
+        MPIR_Assert(local_target_rank >= 0);
         base = win_ptr->shm_base_addrs[local_target_rank];
         disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
     }
@@ -605,7 +605,7 @@ static inline int MPIDI_CH3I_Shm_get_op(void *origin_addr, int origin_count,
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_GET_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -627,13 +627,13 @@ static inline int MPIDI_CH3I_Shm_cas_op(const void *origin_addr, const void *com
     MPI_Aint len;
     int shm_locked = 0;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);
 
     if (win_ptr->shm_allocated == TRUE) {
         int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
-        MPIU_Assert(local_target_rank >= 0);
+        MPIR_Assert(local_target_rank >= 0);
         base = win_ptr->shm_base_addrs[local_target_rank];
         disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
 
@@ -648,10 +648,10 @@ static inline int MPIDI_CH3I_Shm_cas_op(const void *origin_addr, const void *com
     dest_addr = (char *) base + disp_unit * target_disp;
 
     MPIDU_Datatype_get_size_macro(datatype, len);
-    MPIU_Memcpy(result_addr, dest_addr, len);
+    MPIR_Memcpy(result_addr, dest_addr, len);
 
     if (MPIR_Compare_equal(compare_addr, dest_addr, datatype)) {
-        MPIU_Memcpy(dest_addr, origin_addr, len);
+        MPIR_Memcpy(dest_addr, origin_addr, len);
     }
 
     if (shm_locked) {
@@ -660,7 +660,7 @@ static inline int MPIDI_CH3I_Shm_cas_op(const void *origin_addr, const void *com
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_CAS_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -686,13 +686,13 @@ static inline int MPIDI_CH3I_Shm_fop_op(const void *origin_addr, void *result_ad
     MPI_Aint len;
     int one, shm_locked = 0;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);
 
     if (win_ptr->shm_allocated == TRUE) {
         int local_target_rank = win_ptr->comm_ptr->intranode_table[target_rank];
-        MPIU_Assert(local_target_rank >= 0);
+        MPIR_Assert(local_target_rank >= 0);
         base = win_ptr->shm_base_addrs[local_target_rank];
         disp_unit = win_ptr->basic_info_table[target_rank].disp_unit;
 
@@ -707,7 +707,7 @@ static inline int MPIDI_CH3I_Shm_fop_op(const void *origin_addr, void *result_ad
     dest_addr = (char *) base + disp_unit * target_disp;
 
     MPIDU_Datatype_get_size_macro(datatype, len);
-    MPIU_Memcpy(result_addr, dest_addr, len);
+    MPIR_Memcpy(result_addr, dest_addr, len);
 
     uop = MPIR_OP_HDL_TO_FN(op);
     one = 1;
@@ -720,7 +720,7 @@ static inline int MPIDI_CH3I_Shm_fop_op(const void *origin_addr, void *result_ad
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_FOP_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
diff --git a/src/mpid/ch3/include/mpid_sched.h b/src/mpid/ch3/include/mpid_sched.h
index 5b82fac..ae07119 100644
--- a/src/mpid/ch3/include/mpid_sched.h
+++ b/src/mpid/ch3/include/mpid_sched.h
@@ -4,23 +4,23 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-#ifndef MPID_SCHED_H_INCLUDED
-#define MPID_SCHED_H_INCLUDED
+#ifndef MPIR_SCHED_H_INCLUDED
+#define MPIR_SCHED_H_INCLUDED
 #include "mpidu_sched.h"
 
-#define MPID_Sched_cb MPIDU_Sched_cb
-#define MPID_Sched_cb2 MPIDU_Sched_cb2
-#define MPID_Sched_next_tag  MPIDU_Sched_next_tag
-#define MPID_Sched_create MPIDU_Sched_create
-#define MPID_Sched_clone MPIDU_Sched_clone
-#define MPID_Sched_start MPIDU_Sched_start
-#define MPID_Sched_send MPIDU_Sched_send
-#define MPID_Sched_send_defer MPIDU_Sched_send_defer
-#define MPID_Sched_recv MPIDU_Sched_recv
-#define MPID_Sched_recv_status MPIDU_Sched_recv_status
-#define MPID_Sched_ssend MPIDU_Sched_ssend
-#define MPID_Sched_reduce MPIDU_Sched_reduce
-#define MPID_Sched_copy MPIDU_Sched_copy
-#define MPID_Sched_barrier MPIDU_Sched_barrier
+#define MPIR_Sched_cb MPIDU_Sched_cb
+#define MPIR_Sched_cb2 MPIDU_Sched_cb2
+#define MPIR_Sched_next_tag  MPIDU_Sched_next_tag
+#define MPIR_Sched_create MPIDU_Sched_create
+#define MPIR_Sched_clone MPIDU_Sched_clone
+#define MPIR_Sched_start MPIDU_Sched_start
+#define MPIR_Sched_send MPIDU_Sched_send
+#define MPIR_Sched_send_defer MPIDU_Sched_send_defer
+#define MPIR_Sched_recv MPIDU_Sched_recv
+#define MPIR_Sched_recv_status MPIDU_Sched_recv_status
+#define MPIR_Sched_ssend MPIDU_Sched_ssend
+#define MPIR_Sched_reduce MPIDU_Sched_reduce
+#define MPIR_Sched_copy MPIDU_Sched_copy
+#define MPIR_Sched_barrier MPIDU_Sched_barrier
 
-#endif /* MPID_SCHED_H_INCLUDED */
+#endif /* MPIR_SCHED_H_INCLUDED */
diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index 11f1c3e..506e9d6 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -76,7 +76,7 @@ typedef struct MPIDI_PG
        MPIU_Object system, but we do use the associated reference counting 
        routines.  Therefore, handle must be present, but is not used 
        except by debugging routines */
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
 
     /* Next pointer used to maintain a list of all process groups known to 
        this process */
@@ -254,7 +254,7 @@ extern MPIDI_Process_t MPIDI_Process;
 
 #  define MPIDI_Request_tls_alloc(req_) \
     do { \
-	(req_) = MPIU_Handle_obj_alloc(&MPIR_Request_mem); \
+	(req_) = MPIR_Handle_obj_alloc(&MPIR_Request_mem); \
         MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL,VERBOSE,		\
 	       "allocated request, handle=0x%08x", req_);\
     } while (0)
@@ -288,7 +288,7 @@ extern MPIDI_Process_t MPIDI_Process;
 #define MPIDI_Request_create_sreq(sreq_, mpi_errno_, FAIL_)	\
 {								\
     (sreq_) = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);           \
-    MPIU_Object_set_ref((sreq_), 2);				\
+    MPIR_Object_set_ref((sreq_), 2);				\
     (sreq_)->kind = MPIR_REQUEST_KIND__SEND;				\
     (sreq_)->comm = comm;					\
     (sreq_)->dev.partner_request   = NULL;                         \
@@ -307,7 +307,7 @@ extern MPIDI_Process_t MPIDI_Process;
 #define MPIDI_Request_create_rreq(rreq_, mpi_errno_, FAIL_)	\
 {								\
     (rreq_) = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);           \
-    MPIU_Object_set_ref((rreq_), 2);				\
+    MPIR_Object_set_ref((rreq_), 2);				\
     (rreq_)->kind = MPIR_REQUEST_KIND__RECV;				\
     (rreq_)->dev.partner_request   = NULL;                         \
 }
@@ -318,7 +318,7 @@ extern MPIDI_Process_t MPIDI_Process;
     do {                                                                   \
         (rreq_) = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);               \
         if ((rreq_) != NULL) {                                             \
-            MPIU_Object_set_ref((rreq_), 1);                               \
+            MPIR_Object_set_ref((rreq_), 1);                               \
             /* MT FIXME should these be handled by MPIR_Request_create? */ \
             MPIR_cc_set(&(rreq_)->cc, 0);                                  \
             (rreq_)->kind = MPIR_REQUEST_KIND__RECV;                             \
@@ -562,11 +562,11 @@ int MPIDI_CH3_PG_Init( MPIDI_PG_t * );
 
 #define MPIDI_PG_add_ref(pg_)			\
 do {                                            \
-    MPIU_Object_add_ref(pg_);			\
+    MPIR_Object_add_ref(pg_);			\
 } while (0)
 #define MPIDI_PG_release_ref(pg_, inuse_)	\
 do {                                            \
-    MPIU_Object_release_ref(pg_, inuse_);	\
+    MPIR_Object_release_ref(pg_, inuse_);	\
 } while (0)
 
 #define MPIDI_PG_Get_vc(pg_, rank_, vcp_) *(vcp_) = &(pg_)->vct[rank_]
@@ -682,7 +682,7 @@ typedef struct MPIDI_VC
        when debugging objects (the handle kind is used in reporting
        on changes to the object).
     */
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
 
     /* state of the VC */
     MPIDI_VC_State_t state;
@@ -765,7 +765,7 @@ MPIDI_VC_Event_t;
  S*/
 typedef struct MPIDI_VCRT
 {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
     int size;
     MPIDI_VC_t * vcr_table[1];
 }
@@ -789,10 +789,10 @@ int MPIDI_VC_Init( MPIDI_VC_t *, MPIDI_PG_t *, int );
 
 
 #define MPIDI_VC_add_ref( _vc )                                 \
-    do { MPIU_Object_add_ref( _vc ); } while (0)
+    do { MPIR_Object_add_ref( _vc ); } while (0)
 
 #define MPIDI_VC_release_ref( _vc, _inuse ) \
-    do { MPIU_Object_release_ref( _vc, _inuse ); } while (0)
+    do { MPIR_Object_release_ref( _vc, _inuse ); } while (0)
 
 /*------------------------------
   END VIRTUAL CONNECTION SECTION
@@ -838,7 +838,7 @@ extern MPIDI_CH3U_SRBuf_element_t * MPIDI_CH3U_SRBuf_pool;
 #   define MPIDI_CH3U_SRBuf_free(req_)                                  \
     {                                                                   \
         MPIDI_CH3U_SRBuf_element_t * tmp;                               \
-        MPIU_Assert(MPIDI_Request_get_srbuf_flag(req_));                \
+        MPIR_Assert(MPIDI_Request_get_srbuf_flag(req_));                \
         MPIDI_Request_set_srbuf_flag((req_), FALSE);                    \
         tmp = (MPIDI_CH3U_SRBuf_element_t *) (((MPI_Aint) ((req_)->dev.tmpbuf)) - \
                ((MPI_Aint) MPIDI_CH3U_Offsetof(MPIDI_CH3U_SRBuf_element_t, buf))); \
@@ -1440,7 +1440,7 @@ int MPIDI_CH3U_Receive_data_unexpected(MPIR_Request * rreq, char *buf, intptr_t
 int MPIDI_CH3I_Comm_init(void);
 
 int MPIDI_CH3I_Comm_handle_failed_procs(MPIR_Group *new_failed_procs);
-void MPIDI_CH3I_Comm_find(MPIU_Context_id_t context_id, MPIR_Comm **comm);
+void MPIDI_CH3I_Comm_find(MPIR_Context_id_t context_id, MPIR_Comm **comm);
 
 /* The functions below allow channels to register functions to be
    called immediately after a communicator has been created, and
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 68cb155..1abc133 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -842,7 +842,7 @@ typedef struct MPIDI_CH3_Pkt_close {
 
 typedef struct MPIDI_CH3_Pkt_revoke {
     MPIDI_CH3_Pkt_type_t type;
-    MPIU_Context_id_t revoked_comm;
+    MPIR_Context_id_t revoked_comm;
 } MPIDI_CH3_Pkt_revoke_t;
 
 typedef union MPIDI_CH3_Pkt {
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index cb8eedf..7b05e75 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -107,7 +107,7 @@ typedef MPIDI_Rank_t MPID_Node_id_t;
 typedef struct MPIDI_Message_match_parts {
     int32_t tag;
     MPIDI_Rank_t rank;
-    MPIU_Context_id_t context_id;
+    MPIR_Context_id_t context_id;
 } MPIDI_Message_match_parts_t;
 typedef union {
     MPIDI_Message_match_parts_t parts;
@@ -163,7 +163,6 @@ typedef union {
  * by the channel instance.
  */
 
-#define HAVE_DEV_COMM_HOOK
 #define MPID_Dev_comm_create_hook(comm_) MPIDI_CH3I_Comm_create_hook(comm_)
 #define MPID_Dev_comm_destroy_hook(comm_) MPIDI_CH3I_Comm_destroy_hook(comm_)
 
diff --git a/src/mpid/ch3/include/mpidrma.h b/src/mpid/ch3/include/mpidrma.h
index 9a35ee0..6503918 100644
--- a/src/mpid/ch3/include/mpidrma.h
+++ b/src/mpid/ch3/include/mpidrma.h
@@ -24,8 +24,8 @@ static inline int send_lock_msg(int dest, int lock_type, MPIR_Win * win_ptr)
     MPIDI_CH3_Pkt_lock_t *lock_pkt = &upkt.lock;
     MPIR_Request *req = NULL;
     MPIDI_VC_t *vc;
-    MPIDI_STATE_DECL(MPID_STATE_SEND_LOCK_MSG);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_LOCK_MSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_LOCK_MSG);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_SEND_LOCK_MSG);
 
     MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
 
@@ -37,7 +37,7 @@ static inline int send_lock_msg(int dest, int lock_type, MPIR_Win * win_ptr)
     if (lock_type == MPI_LOCK_SHARED)
         lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED;
     else {
-        MPIU_Assert(lock_type == MPI_LOCK_EXCLUSIVE);
+        MPIR_Assert(lock_type == MPI_LOCK_EXCLUSIVE);
         lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE;
     }
 
@@ -52,7 +52,7 @@ static inline int send_lock_msg(int dest, int lock_type, MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_LOCK_MSG);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_SEND_LOCK_MSG);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -71,8 +71,8 @@ static inline int send_unlock_msg(int dest, MPIR_Win * win_ptr, MPIDI_CH3_Pkt_fl
     MPIDI_CH3_Pkt_unlock_t *unlock_pkt = &upkt.unlock;
     MPIR_Request *req = NULL;
     MPIDI_VC_t *vc;
-    MPIDI_STATE_DECL(MPID_STATE_SEND_UNLOCK_MSG);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_UNLOCK_MSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_UNLOCK_MSG);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_SEND_UNLOCK_MSG);
 
     MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
 
@@ -95,7 +95,7 @@ static inline int send_unlock_msg(int dest, MPIR_Win * win_ptr, MPIDI_CH3_Pkt_fl
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_UNLOCK_MSG);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_SEND_UNLOCK_MSG);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -117,11 +117,11 @@ static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPIR_Win * win_p
     MPIDI_CH3_Pkt_lock_ack_t *lock_ack_pkt = &upkt.lock_ack;
     MPIR_Request *req = NULL;
     int mpi_errno;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
 
-    MPIU_Assert(!(source_win_handle != MPI_WIN_NULL && request_handle != MPI_REQUEST_NULL));
+    MPIR_Assert(!(source_win_handle != MPI_WIN_NULL && request_handle != MPI_REQUEST_NULL));
 
     /* send lock ack packet */
     MPIDI_Pkt_init(lock_ack_pkt, MPIDI_CH3_PKT_LOCK_ACK);
@@ -146,7 +146,7 @@ static inline int MPIDI_CH3I_Send_lock_ack_pkt(MPIDI_VC_t * vc, MPIR_Win * win_p
     }
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_ACK_PKT);
 
     return mpi_errno;
 }
@@ -164,11 +164,11 @@ static inline int MPIDI_CH3I_Send_lock_op_ack_pkt(MPIDI_VC_t * vc, MPIR_Win * wi
     MPIDI_CH3_Pkt_lock_op_ack_t *lock_op_ack_pkt = &upkt.lock_op_ack;
     MPIR_Request *req = NULL;
     int mpi_errno;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
 
-    MPIU_Assert(!(source_win_handle != MPI_WIN_NULL && request_handle != MPI_REQUEST_NULL));
+    MPIR_Assert(!(source_win_handle != MPI_WIN_NULL && request_handle != MPI_REQUEST_NULL));
 
     /* send lock ack packet */
     MPIDI_Pkt_init(lock_op_ack_pkt, MPIDI_CH3_PKT_LOCK_OP_ACK);
@@ -193,7 +193,7 @@ static inline int MPIDI_CH3I_Send_lock_op_ack_pkt(MPIDI_VC_t * vc, MPIR_Win * wi
     }
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_OP_ACK_PKT);
     return mpi_errno;
 }
 
@@ -209,9 +209,9 @@ static inline int MPIDI_CH3I_Send_ack_pkt(MPIDI_VC_t * vc, MPIR_Win * win_ptr,
     MPIDI_CH3_Pkt_ack_t *ack_pkt = &upkt.ack;
     MPIR_Request *req;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_ACK_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_ACK_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_ACK_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SEND_ACK_PKT);
 
     MPIDI_Pkt_init(ack_pkt, MPIDI_CH3_PKT_ACK);
     ack_pkt->source_win_handle = source_win_handle;
@@ -230,7 +230,7 @@ static inline int MPIDI_CH3I_Send_ack_pkt(MPIDI_VC_t * vc, MPIR_Win * win_ptr,
     }
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_ACK_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SEND_ACK_PKT);
     return mpi_errno;
 }
 
@@ -246,8 +246,8 @@ static inline int send_decr_at_cnt_msg(int dst, MPIR_Win * win_ptr, MPIDI_CH3_Pk
     MPIDI_VC_t *vc;
     MPIR_Request *request = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_SEND_DECR_AT_CNT_MSG);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_DECR_AT_CNT_MSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_DECR_AT_CNT_MSG);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_SEND_DECR_AT_CNT_MSG);
 
     MPIDI_Pkt_init(decr_at_cnt_pkt, MPIDI_CH3_PKT_DECR_AT_COUNTER);
     decr_at_cnt_pkt->target_win_handle = win_ptr->basic_info_table[dst].win_handle;
@@ -268,7 +268,7 @@ static inline int send_decr_at_cnt_msg(int dst, MPIR_Win * win_ptr, MPIDI_CH3_Pk
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_DECR_AT_CNT_MSG);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_SEND_DECR_AT_CNT_MSG);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -288,8 +288,8 @@ static inline int send_flush_msg(int dest, MPIR_Win * win_ptr)
     MPIDI_CH3_Pkt_flush_t *flush_pkt = &upkt.flush;
     MPIR_Request *req = NULL;
     MPIDI_VC_t *vc;
-    MPIDI_STATE_DECL(MPID_STATE_SEND_FLUSH_MSG);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_FLUSH_MSG);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SEND_FLUSH_MSG);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_SEND_FLUSH_MSG);
 
     MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
 
@@ -308,7 +308,7 @@ static inline int send_flush_msg(int dest, MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_FLUSH_MSG);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_SEND_FLUSH_MSG);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -393,7 +393,7 @@ static inline int enqueue_lock_origin(MPIR_Win * win_ptr, MPIDI_VC_t * vc,
         }
 
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_STREAM) {
-            MPIU_Assert(pkt->type == MPIDI_CH3_PKT_ACCUMULATE ||
+            MPIR_Assert(pkt->type == MPIDI_CH3_PKT_ACCUMULATE ||
                         pkt->type == MPIDI_CH3_PKT_GET_ACCUM);
 
             /* Only basic datatype may contain piggyback lock.
@@ -456,7 +456,7 @@ static inline int enqueue_lock_origin(MPIR_Win * win_ptr, MPIDI_VC_t * vc,
 
         /* create request to receive upcoming requests */
         req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-        MPIU_Object_set_ref(req, 1);
+        MPIR_Object_set_ref(req, 1);
 
         /* fill in area in req that will be used in Receive_data_found() */
         if (lock_discarded || data_discarded) {
@@ -471,7 +471,7 @@ static inline int enqueue_lock_origin(MPIR_Win * win_ptr, MPIDI_VC_t * vc,
 
             data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
             data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
-            MPIU_Assert(req->dev.recv_data_sz >= 0);
+            MPIR_Assert(req->dev.recv_data_sz >= 0);
         }
         else {
             req->dev.user_buf = new_ptr->data;
@@ -484,7 +484,7 @@ static inline int enqueue_lock_origin(MPIR_Win * win_ptr, MPIDI_VC_t * vc,
 
             data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
             data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
-            MPIU_Assert(req->dev.recv_data_sz >= 0);
+            MPIR_Assert(req->dev.recv_data_sz >= 0);
         }
 
         mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
@@ -556,7 +556,7 @@ static inline int handle_lock_ack(MPIR_Win * win_ptr, int target_rank, MPIDI_CH3
     MPIDI_RMA_Target_t *t = NULL;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
+    MPIR_Assert(win_ptr->states.access_state == MPIDI_RMA_PER_TARGET ||
                 win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_CALLED ||
                 win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED);
 
@@ -568,7 +568,7 @@ static inline int handle_lock_ack(MPIR_Win * win_ptr, int target_rank, MPIDI_CH3
             (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
             if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
                 win_ptr->outstanding_locks--;
-                MPIU_Assert(win_ptr->outstanding_locks >= 0);
+                MPIR_Assert(win_ptr->outstanding_locks >= 0);
             }
             else if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_DISCARDED) {
                 /* re-send lock request message. */
@@ -582,7 +582,7 @@ static inline int handle_lock_ack(MPIR_Win * win_ptr, int target_rank, MPIDI_CH3
     else if (win_ptr->states.access_state == MPIDI_RMA_LOCK_ALL_ISSUED) {
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
             win_ptr->outstanding_locks--;
-            MPIU_Assert(win_ptr->outstanding_locks >= 0);
+            MPIR_Assert(win_ptr->outstanding_locks >= 0);
             if (win_ptr->outstanding_locks == 0) {
                 win_ptr->states.access_state = MPIDI_RMA_LOCK_ALL_GRANTED;
 
@@ -606,7 +606,7 @@ static inline int handle_lock_ack(MPIR_Win * win_ptr, int target_rank, MPIDI_CH3
     mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
     if (mpi_errno != MPI_SUCCESS)
         MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(t != NULL);
+    MPIR_Assert(t != NULL);
 
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
         t->access_state = MPIDI_RMA_LOCK_GRANTED;
@@ -646,9 +646,9 @@ static inline int check_and_set_req_completion(MPIR_Win * win_ptr, MPIDI_RMA_Tar
     int i, mpi_errno = MPI_SUCCESS;
     int incomplete_req_cnt = 0;
     MPIR_Request **req = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_CHECK_AND_SET_REQ_COMPLETION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CHECK_AND_SET_REQ_COMPLETION);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_CHECK_AND_SET_REQ_COMPLETION);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_CHECK_AND_SET_REQ_COMPLETION);
 
     (*op_completed) = FALSE;
 
@@ -699,14 +699,14 @@ static inline int check_and_set_req_completion(MPIR_Win * win_ptr, MPIDI_RMA_Tar
 
     if (target->pending_net_ops_list_head == NULL) {
         win_ptr->num_targets_with_pending_net_ops--;
-        MPIU_Assert(win_ptr->num_targets_with_pending_net_ops >= 0);
+        MPIR_Assert(win_ptr->num_targets_with_pending_net_ops >= 0);
         if (win_ptr->num_targets_with_pending_net_ops == 0) {
             MPIDI_CH3I_Win_set_inactive(win_ptr);
         }
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_CHECK_AND_SET_REQ_COMPLETION);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_CHECK_AND_SET_REQ_COMPLETION);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -725,15 +725,15 @@ static inline int handle_lock_ack_with_op(MPIR_Win * win_ptr,
     mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &target);
     if (mpi_errno != MPI_SUCCESS)
         MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(target != NULL);
+    MPIR_Assert(target != NULL);
 
     /* Here the next_op_to_issue pointer should still point to the OP piggybacked
      * with LOCK */
     op = target->next_op_to_issue;
-    MPIU_Assert(op != NULL);
+    MPIR_Assert(op != NULL);
 
     MPIDI_CH3_PKT_RMA_GET_FLAGS(op->pkt, op_flags, mpi_errno);
-    MPIU_Assert(op_flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
+    MPIR_Assert(op_flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED ||
                 op_flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
 
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED) {
@@ -768,13 +768,13 @@ static inline int handle_lock_ack_with_op(MPIR_Win * win_ptr,
          * the internal request and erase all flags in current
          * operation. */
         if (op->reqs_size == 1) {
-            MPIU_Assert(op->single_req != NULL);
+            MPIR_Assert(op->single_req != NULL);
             MPIR_Request_free(op->single_req);
             op->single_req = NULL;
             op->reqs_size = 0;
         }
         else if (op->reqs_size > 1) {
-            MPIU_Assert(op->multi_reqs != NULL && op->multi_reqs[0] != NULL);
+            MPIR_Assert(op->multi_reqs != NULL && op->multi_reqs[0] != NULL);
             MPIR_Request_free(op->multi_reqs[0]);
             /* free req array in this op */
             MPL_free(op->multi_reqs);
@@ -800,8 +800,8 @@ static inline int handle_lock_ack_with_op(MPIR_Win * win_ptr,
 static inline int acquire_local_lock(MPIR_Win * win_ptr, int lock_type)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_ACQUIRE_LOCAL_LOCK);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ACQUIRE_LOCAL_LOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_ACQUIRE_LOCAL_LOCK);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_ACQUIRE_LOCAL_LOCK);
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_winlock_getlocallock);
 
@@ -825,7 +825,7 @@ static inline int acquire_local_lock(MPIR_Win * win_ptr, int lock_type)
         if (lock_type == MPI_LOCK_SHARED)
             lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED;
         else {
-            MPIU_Assert(lock_type == MPI_LOCK_EXCLUSIVE);
+            MPIR_Assert(lock_type == MPI_LOCK_EXCLUSIVE);
             lock_pkt->flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE;
         }
 
@@ -846,7 +846,7 @@ static inline int acquire_local_lock(MPIR_Win * win_ptr, int lock_type)
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_winlock_getlocallock);
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ACQUIRE_LOCAL_LOCK);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_ACQUIRE_LOCAL_LOCK);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -869,10 +869,10 @@ static inline int MPIDI_CH3I_RMA_Handle_ack(MPIR_Win * win_ptr, int target_rank)
         MPIR_ERR_POP(mpi_errno);
 
     t->sync.outstanding_acks--;
-    MPIU_Assert(t->sync.outstanding_acks >= 0);
+    MPIR_Assert(t->sync.outstanding_acks >= 0);
 
     win_ptr->outstanding_acks--;
-    MPIU_Assert(win_ptr->outstanding_acks >= 0);
+    MPIR_Assert(win_ptr->outstanding_acks >= 0);
 
   fn_exit:
     return mpi_errno;
@@ -893,16 +893,16 @@ static inline int do_accumulate_op(void *source_buf, int source_count, MPI_Datat
     MPI_User_function *uop = NULL;
     MPI_Aint source_dtp_size = 0, source_dtp_extent = 0;
     int is_empty_source = FALSE;
-    MPIDI_STATE_DECL(MPID_STATE_DO_ACCUMULATE_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_DO_ACCUMULATE_OP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_DO_ACCUMULATE_OP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_DO_ACCUMULATE_OP);
 
     /* first Judge if source buffer is empty */
     if (acc_op == MPI_NO_OP)
         is_empty_source = TRUE;
 
     if (is_empty_source == FALSE) {
-        MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(source_dtp));
+        MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(source_dtp));
         MPIDU_Datatype_get_size_macro(source_dtp, source_dtp_size);
         MPIDU_Datatype_get_extent_macro(source_dtp, source_dtp_extent);
     }
@@ -927,7 +927,7 @@ static inline int do_accumulate_op(void *source_buf, int source_count, MPI_Datat
         void *curr_target_buf;
 
         if (is_empty_source == FALSE) {
-            MPIU_Assert(source_dtp == target_dtp);
+            MPIR_Assert(source_dtp == target_dtp);
             real_stream_offset = (stream_offset / source_dtp_size) * source_dtp_extent;
             curr_target_buf = (void *) ((char *) target_buf + real_stream_offset);
         }
@@ -956,7 +956,7 @@ static inline int do_accumulate_op(void *source_buf, int source_count, MPI_Datat
             mpi_errno =
                 MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
                                      MPI_ERR_OTHER, "**nomem", 0);
-            MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
+            MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
             return mpi_errno;
         }
         /* --END ERROR HANDLING-- */
@@ -974,7 +974,7 @@ static inline int do_accumulate_op(void *source_buf, int source_count, MPI_Datat
             mpi_errno =
                 MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
                                      MPI_ERR_OTHER, "**nomem", 0);
-            MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
+            MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
             return mpi_errno;
         }
         /* --END ERROR HANDLING-- */
@@ -982,9 +982,9 @@ static inline int do_accumulate_op(void *source_buf, int source_count, MPI_Datat
         MPIDU_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);
 
         type = dtp->basic_type;
-        MPIU_Assert(type != MPI_DATATYPE_NULL);
+        MPIR_Assert(type != MPI_DATATYPE_NULL);
 
-        MPIU_Assert(type == source_dtp);
+        MPIR_Assert(type == source_dtp);
         type_size = source_dtp_size;
         type_extent = source_dtp_extent;
 
@@ -994,16 +994,16 @@ static inline int do_accumulate_op(void *source_buf, int source_count, MPI_Datat
         accumulated_count = 0;
         while (i != vec_len) {
             if (curr_len < type_size) {
-                MPIU_Assert(i != vec_len);
+                MPIR_Assert(i != vec_len);
                 i++;
                 curr_len += dloop_vec[i].DLOOP_VECTOR_LEN;
                 continue;
             }
 
-            MPIU_Assign_trunc(count, curr_len / type_size, int);
+            MPIR_Assign_trunc(count, curr_len / type_size, int);
 
             (*uop) ((char *) source_buf + type_extent * accumulated_count,
-                    (char *) target_buf + MPIU_PtrToAint(curr_loc), &count, &type);
+                    (char *) target_buf + MPIR_Ptr_to_aint(curr_loc), &count, &type);
 
             if (curr_len % type_size == 0) {
                 i++;
@@ -1025,7 +1025,7 @@ static inline int do_accumulate_op(void *source_buf, int source_count, MPI_Datat
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
 
     return mpi_errno;
   fn_fail:
@@ -1051,7 +1051,7 @@ static inline int check_piggyback_lock(MPIR_Win * win_ptr, MPIDI_VC_t * vc,
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED)
             lock_type = MPI_LOCK_SHARED;
         else {
-            MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
+            MPIR_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
             lock_type = MPI_LOCK_EXCLUSIVE;
         }
 
@@ -1083,7 +1083,7 @@ static inline int finish_op_on_target(MPIR_Win * win_ptr, MPIDI_VC_t * vc,
             MPIDI_CH3_Pkt_flags_t pkt_flags = MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED;
             if ((flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH) || (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK))
                 pkt_flags |= MPIDI_CH3_PKT_FLAG_RMA_ACK;
-            MPIU_Assert(source_win_handle != MPI_WIN_NULL);
+            MPIR_Assert(source_win_handle != MPI_WIN_NULL);
             mpi_errno = MPIDI_CH3I_Send_lock_op_ack_pkt(vc, win_ptr,
                                                         pkt_flags,
                                                         source_win_handle, MPI_REQUEST_NULL);
@@ -1104,7 +1104,7 @@ static inline int finish_op_on_target(MPIR_Win * win_ptr, MPIDI_VC_t * vc,
         }
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
             win_ptr->at_completion_counter--;
-            MPIU_Assert(win_ptr->at_completion_counter >= 0);
+            MPIR_Assert(win_ptr->at_completion_counter >= 0);
             /* Signal the local process when the op counter reaches 0. */
             if (win_ptr->at_completion_counter == 0)
                 MPIDI_CH3_Progress_signal_completion();
@@ -1136,7 +1136,7 @@ static inline int finish_op_on_target(MPIR_Win * win_ptr, MPIDI_VC_t * vc,
 
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER) {
             win_ptr->at_completion_counter--;
-            MPIU_Assert(win_ptr->at_completion_counter >= 0);
+            MPIR_Assert(win_ptr->at_completion_counter >= 0);
             /* Signal the local process when the op counter reaches 0. */
             if (win_ptr->at_completion_counter == 0)
                 MPIDI_CH3_Progress_signal_completion();
@@ -1156,12 +1156,12 @@ static inline int fill_ranks_in_win_grp(MPIR_Win * win_ptr, MPIR_Group * group_p
     int mpi_errno = MPI_SUCCESS;
     int i, *ranks_in_grp;
     MPIR_Group *win_grp_ptr;
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
 
-    MPIU_CHKLMEM_MALLOC(ranks_in_grp, int *, group_ptr->size * sizeof(int),
+    MPIR_CHKLMEM_MALLOC(ranks_in_grp, int *, group_ptr->size * sizeof(int),
                         mpi_errno, "ranks_in_grp");
     for (i = 0; i < group_ptr->size; i++)
         ranks_in_grp[i] = i;
@@ -1180,8 +1180,8 @@ static inline int fill_ranks_in_win_grp(MPIR_Win * win_ptr, MPIR_Group * group_p
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_FILL_RANKS_IN_WIN_GRP);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1231,12 +1231,12 @@ static inline void MPIDI_CH3_ExtPkt_Accum_get_stream(MPIDI_CH3_Pkt_flags_t flags
                                                      MPI_Aint * stream_offset)
 {
     if ((flags & MPIDI_CH3_PKT_FLAG_RMA_STREAM) && is_derived_dt) {
-        MPIU_Assert(ext_hdr_ptr != NULL);
+        MPIR_Assert(ext_hdr_ptr != NULL);
         (*stream_offset) =
             ((MPIDI_CH3_Ext_pkt_accum_stream_derived_t *) ext_hdr_ptr)->stream_offset;
     }
     else if (flags & MPIDI_CH3_PKT_FLAG_RMA_STREAM) {
-        MPIU_Assert(ext_hdr_ptr != NULL);
+        MPIR_Assert(ext_hdr_ptr != NULL);
         (*stream_offset) = ((MPIDI_CH3_Ext_pkt_accum_stream_t *) ext_hdr_ptr)->stream_offset;
     }
 }
diff --git a/src/mpid/ch3/src/ch3u_buffer.c b/src/mpid/ch3/src/ch3u_buffer.c
index 414fa81..7af0dba 100644
--- a/src/mpid/ch3/src/ch3u_buffer.c
+++ b/src/mpid/ch3/src/ch3u_buffer.c
@@ -44,10 +44,10 @@ void MPIDI_CH3U_Buffer_copy(
     intptr_t rdata_sz;
     MPIDU_Datatype* sdt_ptr;
     MPIDU_Datatype* rdt_ptr;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_BUFFER_COPY);
-    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_BUFFER_COPY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MEMCPY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_BUFFER_COPY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_BUFFER_COPY);
     *smpi_errno = MPI_SUCCESS;
     *rmpi_errno = MPI_SUCCESS;
 
@@ -73,9 +73,9 @@ void MPIDI_CH3U_Buffer_copy(
     
     if (sdt_contig && rdt_contig)
     {
-	MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
-	MPIU_Memcpy((char *)rbuf + rdt_true_lb, (const char *)sbuf + sdt_true_lb, sdata_sz);
-	MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
+	MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MEMCPY);
+	MPIR_Memcpy((char *)rbuf + rdt_true_lb, (const char *)sbuf + sdt_true_lb, sdata_sz);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MEMCPY);
 	*rsz = sdata_sz;
     }
     else if (sdt_contig)
@@ -170,7 +170,7 @@ void MPIDI_CH3U_Buffer_copy(
                "post-pack first=%" PRIdPTR ", last=%" PRIdPTR,
                sfirst, last ));
 	    /* --BEGIN ERROR HANDLING-- */
-	    MPIU_Assert(last > sfirst);
+	    MPIR_Assert(last > sfirst);
 	    /* --END ERROR HANDLING-- */
 	    
 	    buf_end = buf + buf_off + (last - sfirst);
@@ -184,7 +184,7 @@ void MPIDI_CH3U_Buffer_copy(
              "post-unpack first=%" PRIdPTR ", last=%" PRIdPTR,
 						rfirst, last ));
 	    /* --BEGIN ERROR HANDLING-- */
-	    MPIU_Assert(last > rfirst);
+	    MPIR_Assert(last > rfirst);
 	    /* --END ERROR HANDLING-- */
 
 	    rfirst = last;
@@ -218,7 +218,7 @@ void MPIDI_CH3U_Buffer_copy(
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_BUFFER_COPY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_BUFFER_COPY);
 }
 
 
diff --git a/src/mpid/ch3/src/ch3u_comm.c b/src/mpid/ch3/src/ch3u_comm.c
index 234f4ec..a8c7502 100644
--- a/src/mpid/ch3/src/ch3u_comm.c
+++ b/src/mpid/ch3/src/ch3u_comm.c
@@ -58,11 +58,11 @@ int MPIDI_CH3I_Comm_init(void)
 {
     int mpi_errno = MPI_SUCCESS;
 #if defined HAVE_LIBHCOLL && MPID_CH3I_CH_HCOLL_BCOL
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 #endif
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_COMM_INIT);
 
     MPIR_Add_finalize(register_hook_finalize, NULL, MPIR_FINALIZE_CALLBACK_PRIO-1);
 
@@ -89,7 +89,7 @@ int MPIDI_CH3I_Comm_init(void)
             char *envstr;
             int size = strlen("HCOLL_BCOL=") + strlen(MPID_CH3I_CH_HCOLL_BCOL) + 1;
 
-            MPIU_CHKLMEM_MALLOC(envstr, char *, size, mpi_errno, "**malloc");
+            MPIR_CHKLMEM_MALLOC(envstr, char *, size, mpi_errno, "**malloc");
             MPL_snprintf(envstr, size, "HCOLL_BCOL=%s", MPID_CH3I_CH_HCOLL_BCOL);
 
             r = MPL_putenv(envstr);
@@ -108,9 +108,9 @@ int MPIDI_CH3I_Comm_init(void)
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_COMM_INIT);
 #if defined HAVE_LIBHCOLL && MPID_CH3I_CH_HCOLL_BCOL
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
 #endif
     return mpi_errno;
  fn_fail:
@@ -190,9 +190,9 @@ int MPIDI_CH3I_Comm_create_hook(MPIR_Comm *comm)
     MPIR_Comm *src_comm;
     int vcrt_size, vcrt_offset;
     
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_CREATE_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_CREATE_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_CREATE_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_COMM_CREATE_HOOK);
 
     /* initialize the is_disconnected variable to FALSE.  this will be
      * set to TRUE if the communicator is freed by an
@@ -202,10 +202,10 @@ int MPIDI_CH3I_Comm_create_hook(MPIR_Comm *comm)
     /* do some sanity checks */
     MPL_LL_FOREACH(comm->mapper_head, mapper) {
         if (mapper->src_comm->comm_kind == MPIR_COMM_KIND__INTRACOMM)
-            MPIU_Assert(mapper->dir == MPIR_COMM_MAP_DIR__L2L ||
+            MPIR_Assert(mapper->dir == MPIR_COMM_MAP_DIR__L2L ||
                         mapper->dir == MPIR_COMM_MAP_DIR__L2R);
         if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM)
-            MPIU_Assert(mapper->dir == MPIR_COMM_MAP_DIR__L2L ||
+            MPIR_Assert(mapper->dir == MPIR_COMM_MAP_DIR__L2L ||
                         mapper->dir == MPIR_COMM_MAP_DIR__R2L);
     }
 
@@ -244,7 +244,7 @@ int MPIDI_CH3I_Comm_create_hook(MPIR_Comm *comm)
                          mapper->src_comm->local_size, vcrt_size, vcrt_offset);
         }
         else {  /* mapper->dir == MPIR_COMM_MAP_DIR__R2L */
-            MPIU_Assert(src_comm->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+            MPIR_Assert(src_comm->comm_kind == MPIR_COMM_KIND__INTERCOMM);
             if (comm->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
                 dup_vcrt(src_comm->dev.vcrt, &comm->dev.vcrt, mapper, mapper->src_comm->remote_size,
                          vcrt_size, vcrt_offset);
@@ -274,7 +274,7 @@ int MPIDI_CH3I_Comm_create_hook(MPIR_Comm *comm)
             mapper->dir == MPIR_COMM_MAP_DIR__R2L)
             continue;
 
-        MPIU_Assert(comm->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+        MPIR_Assert(comm->comm_kind == MPIR_COMM_KIND__INTERCOMM);
 
         if (mapper->dir == MPIR_COMM_MAP_DIR__L2R) {
             if (src_comm->comm_kind == MPIR_COMM_KIND__INTRACOMM)
@@ -285,7 +285,7 @@ int MPIDI_CH3I_Comm_create_hook(MPIR_Comm *comm)
                          mapper->src_comm->local_size, vcrt_size, vcrt_offset);
         }
         else {  /* mapper->dir == MPIR_COMM_MAP_DIR__R2R */
-            MPIU_Assert(src_comm->comm_kind == MPIR_COMM_KIND__INTERCOMM);
+            MPIR_Assert(src_comm->comm_kind == MPIR_COMM_KIND__INTERCOMM);
             dup_vcrt(src_comm->dev.vcrt, &comm->dev.vcrt, mapper, mapper->src_comm->remote_size,
                      vcrt_size, vcrt_offset);
         }
@@ -306,7 +306,7 @@ int MPIDI_CH3I_Comm_create_hook(MPIR_Comm *comm)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_CREATE_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_COMM_CREATE_HOOK);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -320,9 +320,9 @@ int MPIDI_CH3I_Comm_destroy_hook(MPIR_Comm *comm)
 {
     int mpi_errno = MPI_SUCCESS;
     hook_elt *elt;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_DESTROY_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_DESTROY_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_DESTROY_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_COMM_DESTROY_HOOK);
 
     MPL_LL_FOREACH(destroy_hooks_head, elt) {
         mpi_errno = elt->hook_fn(comm, elt->param);
@@ -338,7 +338,7 @@ int MPIDI_CH3I_Comm_destroy_hook(MPIR_Comm *comm)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_DESTROY_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_COMM_DESTROY_HOOK);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -353,12 +353,12 @@ int MPIDI_CH3U_Comm_register_create_hook(int (*hook_fn)(struct MPIR_Comm *, void
 {
     int mpi_errno = MPI_SUCCESS;
     hook_elt *elt;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_CREATE_HOOK);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_CREATE_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_CREATE_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_CREATE_HOOK);
 
-    MPIU_CHKPMEM_MALLOC(elt, hook_elt *, sizeof(hook_elt), mpi_errno, "hook_elt");
+    MPIR_CHKPMEM_MALLOC(elt, hook_elt *, sizeof(hook_elt), mpi_errno, "hook_elt");
 
     elt->hook_fn = hook_fn;
     elt->param = param;
@@ -366,11 +366,11 @@ int MPIDI_CH3U_Comm_register_create_hook(int (*hook_fn)(struct MPIR_Comm *, void
     MPL_LL_PREPEND(create_hooks_head, create_hooks_tail, elt);
 
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_CREATE_HOOK);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_CREATE_HOOK);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -382,12 +382,12 @@ int MPIDI_CH3U_Comm_register_destroy_hook(int (*hook_fn)(struct MPIR_Comm *, voi
 {
     int mpi_errno = MPI_SUCCESS;
     hook_elt *elt;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
 
-    MPIU_CHKPMEM_MALLOC(elt, hook_elt *, sizeof(hook_elt), mpi_errno, "hook_elt");
+    MPIR_CHKPMEM_MALLOC(elt, hook_elt *, sizeof(hook_elt), mpi_errno, "hook_elt");
 
     elt->hook_fn = hook_fn;
     elt->param = param;
@@ -395,10 +395,10 @@ int MPIDI_CH3U_Comm_register_destroy_hook(int (*hook_fn)(struct MPIR_Comm *, voi
     MPL_LL_PREPEND(destroy_hooks_head, destroy_hooks_tail, elt);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -410,9 +410,9 @@ static int register_hook_finalize(void *param)
 {
     int mpi_errno = MPI_SUCCESS;
     hook_elt *elt, *tmp;
-    MPIDI_STATE_DECL(MPID_STATE_REGISTER_HOOK_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_REGISTER_HOOK_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_REGISTER_HOOK_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_REGISTER_HOOK_FINALIZE);
 
     MPL_LL_FOREACH_SAFE(create_hooks_head, elt, tmp) {
         MPL_LL_DELETE(create_hooks_head, create_hooks_tail, elt);
@@ -425,7 +425,7 @@ static int register_hook_finalize(void *param)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_REGISTER_HOOK_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_REGISTER_HOOK_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -439,9 +439,9 @@ static int register_hook_finalize(void *param)
 int comm_created(MPIR_Comm *comm, void *param)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_COMM_CREATED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_COMM_CREATED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_COMM_CREATED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_COMM_CREATED);
 
     comm->dev.anysource_enabled = TRUE;
 
@@ -454,7 +454,7 @@ int comm_created(MPIR_Comm *comm, void *param)
     COMM_ADD(comm);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_COMM_CREATED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_COMM_CREATED);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -467,16 +467,16 @@ int comm_created(MPIR_Comm *comm, void *param)
 int comm_destroyed(MPIR_Comm *comm, void *param)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_COMM_DESTROYED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_COMM_DESTROYED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_COMM_DESTROYED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_COMM_DESTROYED);
 
     COMM_DEL(comm);
     comm->dev.next = NULL;
     comm->dev.prev = NULL;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_COMM_DESTROYED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_COMM_DESTROYED);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -493,9 +493,9 @@ static int nonempty_intersection(MPIR_Comm *comm, MPIR_Group *group, int *flag)
     int mpi_errno = MPI_SUCCESS;
     int i_g, i_c;
     MPIDI_VC_t *vc_g, *vc_c;
-    MPIDI_STATE_DECL(MPID_STATE_NONEMPTY_INTERSECTION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_NONEMPTY_INTERSECTION);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_NONEMPTY_INTERSECTION);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_NONEMPTY_INTERSECTION);
 
     /* handle common case fast */
     if (comm == MPIR_Process.comm_world || comm == MPIR_Process.icomm_world) {
@@ -522,7 +522,7 @@ static int nonempty_intersection(MPIR_Comm *comm, MPIR_Group *group, int *flag)
     }
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_NONEMPTY_INTERSECTION);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_NONEMPTY_INTERSECTION);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -538,9 +538,9 @@ int MPIDI_CH3I_Comm_handle_failed_procs(MPIR_Group *new_failed_procs)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm;
     int flag = FALSE;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
 
     /* mark communicators with new failed processes as collectively inactive and
        disable posting anysource receives */
@@ -566,16 +566,16 @@ int MPIDI_CH3I_Comm_handle_failed_procs(MPIR_Group *new_failed_procs)
     MPIDI_CH3_Progress_signal_completion();
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_COMM_HANDLE_FAILED_PROCS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
 }
 
-void MPIDI_CH3I_Comm_find(MPIU_Context_id_t context_id, MPIR_Comm **comm)
+void MPIDI_CH3I_Comm_find(MPIR_Context_id_t context_id, MPIR_Comm **comm)
 {
-    MPIDI_STATE_DECL(MPIDI_STATE_MPIDI_CH3I_COMM_FIND);
-    MPIDI_FUNC_ENTER(MPIDI_STATE_MPIDI_CH3I_COMM_FIND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPIDI_STATE_MPIDI_CH3I_COMM_FIND);
+    MPIR_FUNC_VERBOSE_ENTER(MPIDI_STATE_MPIDI_CH3I_COMM_FIND);
 
     COMM_FOREACH((*comm)) {
         if ((*comm)->context_id == context_id || ((*comm)->context_id + MPIR_CONTEXT_INTRA_COLL) == context_id ||
@@ -586,5 +586,5 @@ void MPIDI_CH3I_Comm_find(MPIU_Context_id_t context_id, MPIR_Comm **comm)
         }
     }
 
-    MPIDI_FUNC_EXIT(MPIDI_STATE_MPIDI_CH3I_COMM_FIND);
+    MPIR_FUNC_VERBOSE_EXIT(MPIDI_STATE_MPIDI_CH3I_COMM_FIND);
 }
diff --git a/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c b/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c
index b451ee6..ce9e54e 100644
--- a/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c
+++ b/src/mpid/ch3/src/ch3u_comm_spawn_multiple.c
@@ -113,9 +113,9 @@ int MPIDI_Comm_spawn_multiple(int count, char **commands,
     PMI_keyval_t **info_keyval_vectors=0, preput_keyval_vector;
     int *pmi_errcodes = 0, pmi_errno;
     int total_num_processes, should_accept = 1;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);
 
 
     if (comm_ptr->rank == root) {
@@ -142,13 +142,13 @@ int MPIDI_Comm_spawn_multiple(int count, char **commands,
 
 	/* Spawn the processes */
 #ifdef USE_PMI2_API
-        MPIU_Assert(count > 0);
+        MPIR_Assert(count > 0);
         {
             int *argcs = MPL_malloc(count*sizeof(int));
             struct MPIR_Info preput;
             struct MPIR_Info *preput_p[1] = { &preput };
 
-            MPIU_Assert(argcs);
+            MPIR_Assert(argcs);
             /*
             info_keyval_sizes = MPL_malloc(count * sizeof(int));
             */
@@ -297,7 +297,7 @@ int MPIDI_Comm_spawn_multiple(int count, char **commands,
     if (pmi_errcodes) {
 	MPL_free(pmi_errcodes);
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/ch3u_eager.c b/src/mpid/ch3/src/ch3u_eager.c
index e5b332e..e89c1f0 100644
--- a/src/mpid/ch3/src/ch3u_eager.c
+++ b/src/mpid/ch3/src/ch3u_eager.c
@@ -26,9 +26,9 @@ int MPIDI_CH3_SendNoncontig_iov( MPIDI_VC_t *vc, MPIR_Request *sreq,
     int mpi_errno = MPI_SUCCESS;
     int iov_n;
     MPL_IOV iov[MPL_IOV_LIMIT];
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SENDNONCONTIG_IOV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SENDNONCONTIG_IOV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SENDNONCONTIG_IOV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SENDNONCONTIG_IOV);
 
     iov[0].MPL_IOV_BUF = header;
     iov[0].MPL_IOV_LEN = hdr_sz;
@@ -72,7 +72,7 @@ int MPIDI_CH3_SendNoncontig_iov( MPIDI_VC_t *vc, MPIR_Request *sreq,
 
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SENDNONCONTIG_IOV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SENDNONCONTIG_IOV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/ch3u_handle_connection.c b/src/mpid/ch3/src/ch3u_handle_connection.c
index 4d276b0..ef5c371 100644
--- a/src/mpid/ch3/src/ch3u_handle_connection.c
+++ b/src/mpid/ch3/src/ch3u_handle_connection.c
@@ -49,9 +49,9 @@ int MPIDI_CH3U_Handle_connection(MPIDI_VC_t * vc, MPIDI_VC_Event_t event)
 {
     int inuse;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_CONNECTION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_CONNECTION);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_CONNECTION);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_CONNECTION);
 
     switch (event)
     {
@@ -159,14 +159,14 @@ int MPIDI_CH3U_Handle_connection(MPIDI_VC_t * vc, MPIDI_VC_Event_t event)
             /* FIXME: Decrement the reference count?  Who increments? */
             /* FIXME: The reference count is often already 0.  But
                not always */
-            /* MPIU_Object_set_ref(vc, 0); ??? */
+            /* MPIR_Object_set_ref(vc, 0); ??? */
 
             /*
              * FIXME: The VC used in connect accept has a NULL 
              * process group
              */
             /* XXX DJG FIXME-MT should we be checking this ref_count? */
-            if (vc->pg != NULL && (MPIU_Object_get_ref(vc) == 0))
+            if (vc->pg != NULL && (MPIR_Object_get_ref(vc) == 0))
             {
                 /* FIXME: Who increments the reference count that
                    this is decrementing? */
@@ -191,7 +191,7 @@ int MPIDI_CH3U_Handle_connection(MPIDI_VC_t * vc, MPIDI_VC_Event_t event)
     }
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_CONNECTION);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_CONNECTION);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -218,13 +218,13 @@ int MPIDI_CH3U_VC_SendClose( MPIDI_VC_t *vc, int rank )
     MPIDI_CH3_Pkt_close_t * close_pkt = &upkt.close;
     MPIR_Request * sreq;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
 
     MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
 
-    MPIU_Assert( vc->state == MPIDI_VC_STATE_ACTIVE ||
+    MPIR_Assert( vc->state == MPIDI_VC_STATE_ACTIVE ||
 		 vc->state == MPIDI_VC_STATE_REMOTE_CLOSE );
 
     MPIDI_Pkt_init(close_pkt, MPIDI_CH3_PKT_CLOSE);
@@ -248,7 +248,7 @@ int MPIDI_CH3U_VC_SendClose( MPIDI_VC_t *vc, int rank )
         MPIDI_CHANGE_VC_STATE(vc, LOCAL_CLOSE);
     }
     else {
-	MPIU_Assert( vc->state == MPIDI_VC_STATE_REMOTE_CLOSE );
+	MPIR_Assert( vc->state == MPIDI_VC_STATE_REMOTE_CLOSE );
         MPIDI_CHANGE_VC_STATE(vc, CLOSE_ACKED);
     }
 		
@@ -264,7 +264,7 @@ int MPIDI_CH3U_VC_SendClose( MPIDI_VC_t *vc, int rank )
  fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -322,7 +322,7 @@ int MPIDI_CH3_PktHandler_Close( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                      "received close(FALSE) from %d, moving to REMOTE_CLOSE.",
 				   vc->pg_rank);
             }
-	    MPIU_Assert(vc->state == MPIDI_VC_STATE_ACTIVE);
+	    MPIR_Assert(vc->state == MPIDI_VC_STATE_ACTIVE);
             MPIDI_CHANGE_VC_STATE(vc, REMOTE_CLOSE);
 	}
     }
@@ -331,7 +331,7 @@ int MPIDI_CH3_PktHandler_Close( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 	MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,
                        "received close(TRUE) from %d, moving to CLOSED.", 
 			       vc->pg_rank);
-	MPIU_Assert (vc->state == MPIDI_VC_STATE_LOCAL_CLOSE || 
+	MPIR_Assert (vc->state == MPIDI_VC_STATE_LOCAL_CLOSE ||
 		     vc->state == MPIDI_VC_STATE_CLOSE_ACKED);
         MPIDI_CHANGE_VC_STATE(vc, CLOSED);
 	/* For example, with sockets, Connection_terminate will close
@@ -371,9 +371,9 @@ int MPIDI_CH3U_VC_WaitForClose( void )
 {
     MPID_Progress_state progress_state;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_VC_WAITFORCLOSE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_VC_WAITFORCLOSE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_VC_WAITFORCLOSE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_VC_WAITFORCLOSE);
 
     MPID_Progress_start(&progress_state);
     while(MPIDI_Outstanding_close_ops > 0) {
@@ -390,7 +390,7 @@ int MPIDI_CH3U_VC_WaitForClose( void )
     }
     MPID_Progress_end(&progress_state);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_VC_WAITFORCLOSE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_VC_WAITFORCLOSE);
     return mpi_errno;
 }
 
@@ -402,9 +402,9 @@ static int terminate_failed_VCs(MPIR_Group *new_failed_group)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_TERMINATE_FAILED_VCS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_TERMINATE_FAILED_VCS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_TERMINATE_FAILED_VCS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_TERMINATE_FAILED_VCS);
 
     for (i = 0; i < new_failed_group->size; ++i) {
         MPIDI_VC_t *vc;
@@ -416,7 +416,7 @@ static int terminate_failed_VCs(MPIR_Group *new_failed_group)
     }
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_TERMINATE_FAILED_VCS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_TERMINATE_FAILED_VCS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -449,9 +449,9 @@ int MPIDI_CH3U_Get_failed_group(int last_rank, MPIR_Group **failed_group)
     int i, mpi_errno = MPI_SUCCESS, rank;
     UT_array *failed_procs = NULL;
     MPIR_Group *world_group;
-    MPIDI_STATE_DECL(MPID_STATE_GET_FAILED_GROUP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_GET_FAILED_GROUP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_GET_FAILED_GROUP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_GET_FAILED_GROUP);
 
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER, VERBOSE, "Getting failed group with %d as last acknowledged\n", last_rank);
 
@@ -497,7 +497,7 @@ int MPIDI_CH3U_Get_failed_group(int last_rank, MPIR_Group **failed_group)
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_GET_FAILED_GROUP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GET_FAILED_GROUP);
     if (failed_procs)
         utarray_free(failed_procs);
     return mpi_errno;
@@ -518,9 +518,9 @@ int MPIDI_CH3U_Check_for_failed_procs(void)
     int len;
     char *kvsname;
     MPIR_Group *prev_failed_group, *new_failed_group;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
 
     /* FIXME: Currently this only handles failed processes in
        comm_world.  We need to fix hydra to include the pgid along
@@ -577,7 +577,7 @@ int MPIDI_CH3U_Check_for_failed_procs(void)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
     return mpi_errno;
 
  fn_oom: /* out-of-memory handler for utarray operations */
diff --git a/src/mpid/ch3/src/ch3u_handle_op_req.c b/src/mpid/ch3/src/ch3u_handle_op_req.c
index 6d54000..7e83418 100644
--- a/src/mpid/ch3/src/ch3u_handle_op_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_op_req.c
@@ -18,8 +18,8 @@ int MPIDI_CH3_Req_handler_rma_op_complete(MPIR_Request * sreq)
     MPIR_Request *ureq = NULL;
     MPIR_Win *win_ptr = NULL;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);
 
     if (sreq->dev.rma_target_ptr != NULL) {
         (sreq->dev.rma_target_ptr)->num_pkts_wait_for_local_completion--;
@@ -27,9 +27,9 @@ int MPIDI_CH3_Req_handler_rma_op_complete(MPIR_Request * sreq)
 
     /* get window, decrement active request cnt on window */
     MPIR_Win_get_ptr(sreq->dev.source_win_handle, win_ptr);
-    MPIU_Assert(win_ptr != NULL);
+    MPIR_Assert(win_ptr != NULL);
     MPIDI_CH3I_RMA_Active_req_cnt--;
-    MPIU_Assert(MPIDI_CH3I_RMA_Active_req_cnt >= 0);
+    MPIR_Assert(MPIDI_CH3I_RMA_Active_req_cnt >= 0);
 
     if (sreq->dev.request_handle != MPI_REQUEST_NULL) {
         /* get user request */
@@ -41,7 +41,7 @@ int MPIDI_CH3_Req_handler_rma_op_complete(MPIR_Request * sreq)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
index a2ceb03..9f0673e 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
@@ -58,15 +58,15 @@ int MPIDI_CH3U_Handle_ordered_recv_pkt(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     int mpi_errno = MPI_SUCCESS;
     static MPIDI_CH3_PktHandler_Fcn *pktArray[MPIDI_CH3_PKT_END_CH3+1];
     static int needsInit = 1;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);
 
     MPL_DBG_STMT(MPIDI_CH3_DBG_OTHER,VERBOSE,MPIDI_DBG_Print_packet(pkt));
 
     /* FIXME: We can turn this into something like
 
-       MPIU_Assert(pkt->type <= MAX_PACKET_TYPE);
+       MPIR_Assert(pkt->type <= MAX_PACKET_TYPE);
        mpi_errno = MPIDI_CH3_ProgressFunctions[pkt->type](vc,pkt,rreqp);
        
        in the progress engine itself.  Then this routine is not necessary.
@@ -77,10 +77,10 @@ int MPIDI_CH3U_Handle_ordered_recv_pkt(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 	needsInit = 0;
     }
     /* Packet type is an enum and hence >= 0 */
-    MPIU_Assert(pkt->type <= MPIDI_CH3_PKT_END_CH3);
+    MPIR_Assert(pkt->type <= MPIDI_CH3_PKT_END_CH3);
     mpi_errno = pktArray[pkt->type](vc, pkt, buflen, rreqp);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);
     return mpi_errno;
 }
 
@@ -110,9 +110,9 @@ int MPIDI_CH3U_Receive_data_found(MPIR_Request *rreq, char *buf, intptr_t *bufle
     MPIDU_Datatype * dt_ptr = NULL;
     intptr_t data_sz;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");
 	
@@ -149,7 +149,7 @@ int MPIDI_CH3U_Receive_data_found(MPIR_Request *rreq, char *buf, intptr_t *bufle
             MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"Copying contiguous data to user buffer");
             /* copy data out of the receive buffer */
             if (rreq->dev.drop_data == FALSE) {
-                MPIU_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
+                MPIR_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
             }
             *buflen = data_sz;
             *complete = TRUE;
@@ -225,7 +225,7 @@ int MPIDI_CH3U_Receive_data_found(MPIR_Request *rreq, char *buf, intptr_t *bufle
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -238,9 +238,9 @@ fn_fail:
 int MPIDI_CH3U_Receive_data_unexpected(MPIR_Request * rreq, char *buf, intptr_t *buflen, int *complete)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);
 
     /* FIXME: to improve performance, allocate temporary buffer from a 
        specialized buffer pool. */
@@ -259,7 +259,7 @@ int MPIDI_CH3U_Receive_data_unexpected(MPIR_Request * rreq, char *buf, intptr_t
        now, otherwise build an iov and let the channel copy it */
     if (rreq->dev.recv_data_sz <= *buflen)
     {
-        MPIU_Memcpy(rreq->dev.tmpbuf, buf, rreq->dev.recv_data_sz);
+        MPIR_Memcpy(rreq->dev.tmpbuf, buf, rreq->dev.recv_data_sz);
         *buflen = rreq->dev.recv_data_sz;
         rreq->dev.recv_pending_count = 1;
         *complete = TRUE;
@@ -280,7 +280,7 @@ int MPIDI_CH3U_Receive_data_unexpected(MPIR_Request * rreq, char *buf, intptr_t
     rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackUEBufComplete;
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);
     return mpi_errno;
 }
 
@@ -301,9 +301,9 @@ int MPIDI_CH3U_Post_data_receive_found(MPIR_Request * rreq)
     intptr_t userbuf_sz;
     MPIDU_Datatype * dt_ptr = NULL;
     intptr_t data_sz;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");
 	
@@ -360,7 +360,7 @@ int MPIDI_CH3U_Post_data_receive_found(MPIR_Request * rreq)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -373,9 +373,9 @@ int MPIDI_CH3U_Post_data_receive_found(MPIR_Request * rreq)
 int MPIDI_CH3U_Post_data_receive_unexpected(MPIR_Request * rreq)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_UNEXPECTED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_UNEXPECTED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_UNEXPECTED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_UNEXPECTED);
 
     /* FIXME: to improve performance, allocate temporary buffer from a 
        specialized buffer pool. */
@@ -397,7 +397,7 @@ int MPIDI_CH3U_Post_data_receive_unexpected(MPIR_Request * rreq)
     rreq->dev.recv_pending_count = 2;
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_UNEXPECTED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_UNEXPECTED);
     return mpi_errno;
 }
 
@@ -416,9 +416,9 @@ int MPIDI_CH3U_Post_data_receive_unexpected(MPIR_Request * rreq)
 int MPIDI_CH3I_Try_acquire_win_lock(MPIR_Win *win_ptr, int requested_lock)
 {
     int existing_lock;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_TRY_ACQUIRE_WIN_LOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_TRY_ACQUIRE_WIN_LOCK);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_TRY_ACQUIRE_WIN_LOCK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_TRY_ACQUIRE_WIN_LOCK);
 
     existing_lock = win_ptr->current_lock_type;
 
@@ -446,12 +446,12 @@ int MPIDI_CH3I_Try_acquire_win_lock(MPIR_Win *win_ptr, int requested_lock)
         if (requested_lock == MPI_LOCK_SHARED)
             win_ptr->shared_lock_ref_cnt++;
 
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_TRY_ACQUIRE_WIN_LOCK);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_TRY_ACQUIRE_WIN_LOCK);
         return 1;
     }
     else {
         /* do not grant lock */
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_TRY_ACQUIRE_WIN_LOCK);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_TRY_ACQUIRE_WIN_LOCK);
         return 0;
     }
 }
@@ -500,11 +500,11 @@ int MPIDI_CH3_PktHandler_EndCH3( MPIDI_VC_t *vc ATTRIBUTE((unused)),
 				 intptr_t *buflen ATTRIBUTE((unused)),
 				 MPIR_Request **rreqp ATTRIBUTE((unused)) )
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_ENDCH3);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_ENDCH3);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_ENDCH3);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_ENDCH3);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_ENDCH3);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_ENDCH3);
 
     return MPI_SUCCESS;
 }
@@ -527,9 +527,9 @@ int MPIDI_CH3_PktHandler_Init( MPIDI_CH3_PktHandler_Fcn *pktArray[],
 			       int arraySize  )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_INIT);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_INIT);
 
     /* Check that the array is large enough */
     if (arraySize < MPIDI_CH3_PKT_END_CH3) {
@@ -626,7 +626,7 @@ int MPIDI_CH3_PktHandler_Init( MPIDI_CH3_PktHandler_Fcn *pktArray[],
         MPIDI_CH3_PktHandler_Revoke;
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_INIT);
     return mpi_errno;
 }
     
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_req.c b/src/mpid/ch3/src/ch3u_handle_recv_req.c
index c79082b..0d6dedb 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_req.c
@@ -19,16 +19,16 @@ int MPIDI_CH3U_Handle_recv_req(MPIDI_VC_t * vc, MPIR_Request * rreq, int *comple
     static int in_routine ATTRIBUTE((unused)) = FALSE;
     int mpi_errno = MPI_SUCCESS;
     int (*reqFn) (MPIDI_VC_t *, MPIR_Request *, int *);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);
 
-    MPIU_Assert(in_routine == FALSE);
+    MPIR_Assert(in_routine == FALSE);
     in_routine = TRUE;
 
     reqFn = rreq->dev.OnDataAvail;
     if (!reqFn) {
-        MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
+        MPIR_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
         mpi_errno = MPID_Request_complete(rreq);
         if (mpi_errno != MPI_SUCCESS) {
             MPIR_ERR_POP(mpi_errno);
@@ -42,7 +42,7 @@ int MPIDI_CH3U_Handle_recv_req(MPIDI_VC_t * vc, MPIR_Request * rreq, int *comple
     in_routine = FALSE;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -96,9 +96,9 @@ int MPIDI_CH3_ReqHandler_PutRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     MPIR_Win *win_ptr;
     MPI_Win source_win_handle = rreq->dev.source_win_handle;
     MPIDI_CH3_Pkt_flags_t flags = rreq->dev.flags;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
 
     /* NOTE: It is possible that this request is already completed before
      * entering this handler. This happens when this req handler is called
@@ -138,7 +138,7 @@ int MPIDI_CH3_ReqHandler_PutRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     *complete = TRUE;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTRECVCOMPLETE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -161,9 +161,9 @@ int MPIDI_CH3_ReqHandler_AccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq,
     MPI_Datatype basic_type;
     MPI_Aint predef_count, predef_dtp_size;
     MPI_Aint stream_offset;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
 
     /* NOTE: It is possible that this request is already completed before
      * entering this handler. This happens when this req handler is called
@@ -185,18 +185,18 @@ int MPIDI_CH3_ReqHandler_AccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq,
 
     MPIR_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
 
-    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV);
+    MPIR_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV);
 
     if (MPIR_DATATYPE_IS_PREDEFINED(rreq->dev.datatype))
         basic_type = rreq->dev.datatype;
     else {
         basic_type = rreq->dev.datatype_ptr->basic_type;
     }
-    MPIU_Assert(basic_type != MPI_DATATYPE_NULL);
+    MPIR_Assert(basic_type != MPI_DATATYPE_NULL);
 
     MPIDU_Datatype_get_size_macro(basic_type, predef_dtp_size);
     predef_count = rreq->dev.recv_data_sz / predef_dtp_size;
-    MPIU_Assert(predef_count > 0);
+    MPIR_Assert(predef_count > 0);
 
     stream_offset = 0;
     MPIDI_CH3_ExtPkt_Accum_get_stream(flags,
@@ -206,7 +206,7 @@ int MPIDI_CH3_ReqHandler_AccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq,
     if (win_ptr->shm_allocated == TRUE)
         MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
     /* accumulate data from tmp_buf into user_buf */
-    MPIU_Assert(predef_count == (int) predef_count);
+    MPIR_Assert(predef_count == (int) predef_count);
     mpi_errno = do_accumulate_op(rreq->dev.user_buf, (int) predef_count, basic_type,
                                  rreq->dev.real_user_buf, rreq->dev.user_count, rreq->dev.datatype,
                                  stream_offset, rreq->dev.op);
@@ -237,7 +237,7 @@ int MPIDI_CH3_ReqHandler_AccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq,
     *complete = TRUE;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMRECVCOMPLETE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -268,10 +268,10 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
     int is_empty_origin = FALSE;
     MPI_Aint extent, type_size;
     MPI_Aint stream_data_len, total_len;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);
 
     /* Judge if origin buffer is empty */
     if (rreq->dev.op == MPI_NO_OP) {
@@ -285,7 +285,7 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
     else {
         basic_type = rreq->dev.datatype_ptr->basic_type;
     }
-    MPIU_Assert(basic_type != MPI_DATATYPE_NULL);
+    MPIR_Assert(basic_type != MPI_DATATYPE_NULL);
 
     stream_offset = 0;
     MPIDI_CH3_ExtPkt_Gaccum_get_stream(rreq->dev.flags,
@@ -301,7 +301,7 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
                                (MPIDI_CH3U_SRBuf_size / extent) * predef_dtp_size);
 
     predef_count = stream_data_len / predef_dtp_size;
-    MPIU_Assert(predef_count > 0);
+    MPIR_Assert(predef_count > 0);
 
     MPIDI_Pkt_init(get_accum_resp_pkt, MPIDI_CH3_PKT_GET_ACCUM_RESP);
     get_accum_resp_pkt->request_handle = rreq->dev.resp_request_handle;
@@ -320,10 +320,10 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
 
     resp_req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     MPIR_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
-    MPIU_Object_set_ref(resp_req, 1);
+    MPIR_Object_set_ref(resp_req, 1);
     MPIDI_Request_set_type(resp_req, MPIDI_REQUEST_TYPE_GET_ACCUM_RESP);
 
-    MPIU_CHKPMEM_MALLOC(resp_req->dev.user_buf, void *, stream_data_len,
+    MPIR_CHKPMEM_MALLOC(resp_req->dev.user_buf, void *, stream_data_len,
                         mpi_errno, "GACC resp. buffer");
 
     /* NOTE: 'copy data + ACC' needs to be atomic */
@@ -334,7 +334,7 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
     /* Copy data from target window to temporary buffer */
 
     if (is_contig) {
-        MPIU_Memcpy(resp_req->dev.user_buf,
+        MPIR_Memcpy(resp_req->dev.user_buf,
                     (void *) ((char *) rreq->dev.real_user_buf + dt_true_lb +
                               stream_offset), stream_data_len);
     }
@@ -356,7 +356,7 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
     }
 
     /* accumulate data from tmp_buf into user_buf */
-    MPIU_Assert(predef_count == (int) predef_count);
+    MPIR_Assert(predef_count == (int) predef_count);
     mpi_errno = do_accumulate_op(rreq->dev.user_buf, (int) predef_count, basic_type,
                                  rreq->dev.real_user_buf, rreq->dev.user_count, rreq->dev.datatype,
                                  stream_offset, rreq->dev.op);
@@ -391,7 +391,7 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
     /* Mark get portion as handled */
     rreq->dev.resp_request_handle = MPI_REQUEST_NULL;
 
-    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV);
+    MPIR_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV);
 
     if (is_empty_origin == FALSE) {
         /* free the temporary buffer.
@@ -408,13 +408,13 @@ int MPIDI_CH3_ReqHandler_GaccumRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
 
     *complete = TRUE;
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMRECVCOMPLETE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -436,17 +436,17 @@ int MPIDI_CH3_ReqHandler_FOPRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     MPIDI_CH3_Pkt_fop_resp_t *fop_resp_pkt = &upkt.fop_resp;
     int is_contig;
     int is_empty_origin = FALSE;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);
 
     /* Judge if origin buffer is empty */
     if (rreq->dev.op == MPI_NO_OP) {
         is_empty_origin = TRUE;
     }
 
-    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_FOP_RECV);
+    MPIR_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_FOP_RECV);
 
     MPIR_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
 
@@ -458,13 +458,13 @@ int MPIDI_CH3_ReqHandler_FOPRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     resp_req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     MPIR_ERR_CHKANDJUMP(resp_req == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");
     MPIDI_Request_set_type(resp_req, MPIDI_REQUEST_TYPE_FOP_RESP);
-    MPIU_Object_set_ref(resp_req, 1);
+    MPIR_Object_set_ref(resp_req, 1);
     resp_req->dev.OnFinal = MPIDI_CH3_ReqHandler_FOPSendComplete;
     resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_FOPSendComplete;
     resp_req->dev.target_win_handle = rreq->dev.target_win_handle;
     resp_req->dev.flags = rreq->dev.flags;
 
-    MPIU_CHKPMEM_MALLOC(resp_req->dev.user_buf, void *, type_size, mpi_errno, "FOP resp. buffer");
+    MPIR_CHKPMEM_MALLOC(resp_req->dev.user_buf, void *, type_size, mpi_errno, "FOP resp. buffer");
 
     /* here we increment the Active Target counter to guarantee the GET-like
      * operation are completed when counter reaches zero. */
@@ -477,7 +477,7 @@ int MPIDI_CH3_ReqHandler_FOPRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
 
     /* Copy data into a temporary buffer in response request */
     if (is_contig) {
-        MPIU_Memcpy(resp_req->dev.user_buf, rreq->dev.real_user_buf, type_size);
+        MPIR_Memcpy(resp_req->dev.user_buf, rreq->dev.real_user_buf, type_size);
     }
     else {
         MPIDU_Segment *seg = MPIDU_Segment_alloc();
@@ -544,12 +544,12 @@ int MPIDI_CH3_ReqHandler_FOPRecvComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     *complete = TRUE;
 
   fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPRECVCOMPLETE);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -565,9 +565,9 @@ int MPIDI_CH3_ReqHandler_PutDerivedDTRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unu
     int mpi_errno = MPI_SUCCESS;
     MPIDU_Datatype*new_dtp = NULL;
     MPIDI_RMA_dtype_info *dtype_info = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
 
     /* get data from extended header */
     dtype_info = &((MPIDI_CH3_Ext_pkt_put_derived_t *) rreq->dev.ext_hdr_ptr)->dtype_info;
@@ -599,7 +599,7 @@ int MPIDI_CH3_ReqHandler_PutDerivedDTRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((unu
 
     *complete = FALSE;
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PUTDERIVEDDTRECVCOMPLETE);
     return mpi_errno;
 }
 
@@ -618,12 +618,12 @@ int MPIDI_CH3_ReqHandler_AccumMetadataRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((un
     MPI_Aint stream_offset;
     MPI_Aint type_size;
     MPI_Datatype basic_dtp;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMMETADATARECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMMETADATARECVCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMMETADATARECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMMETADATARECVCOMPLETE);
 
     stream_offset = 0;
-    MPIU_Assert(rreq->dev.ext_hdr_ptr != NULL);
+    MPIR_Assert(rreq->dev.ext_hdr_ptr != NULL);
 
     if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV_DERIVED_DT) {
         /* get data from extended header */
@@ -645,7 +645,7 @@ int MPIDI_CH3_ReqHandler_AccumMetadataRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((un
         /* update new request to get the data */
         MPIDI_Request_set_type(rreq, MPIDI_REQUEST_TYPE_ACCUM_RECV);
 
-        MPIU_Assert(rreq->dev.datatype == MPI_DATATYPE_NULL);
+        MPIR_Assert(rreq->dev.datatype == MPI_DATATYPE_NULL);
         rreq->dev.datatype = new_dtp->handle;
         rreq->dev.datatype_ptr = new_dtp;
 
@@ -654,8 +654,8 @@ int MPIDI_CH3_ReqHandler_AccumMetadataRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((un
         basic_dtp = new_dtp->basic_type;
     }
     else {
-        MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV);
-        MPIU_Assert(rreq->dev.datatype != MPI_DATATYPE_NULL);
+        MPIR_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV);
+        MPIR_Assert(rreq->dev.datatype != MPI_DATATYPE_NULL);
 
         /* get data from extended header */
         if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_STREAM) {
@@ -672,7 +672,7 @@ int MPIDI_CH3_ReqHandler_AccumMetadataRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((un
     MPIDU_Datatype_get_size_macro(basic_dtp, basic_type_size);
     MPIDU_Datatype_get_extent_macro(basic_dtp, basic_type_extent);
 
-    MPIU_Assert(!MPIDI_Request_get_srbuf_flag(rreq));
+    MPIR_Assert(!MPIDI_Request_get_srbuf_flag(rreq));
     /* allocate a SRBuf for receiving stream unit */
     MPIDI_CH3U_SRBuf_alloc(rreq, MPIDI_CH3U_SRBuf_size);
     /* --BEGIN ERROR HANDLING-- */
@@ -713,7 +713,7 @@ int MPIDI_CH3_ReqHandler_AccumMetadataRecvComplete(MPIDI_VC_t * vc ATTRIBUTE((un
 
     *complete = FALSE;
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMMETADATARECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_ACCUMMETADATARECVCOMPLETE);
     return mpi_errno;
 }
 
@@ -734,9 +734,9 @@ int MPIDI_CH3_ReqHandler_GaccumMetadataRecvComplete(MPIDI_VC_t * vc,
     MPI_Aint type_size;
     MPI_Datatype basic_dtp;
     int is_empty_origin = FALSE;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMMETADATARECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMMETADATARECVCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMMETADATARECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMMETADATARECVCOMPLETE);
 
     /* Judge if origin buffer is empty */
     if (rreq->dev.op == MPI_NO_OP) {
@@ -744,7 +744,7 @@ int MPIDI_CH3_ReqHandler_GaccumMetadataRecvComplete(MPIDI_VC_t * vc,
     }
 
     stream_offset = 0;
-    MPIU_Assert(rreq->dev.ext_hdr_ptr != NULL);
+    MPIR_Assert(rreq->dev.ext_hdr_ptr != NULL);
 
     if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV_DERIVED_DT) {
         /* get data from extended header */
@@ -766,7 +766,7 @@ int MPIDI_CH3_ReqHandler_GaccumMetadataRecvComplete(MPIDI_VC_t * vc,
         /* update new request to get the data */
         MPIDI_Request_set_type(rreq, MPIDI_REQUEST_TYPE_GET_ACCUM_RECV);
 
-        MPIU_Assert(rreq->dev.datatype == MPI_DATATYPE_NULL);
+        MPIR_Assert(rreq->dev.datatype == MPI_DATATYPE_NULL);
         rreq->dev.datatype = new_dtp->handle;
         rreq->dev.datatype_ptr = new_dtp;
 
@@ -775,8 +775,8 @@ int MPIDI_CH3_ReqHandler_GaccumMetadataRecvComplete(MPIDI_VC_t * vc,
         basic_dtp = new_dtp->basic_type;
     }
     else {
-        MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV);
-        MPIU_Assert(rreq->dev.datatype != MPI_DATATYPE_NULL);
+        MPIR_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV);
+        MPIR_Assert(rreq->dev.datatype != MPI_DATATYPE_NULL);
 
         /* get data from extended header */
         if (rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_STREAM) {
@@ -802,7 +802,7 @@ int MPIDI_CH3_ReqHandler_GaccumMetadataRecvComplete(MPIDI_VC_t * vc,
         MPIDU_Datatype_get_size_macro(basic_dtp, basic_type_size);
         MPIDU_Datatype_get_extent_macro(basic_dtp, basic_type_extent);
 
-        MPIU_Assert(!MPIDI_Request_get_srbuf_flag(rreq));
+        MPIR_Assert(!MPIDI_Request_get_srbuf_flag(rreq));
         /* allocate a SRBuf for receiving stream unit */
         MPIDI_CH3U_SRBuf_alloc(rreq, MPIDI_CH3U_SRBuf_size);
         /* --BEGIN ERROR HANDLING-- */
@@ -845,7 +845,7 @@ int MPIDI_CH3_ReqHandler_GaccumMetadataRecvComplete(MPIDI_VC_t * vc,
     }
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMMETADATARECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMMETADATARECVCOMPLETE);
     return mpi_errno;
 }
 
@@ -865,13 +865,13 @@ int MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete(MPIDI_VC_t * vc,
     MPIDI_CH3_Pkt_get_resp_t *get_resp_pkt = &upkt.get_resp;
     MPIR_Request *sreq;
     MPIR_Win *win_ptr;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
 
     MPIR_Win_get_ptr(rreq->dev.target_win_handle, win_ptr);
 
-    MPIU_Assert(!(rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP));
+    MPIR_Assert(!(rreq->dev.flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP));
 
     /* get data from extended header */
     dtype_info = &((MPIDI_CH3_Ext_pkt_get_derived_t *) rreq->dev.ext_hdr_ptr)->dtype_info;
@@ -935,7 +935,7 @@ int MPIDI_CH3_ReqHandler_GetDerivedDTRecvComplete(MPIDI_VC_t * vc,
     *complete = TRUE;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GETDERIVEDDTRECVCOMPLETE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -951,9 +951,9 @@ int MPIDI_CH3_ReqHandler_UnpackUEBufComplete(MPIDI_VC_t * vc ATTRIBUTE((unused))
 {
     int recv_pending;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
 
     MPIDI_Request_decr_pending(rreq);
     MPIDI_Request_check_pending(rreq, &recv_pending);
@@ -977,7 +977,7 @@ int MPIDI_CH3_ReqHandler_UnpackUEBufComplete(MPIDI_VC_t * vc ATTRIBUTE((unused))
     *complete = TRUE;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKUEBUFCOMPLETE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -990,9 +990,9 @@ int MPIDI_CH3_ReqHandler_UnpackUEBufComplete(MPIDI_VC_t * vc ATTRIBUTE((unused))
 int MPIDI_CH3_ReqHandler_UnpackSRBufComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, int *complete)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
 
     MPIDI_CH3U_Request_unpack_srbuf(rreq);
 
@@ -1019,7 +1019,7 @@ int MPIDI_CH3_ReqHandler_UnpackSRBufComplete(MPIDI_VC_t * vc, MPIR_Request * rre
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFCOMPLETE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1033,9 +1033,9 @@ int MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV(MPIDI_VC_t * vc ATTRIBUTE((unused)
                                               MPIR_Request * rreq, int *complete)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
 
     MPIDI_CH3U_Request_unpack_srbuf(rreq);
     mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
@@ -1044,7 +1044,7 @@ int MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV(MPIDI_VC_t * vc ATTRIBUTE((unused)
     }
     *complete = FALSE;
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_UNPACKSRBUFRELOADIOV);
     return mpi_errno;
 }
 
@@ -1056,9 +1056,9 @@ int MPIDI_CH3_ReqHandler_ReloadIOV(MPIDI_VC_t * vc ATTRIBUTE((unused)),
                                    MPIR_Request * rreq, int *complete)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
 
     mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
     if (mpi_errno != MPI_SUCCESS) {
@@ -1066,7 +1066,7 @@ int MPIDI_CH3_ReqHandler_ReloadIOV(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     }
     *complete = FALSE;
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_RELOADIOV);
     return mpi_errno;
 }
 
@@ -1083,12 +1083,12 @@ static int create_derived_datatype(MPIR_Request * req, MPIDI_RMA_dtype_info * dt
     MPIDU_Datatype*new_dtp;
     int mpi_errno = MPI_SUCCESS;
     MPI_Aint ptrdiff;
-    MPIDI_STATE_DECL(MPID_STATE_CREATE_DERIVED_DATATYPE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CREATE_DERIVED_DATATYPE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CREATE_DERIVED_DATATYPE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CREATE_DERIVED_DATATYPE);
 
     /* allocate new datatype object and handle */
-    new_dtp = (MPIDU_Datatype*) MPIU_Handle_obj_alloc(&MPIDU_Datatype_mem);
+    new_dtp = (MPIDU_Datatype*) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
     if (!new_dtp) {
         MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s",
                              "MPIDU_Datatype_mem");
@@ -1096,8 +1096,8 @@ static int create_derived_datatype(MPIR_Request * req, MPIDI_RMA_dtype_info * dt
 
     *dtp = new_dtp;
 
-    /* Note: handle is filled in by MPIU_Handle_obj_alloc() */
-    MPIU_Object_set_ref(new_dtp, 1);
+    /* Note: handle is filled in by MPIR_Handle_obj_alloc() */
+    MPIR_Object_set_ref(new_dtp, 1);
     new_dtp->is_permanent = 0;
     new_dtp->is_committed = 1;
     new_dtp->attributes = 0;
@@ -1131,7 +1131,7 @@ static int create_derived_datatype(MPIR_Request * req, MPIDI_RMA_dtype_info * dt
     new_dtp->contents = NULL;
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_CREATE_DERIVED_DATATYPE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CREATE_DERIVED_DATATYPE);
 
     return mpi_errno;
 }
@@ -1144,10 +1144,10 @@ static inline int perform_put_in_lock_queue(MPIR_Win * win_ptr,
     int mpi_errno = MPI_SUCCESS;
 
     /* Piggyback candidate should have basic datatype for target datatype. */
-    MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(put_pkt->datatype));
+    MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(put_pkt->datatype));
 
     /* Make sure that all data is received for this op. */
-    MPIU_Assert(target_lock_entry->all_data_recved == 1);
+    MPIR_Assert(target_lock_entry->all_data_recved == 1);
 
     if (put_pkt->type == MPIDI_CH3_PKT_PUT_IMMED) {
         /* all data fits in packet header */
@@ -1157,7 +1157,7 @@ static inline int perform_put_in_lock_queue(MPIR_Win * win_ptr,
             MPIR_ERR_POP(mpi_errno);
     }
     else {
-        MPIU_Assert(put_pkt->type == MPIDI_CH3_PKT_PUT);
+        MPIR_Assert(put_pkt->type == MPIDI_CH3_PKT_PUT);
 
         mpi_errno = MPIR_Localcopy(target_lock_entry->data, put_pkt->count, put_pkt->datatype,
                                    put_pkt->addr, put_pkt->count, put_pkt->datatype);
@@ -1193,16 +1193,16 @@ static inline int perform_get_in_lock_queue(MPIR_Win * win_ptr,
     int mpi_errno = MPI_SUCCESS;
 
     /* Piggyback candidate should have basic datatype for target datatype. */
-    MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_pkt->datatype));
+    MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_pkt->datatype));
 
     /* Make sure that all data is received for this op. */
-    MPIU_Assert(target_lock_entry->all_data_recved == 1);
+    MPIR_Assert(target_lock_entry->all_data_recved == 1);
 
     sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     if (sreq == NULL) {
         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomemreq");
     }
-    MPIU_Object_set_ref(sreq, 1);
+    MPIR_Object_set_ref(sreq, 1);
 
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_GET_RESP);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
@@ -1234,7 +1234,7 @@ static inline int perform_get_in_lock_queue(MPIR_Win * win_ptr,
 
     /* length of target data */
     MPIDU_Datatype_get_size_macro(get_pkt->datatype, type_size);
-    MPIU_Assign_trunc(len, get_pkt->count * type_size, size_t);
+    MPIR_Assign_trunc(len, get_pkt->count * type_size, size_t);
 
     MPIDU_Datatype_is_contig(get_pkt->datatype, &is_contig);
 
@@ -1299,10 +1299,10 @@ static inline int perform_acc_in_lock_queue(MPIR_Win * win_ptr,
     MPIDI_CH3_Pkt_accum_t *acc_pkt = &((target_lock_entry->pkt).accum);
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(target_lock_entry->all_data_recved == 1);
+    MPIR_Assert(target_lock_entry->all_data_recved == 1);
 
     /* Piggyback candidate should have basic datatype for target datatype. */
-    MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(acc_pkt->datatype));
+    MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(acc_pkt->datatype));
 
     if (win_ptr->shm_allocated == TRUE)
         MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
@@ -1314,7 +1314,7 @@ static inline int perform_acc_in_lock_queue(MPIR_Win * win_ptr,
                                      0, acc_pkt->op);
     }
     else {
-        MPIU_Assert(acc_pkt->type == MPIDI_CH3_PKT_ACCUMULATE);
+        MPIR_Assert(acc_pkt->type == MPIDI_CH3_PKT_ACCUMULATE);
         MPI_Aint type_size, type_extent;
         MPI_Aint total_len, recv_count;
 
@@ -1323,11 +1323,11 @@ static inline int perform_acc_in_lock_queue(MPIR_Win * win_ptr,
 
         total_len = type_size * acc_pkt->count;
         recv_count = MPL_MIN((total_len / type_size), (MPIDI_CH3U_SRBuf_size / type_extent));
-        MPIU_Assert(recv_count > 0);
+        MPIR_Assert(recv_count > 0);
 
         /* Note: here stream_offset is 0 because when piggybacking LOCK, we must use
          * the first stream unit. */
-        MPIU_Assert(recv_count == (int) recv_count);
+        MPIR_Assert(recv_count == (int) recv_count);
         mpi_errno = do_accumulate_op(target_lock_entry->data, (int) recv_count, acc_pkt->datatype,
                                      acc_pkt->addr, acc_pkt->count, acc_pkt->datatype,
                                      0, acc_pkt->op);
@@ -1369,16 +1369,16 @@ static inline int perform_get_acc_in_lock_queue(MPIR_Win * win_ptr,
     MPI_Aint total_len, recv_count;
 
     /* Piggyback candidate should have basic datatype for target datatype. */
-    MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_accum_pkt->datatype));
+    MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_accum_pkt->datatype));
 
     /* Make sure that all data is received for this op. */
-    MPIU_Assert(target_lock_entry->all_data_recved == 1);
+    MPIR_Assert(target_lock_entry->all_data_recved == 1);
 
     sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     if (sreq == NULL) {
         MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomemreq");
     }
-    MPIU_Object_set_ref(sreq, 1);
+    MPIR_Object_set_ref(sreq, 1);
 
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_GET_ACCUM_RESP);
     sreq->kind = MPIR_REQUEST_KIND__SEND;
@@ -1392,7 +1392,7 @@ static inline int perform_get_acc_in_lock_queue(MPIR_Win * win_ptr,
     MPIDU_Datatype_get_size_macro(get_accum_pkt->datatype, type_size);
 
     /* length of target data */
-    MPIU_Assign_trunc(len, get_accum_pkt->count * type_size, size_t);
+    MPIR_Assign_trunc(len, get_accum_pkt->count * type_size, size_t);
 
     if (get_accum_pkt->type == MPIDI_CH3_PKT_GET_ACCUM_IMMED) {
         MPIDI_Pkt_init(get_accum_resp_pkt, MPIDI_CH3_PKT_GET_ACCUM_RESP_IMMED);
@@ -1455,13 +1455,13 @@ static inline int perform_get_acc_in_lock_queue(MPIR_Win * win_ptr,
         goto fn_exit;
     }
 
-    MPIU_Assert(get_accum_pkt->type == MPIDI_CH3_PKT_GET_ACCUM);
+    MPIR_Assert(get_accum_pkt->type == MPIDI_CH3_PKT_GET_ACCUM);
 
     MPIDU_Datatype_get_extent_macro(get_accum_pkt->datatype, type_extent);
 
     total_len = type_size * get_accum_pkt->count;
     recv_count = MPL_MIN((total_len / type_size), (MPIDI_CH3U_SRBuf_size / type_extent));
-    MPIU_Assert(recv_count > 0);
+    MPIR_Assert(recv_count > 0);
 
     sreq->dev.user_buf = (void *) MPL_malloc(recv_count * type_size);
 
@@ -1478,7 +1478,7 @@ static inline int perform_get_acc_in_lock_queue(MPIR_Win * win_ptr,
      * the first stream unit. */
 
     if (is_contig) {
-        MPIU_Memcpy(sreq->dev.user_buf, get_accum_pkt->addr, recv_count * type_size);
+        MPIR_Memcpy(sreq->dev.user_buf, get_accum_pkt->addr, recv_count * type_size);
     }
     else {
         MPIDU_Segment *seg = MPIDU_Segment_alloc();
@@ -1499,7 +1499,7 @@ static inline int perform_get_acc_in_lock_queue(MPIR_Win * win_ptr,
 
     /* Perform ACCUMULATE OP */
 
-    MPIU_Assert(recv_count == (int) recv_count);
+    MPIR_Assert(recv_count == (int) recv_count);
     mpi_errno = do_accumulate_op(target_lock_entry->data, (int) recv_count, get_accum_pkt->datatype,
                                  get_accum_pkt->addr, get_accum_pkt->count, get_accum_pkt->datatype,
                                  0, get_accum_pkt->op);
@@ -1558,10 +1558,10 @@ static inline int perform_fop_in_lock_queue(MPIR_Win * win_ptr,
     int mpi_errno = MPI_SUCCESS;
 
     /* Piggyback candidate should have basic datatype for target datatype. */
-    MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(fop_pkt->datatype));
+    MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(fop_pkt->datatype));
 
     /* Make sure that all data is received for this op. */
-    MPIU_Assert(target_lock_entry->all_data_recved == 1);
+    MPIR_Assert(target_lock_entry->all_data_recved == 1);
 
     /* FIXME: this function is same with PktHandler_FOP(), should
      * do code refactoring on both of them. */
@@ -1592,7 +1592,7 @@ static inline int perform_fop_in_lock_queue(MPIR_Win * win_ptr,
         if (resp_req == NULL) {
             MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomemreq");
         }
-        MPIU_Object_set_ref(resp_req, 1);
+        MPIR_Object_set_ref(resp_req, 1);
 
         resp_req->dev.OnDataAvail = MPIDI_CH3_ReqHandler_FOPSendComplete;
         resp_req->dev.OnFinal = MPIDI_CH3_ReqHandler_FOPSendComplete;
@@ -1625,7 +1625,7 @@ static inline int perform_fop_in_lock_queue(MPIR_Win * win_ptr,
         }
     }
     else if (is_contig) {
-        MPIU_Memcpy(resp_req->dev.user_buf, fop_pkt->addr, type_size);
+        MPIR_Memcpy(resp_req->dev.user_buf, fop_pkt->addr, type_size);
     }
     else {
         MPIDU_Segment *seg = MPIDU_Segment_alloc();
@@ -1726,10 +1726,10 @@ static inline int perform_cas_in_lock_queue(MPIR_Win * win_ptr,
     int mpi_errno = MPI_SUCCESS;
 
     /* Piggyback candidate should have basic datatype for target datatype. */
-    MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(cas_pkt->datatype));
+    MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(cas_pkt->datatype));
 
     /* Make sure that all data is received for this op. */
-    MPIU_Assert(target_lock_entry->all_data_recved == 1);
+    MPIR_Assert(target_lock_entry->all_data_recved == 1);
 
     MPIDI_Pkt_init(cas_resp_pkt, MPIDI_CH3_PKT_CAS_RESP_IMMED);
     cas_resp_pkt->request_handle = cas_pkt->request_handle;
@@ -1744,16 +1744,16 @@ static inline int perform_cas_in_lock_queue(MPIR_Win * win_ptr,
 
     /* Copy old value into the response packet */
     MPIDU_Datatype_get_size_macro(cas_pkt->datatype, len);
-    MPIU_Assert(len <= sizeof(MPIDI_CH3_CAS_Immed_u));
+    MPIR_Assert(len <= sizeof(MPIDI_CH3_CAS_Immed_u));
 
     if (win_ptr->shm_allocated == TRUE)
         MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
 
-    MPIU_Memcpy((void *) &cas_resp_pkt->info.data, cas_pkt->addr, len);
+    MPIR_Memcpy((void *) &cas_resp_pkt->info.data, cas_pkt->addr, len);
 
     /* Compare and replace if equal */
     if (MPIR_Compare_equal(&cas_pkt->compare_data, cas_pkt->addr, cas_pkt->datatype)) {
-        MPIU_Memcpy(cas_pkt->addr, &cas_pkt->origin_data, len);
+        MPIR_Memcpy(cas_pkt->addr, &cas_pkt->origin_data, len);
     }
 
     if (win_ptr->shm_allocated == TRUE)
@@ -1891,9 +1891,9 @@ int MPIDI_CH3I_Release_lock(MPIR_Win * win_ptr)
 {
     MPIDI_RMA_Target_lock_entry_t *target_lock_entry, *target_lock_entry_next;
     int requested_lock, mpi_errno = MPI_SUCCESS, temp_entered_count;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RELEASE_LOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_RELEASE_LOCK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RELEASE_LOCK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_RELEASE_LOCK);
 
     if (win_ptr->current_lock_type == MPI_LOCK_SHARED) {
         /* decr ref cnt */
@@ -1944,7 +1944,7 @@ int MPIDI_CH3I_Release_lock(MPIR_Win * win_ptr)
                     if (flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED)
                         requested_lock = MPI_LOCK_SHARED;
                     else {
-                        MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
+                        MPIR_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
                         requested_lock = MPI_LOCK_EXCLUSIVE;
                     }
                     if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, requested_lock) == 1) {
@@ -1976,7 +1976,7 @@ int MPIDI_CH3I_Release_lock(MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RELEASE_LOCK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_RELEASE_LOCK);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1997,9 +1997,9 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(MPIDI_VC_t * vc,
     MPIDI_CH3_Pkt_flags_t flags;
     MPIDI_RMA_Target_lock_entry_t *target_lock_queue_entry = rreq->dev.target_lock_queue_entry;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
 
     /* This handler is triggered when we received all data of a lock queue
      * entry */
@@ -2020,7 +2020,7 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(MPIDI_VC_t * vc,
         if (flags & MPIDI_CH3_PKT_FLAG_RMA_STREAM &&
             (rreq->dev.target_lock_queue_entry)->data != NULL) {
 
-            MPIU_Assert(target_lock_queue_entry->pkt.type == MPIDI_CH3_PKT_ACCUMULATE ||
+            MPIR_Assert(target_lock_queue_entry->pkt.type == MPIDI_CH3_PKT_ACCUMULATE ||
                         target_lock_queue_entry->pkt.type == MPIDI_CH3_PKT_GET_ACCUM);
 
             int ext_hdr_sz;
@@ -2044,7 +2044,7 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(MPIDI_VC_t * vc,
             requested_lock = MPI_LOCK_SHARED;
         }
         else {
-            MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
+            MPIR_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
             requested_lock = MPI_LOCK_EXCLUSIVE;
         }
 
@@ -2077,7 +2077,7 @@ int MPIDI_CH3_ReqHandler_PiggybackLockOpRecvComplete(MPIDI_VC_t * vc,
     *complete = TRUE;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_PIGGYBACKLOCKOPRECVCOMPLETE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/ch3u_handle_send_req.c b/src/mpid/ch3/src/ch3u_handle_send_req.c
index 2de7dce..cbdad6e 100644
--- a/src/mpid/ch3/src/ch3u_handle_send_req.c
+++ b/src/mpid/ch3/src/ch3u_handle_send_req.c
@@ -15,15 +15,15 @@ int MPIDI_CH3U_Handle_send_req(MPIDI_VC_t * vc, MPIR_Request * sreq, int *comple
 {
     int mpi_errno = MPI_SUCCESS;
     int (*reqFn) (MPIDI_VC_t *, MPIR_Request *, int *);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
 
     /* Use the associated function rather than switching on the old ca field */
     /* Routines can call the attached function directly */
     reqFn = sreq->dev.OnDataAvail;
     if (!reqFn) {
-        MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+        MPIR_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
         mpi_errno = MPID_Request_complete(sreq);
         *complete = 1;
     }
@@ -35,7 +35,7 @@ int MPIDI_CH3U_Handle_send_req(MPIDI_VC_t * vc, MPIR_Request * sreq, int *comple
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -78,7 +78,7 @@ int MPIDI_CH3_ReqHandler_GetSendComplete(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     /* here we decrement the Active Target counter to guarantee the GET-like
      * operation are completed when counter reaches zero. */
     win_ptr->at_completion_counter--;
-    MPIU_Assert(win_ptr->at_completion_counter >= 0);
+    MPIR_Assert(win_ptr->at_completion_counter >= 0);
 
     /* mark data transfer as complete and decrement CC */
     mpi_errno = MPID_Request_complete(sreq);
@@ -112,9 +112,9 @@ int MPIDI_CH3_ReqHandler_GaccumSendComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr;
     MPIDI_CH3_Pkt_flags_t flags = rreq->dev.flags;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMSENDCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMSENDCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMSENDCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMSENDCOMPLETE);
 
     /* NOTE: It is possible that this request is already completed before
      * entering this handler. This happens when this req handler is called
@@ -145,7 +145,7 @@ int MPIDI_CH3_ReqHandler_GaccumSendComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
     /* here we decrement the Active Target counter to guarantee the GET-like
      * operation are completed when counter reaches zero. */
     win_ptr->at_completion_counter--;
-    MPIU_Assert(win_ptr->at_completion_counter >= 0);
+    MPIR_Assert(win_ptr->at_completion_counter >= 0);
 
     mpi_errno = MPID_Request_complete(rreq);
     if (mpi_errno != MPI_SUCCESS) {
@@ -164,7 +164,7 @@ int MPIDI_CH3_ReqHandler_GaccumSendComplete(MPIDI_VC_t * vc, MPIR_Request * rreq
     *complete = TRUE;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMSENDCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_GACCUMSENDCOMPLETE);
     return mpi_errno;
 
   fn_fail:
@@ -181,9 +181,9 @@ int MPIDI_CH3_ReqHandler_CASSendComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr;
     MPIDI_CH3_Pkt_flags_t flags = rreq->dev.flags;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_CASSENDCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_CASSENDCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_CASSENDCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_CASSENDCOMPLETE);
 
     /* NOTE: It is possible that this request is already completed before
      * entering this handler. This happens when this req handler is called
@@ -214,7 +214,7 @@ int MPIDI_CH3_ReqHandler_CASSendComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     /* here we decrement the Active Target counter to guarantee the GET-like
      * operation are completed when counter reaches zero. */
     win_ptr->at_completion_counter--;
-    MPIU_Assert(win_ptr->at_completion_counter >= 0);
+    MPIR_Assert(win_ptr->at_completion_counter >= 0);
 
     mpi_errno = MPID_Request_complete(rreq);
     if (mpi_errno != MPI_SUCCESS) {
@@ -233,7 +233,7 @@ int MPIDI_CH3_ReqHandler_CASSendComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     *complete = TRUE;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_CASSENDCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_CASSENDCOMPLETE);
     return mpi_errno;
 
   fn_fail:
@@ -249,9 +249,9 @@ int MPIDI_CH3_ReqHandler_FOPSendComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr;
     MPIDI_CH3_Pkt_flags_t flags = rreq->dev.flags;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPSENDCOMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPSENDCOMPLETE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPSENDCOMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPSENDCOMPLETE);
 
     /* NOTE: It is possible that this request is already completed before
      * entering this handler. This happens when this req handler is called
@@ -282,7 +282,7 @@ int MPIDI_CH3_ReqHandler_FOPSendComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     /* here we decrement the Active Target counter to guarantee the GET-like
      * operation are completed when counter reaches zero. */
     win_ptr->at_completion_counter--;
-    MPIU_Assert(win_ptr->at_completion_counter >= 0);
+    MPIR_Assert(win_ptr->at_completion_counter >= 0);
 
     mpi_errno = MPID_Request_complete(rreq);
     if (mpi_errno != MPI_SUCCESS) {
@@ -301,7 +301,7 @@ int MPIDI_CH3_ReqHandler_FOPSendComplete(MPIDI_VC_t * vc, MPIR_Request * rreq, i
     *complete = TRUE;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPSENDCOMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQHANDLER_FOPSENDCOMPLETE);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpid/ch3/src/ch3u_port.c b/src/mpid/ch3/src/ch3u_port.c
index 7887f69..ac38034 100644
--- a/src/mpid/ch3/src/ch3u_port.c
+++ b/src/mpid/ch3/src/ch3u_port.c
@@ -121,9 +121,9 @@ static int MPIDI_Create_inter_root_communicator_connect(const char *port_name,
     MPIR_Comm *tmp_comm;
     MPIDI_VC_t *connect_vc = NULL;
     int port_name_tag;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_CONNECT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_CONNECT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_CONNECT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_CONNECT);
 
     /* Connect to the root on the other side. Create a
        temporary intercommunicator between the two roots so that
@@ -149,7 +149,7 @@ static int MPIDI_Create_inter_root_communicator_connect(const char *port_name,
     *vc_pptr = connect_vc;
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_CONNECT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_CONNECT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -171,9 +171,9 @@ static int MPIDI_Create_inter_root_communicator_accept(const char *port_name,
     MPIDI_VC_t *new_vc = NULL;
     MPID_Progress_state progress_state;
     int port_name_tag;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_ACCEPT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_ACCEPT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_ACCEPT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_ACCEPT);
 
     /* extract the tag from the port_name */
     mpi_errno = MPIDI_GetTagFromPort( port_name, &port_name_tag);
@@ -220,7 +220,7 @@ static int MPIDI_Create_inter_root_communicator_accept(const char *port_name,
 		  "new_vc=%p", new_vc));
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_ACCEPT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_ACCEPT);
     return mpi_errno;
 
 fn_fail:
@@ -239,9 +239,9 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *tmp_comm, *commself_ptr;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_INITIALIZE_TMP_COMM);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_INITIALIZE_TMP_COMM);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_INITIALIZE_TMP_COMM);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_INITIALIZE_TMP_COMM);
 
     MPIR_Comm_get_ptr( MPI_COMM_SELF, commself_ptr );
 
@@ -264,8 +264,8 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
 
     /* sanity: the INVALID context ID value could potentially conflict with the
      * dynamic proccess space */
-    MPIU_Assert(tmp_comm->context_id     != MPIU_INVALID_CONTEXT_ID);
-    MPIU_Assert(tmp_comm->recvcontext_id != MPIU_INVALID_CONTEXT_ID);
+    MPIR_Assert(tmp_comm->context_id     != MPIR_INVALID_CONTEXT_ID);
+    MPIR_Assert(tmp_comm->recvcontext_id != MPIR_INVALID_CONTEXT_ID);
 
     /* FIXME - we probably need a unique context_id. */
     tmp_comm->remote_size = 1;
@@ -307,7 +307,7 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
     *comm_pptr = tmp_comm;
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_INITIALIZE_TMP_COMM);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_INITIALIZE_TMP_COMM);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -343,12 +343,12 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
     pg_translation *local_translation = NULL, *remote_translation = NULL;
     pg_node *pg_list = NULL;
     MPIDI_PG_t **remote_pg = NULL;
-    MPIU_Context_id_t recvcontext_id = MPIU_INVALID_CONTEXT_ID;
+    MPIR_Context_id_t recvcontext_id = MPIR_INVALID_CONTEXT_ID;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIU_CHKLMEM_DECL(3);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_CONNECT);
+    MPIR_CHKLMEM_DECL(3);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_COMM_CONNECT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_COMM_CONNECT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_COMM_CONNECT);
 
     /* Get the context ID here because we need to send it to the remote side */
     mpi_errno = MPIR_Get_contextid_sparse( comm_ptr, &recvcontext_id, FALSE );
@@ -369,7 +369,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
 
 	/* Make an array to translate local ranks to process group index 
 	   and rank */
-	MPIU_CHKLMEM_MALLOC(local_translation,pg_translation*,
+	MPIR_CHKLMEM_MALLOC(local_translation,pg_translation*,
 			    local_comm_size*sizeof(pg_translation),
 			    mpi_errno,"local_translation");
 
@@ -416,10 +416,10 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
     remote_comm_size = recv_ints[1];
     context_id	     = recv_ints[2];
 
-    MPIU_CHKLMEM_MALLOC(remote_pg,MPIDI_PG_t**,
+    MPIR_CHKLMEM_MALLOC(remote_pg,MPIDI_PG_t**,
 			n_remote_pgs * sizeof(MPIDI_PG_t*),
 			mpi_errno,"remote_pg");
-    MPIU_CHKLMEM_MALLOC(remote_translation,pg_translation*,
+    MPIR_CHKLMEM_MALLOC(remote_translation,pg_translation*,
 			remote_comm_size * sizeof(pg_translation),
 			mpi_errno,"remote_translation");
     MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"allocated remote process groups");
@@ -517,8 +517,8 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
 
  fn_exit: 
     MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"Exiting ch3u_comm_connect");
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_COMM_CONNECT);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_COMM_CONNECT);
     return mpi_errno;
  fn_fail:
     {
@@ -528,7 +528,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
             if (mpi_errno2) MPIR_ERR_SET(mpi_errno2, MPI_ERR_OTHER, "**fail");
         }
 
-        if (recvcontext_id != MPIU_INVALID_CONTEXT_ID)
+        if (recvcontext_id != MPIR_INVALID_CONTEXT_ID)
             MPIR_Free_contextid(recvcontext_id);
         
         if (mpi_errno2) MPIR_ERR_ADD(mpi_errno, mpi_errno2);
@@ -540,7 +540,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
         int mpi_errno2 = MPI_SUCCESS;
 
        /* broadcast error notification to other processes */
-        MPIU_Assert(rank == root);
+        MPIR_Assert(rank == root);
         recv_ints[0] = -1;
         recv_ints[1] = -1;
         recv_ints[2] = -1;
@@ -581,10 +581,10 @@ static int ExtractLocalPGInfo( MPIR_Comm *comm_p,
 {
     pg_node        *pg_list = 0, *pg_iter, *pg_trailer;
     int            i, cur_index = 0, local_comm_size, mpi_errno = 0;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_EXTRACTLOCALPGINFO);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_EXTRACTLOCALPGINFO);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_EXTRACTLOCALPGINFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_EXTRACTLOCALPGINFO);
 
     /* If we are in the case of singleton-init, we may need to reset the
        id string for comm world.  We do this before doing anything else */
@@ -598,14 +598,14 @@ static int ExtractLocalPGInfo( MPIR_Comm *comm_p,
        group id, size and all its KVS values */
     
     cur_index = 0;
-    MPIU_CHKPMEM_MALLOC(pg_list,pg_node*,sizeof(pg_node),mpi_errno,
+    MPIR_CHKPMEM_MALLOC(pg_list,pg_node*,sizeof(pg_node),mpi_errno,
 			"pg_list");
     
     pg_list->pg_id = MPL_strdup(comm_p->dev.vcrt->vcr_table[0]->pg->id);
     pg_list->index = cur_index++;
     pg_list->next = NULL;
     /* XXX DJG FIXME-MT should we be checking this?  the add/release macros already check this */
-    MPIU_Assert( MPIU_Object_get_ref(comm_p->dev.vcrt->vcr_table[0]->pg));
+    MPIR_Assert( MPIR_Object_get_ref(comm_p->dev.vcrt->vcr_table[0]->pg));
     mpi_errno = MPIDI_PG_To_string(comm_p->dev.vcrt->vcr_table[0]->pg, &pg_list->str,
 				   &pg_list->lenStr );
     if (mpi_errno != MPI_SUCCESS) {
@@ -621,7 +621,7 @@ static int ExtractLocalPGInfo( MPIR_Comm *comm_p,
 	while (pg_iter != NULL) {
 	    /* Check to ensure pg is (probably) valid */
             /* XXX DJG FIXME-MT should we be checking this?  the add/release macros already check this */
-	    MPIU_Assert(MPIU_Object_get_ref(comm_p->dev.vcrt->vcr_table[i]->pg) != 0);
+	    MPIR_Assert(MPIR_Object_get_ref(comm_p->dev.vcrt->vcr_table[i]->pg) != 0);
 	    if (MPIDI_PG_Id_compare(comm_p->dev.vcrt->vcr_table[i]->pg->id, pg_iter->pg_id)) {
 		local_translation[i].pg_index = pg_iter->index;
 		local_translation[i].pg_rank  = comm_p->dev.vcrt->vcr_table[i]->pg_rank;
@@ -665,10 +665,10 @@ static int ExtractLocalPGInfo( MPIR_Comm *comm_p,
 
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_EXTRACTLOCALPGINFO);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_EXTRACTLOCALPGINFO);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -691,9 +691,9 @@ static int ReceivePGAndDistribute( MPIR_Comm *tmp_comm, MPIR_Comm *comm_ptr,
     int  mpi_errno = 0;
     int  recvtag = *recvtag_p;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIDI_STATE_DECL(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
 
     for (i=0; i<n_remote_pgs; i++) {
 
@@ -746,7 +746,7 @@ static int ReceivePGAndDistribute( MPIR_Comm *tmp_comm, MPIR_Comm *comm_ptr,
 	MPL_free(pg_str);
     }
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_RECEIVEPGANDDISTRIBUTE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -767,12 +767,12 @@ int MPID_PG_BCast( MPIR_Comm *peercomm_p, MPIR_Comm *comm_p, int root )
     pg_node *pg_list, *pg_next, *pg_head = 0;
     int rank, i, peer_comm_size;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     peer_comm_size = comm_p->local_size;
     rank            = comm_p->rank;
 
-    MPIU_CHKLMEM_MALLOC(local_translation,pg_translation*,
+    MPIR_CHKLMEM_MALLOC(local_translation,pg_translation*,
 			peer_comm_size*sizeof(pg_translation),
 			mpi_errno,"local_translation");
     
@@ -810,7 +810,7 @@ int MPID_PG_BCast( MPIR_Comm *peercomm_p, MPIR_Comm *comm_p, int root )
 	if (rank != root) {
 	    pg_str = (char *)MPL_malloc(len);
             if (!pg_str) {
-                MPIU_CHKMEM_SETERR(mpi_errno, len, "pg_str");
+                MPIR_CHKMEM_SETERR(mpi_errno, len, "pg_str");
                 goto fn_exit;
             }
 	}
@@ -852,7 +852,7 @@ int MPID_PG_BCast( MPIR_Comm *peercomm_p, MPIR_Comm *comm_p, int root )
     }
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -871,9 +871,9 @@ static int SendPGtoPeerAndFree( MPIR_Comm *tmp_comm, int *sendtag_p,
     int sendtag = *sendtag_p, i;
     pg_node *pg_iter;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIDI_STATE_DECL(MPID_STATE_SENDPGTOPEERANDFREE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SENDPGTOPEERANDFREE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SENDPGTOPEERANDFREE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SENDPGTOPEERANDFREE);
 
     while (pg_list != NULL) {
 	pg_iter = pg_list;
@@ -900,7 +900,7 @@ static int SendPGtoPeerAndFree( MPIR_Comm *tmp_comm, int *sendtag_p,
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SENDPGTOPEERANDFREE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SENDPGTOPEERANDFREE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -940,10 +940,10 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root,
     pg_node *pg_list = NULL;
     MPIDI_PG_t **remote_pg = NULL;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIU_CHKLMEM_DECL(3);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_ACCEPT);
+    MPIR_CHKLMEM_DECL(3);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_COMM_ACCEPT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_COMM_ACCEPT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_COMM_ACCEPT);
 
     /* Create the new intercommunicator here. We need to send the
        context id to the other side. */
@@ -971,7 +971,7 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root,
 
 	/* Make an array to translate local ranks to process group index and 
 	   rank */
-	MPIU_CHKLMEM_MALLOC(local_translation,pg_translation*,
+	MPIR_CHKLMEM_MALLOC(local_translation,pg_translation*,
 			    local_comm_size*sizeof(pg_translation),
 			    mpi_errno,"local_translation");
 
@@ -1009,10 +1009,10 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root,
     n_remote_pgs     = recv_ints[0];
     remote_comm_size = recv_ints[1];
     context_id       = recv_ints[2];
-    MPIU_CHKLMEM_MALLOC(remote_pg,MPIDI_PG_t**,
+    MPIR_CHKLMEM_MALLOC(remote_pg,MPIDI_PG_t**,
 			n_remote_pgs * sizeof(MPIDI_PG_t*),
 			mpi_errno,"remote_pg");
-    MPIU_CHKLMEM_MALLOC(remote_translation,pg_translation*,
+    MPIR_CHKLMEM_MALLOC(remote_translation,pg_translation*,
 			remote_comm_size * sizeof(pg_translation),
 			mpi_errno, "remote_translation");
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,TERSE,(MPL_DBG_FDEST,"[%d]accept:remote process groups: %d\nremote comm size: %d\n", rank, n_remote_pgs, remote_comm_size));
@@ -1111,8 +1111,8 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root,
     }
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_COMM_ACCEPT);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_COMM_ACCEPT);
     return mpi_errno;
 
 fn_fail:
@@ -1270,9 +1270,9 @@ int MPIDI_CH3I_Acceptq_enqueue(MPIDI_VC_t * vc, int port_name_tag )
 {
     int mpi_errno=MPI_SUCCESS;
     MPIDI_CH3I_Acceptq_t *q_item;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
 
     /* FIXME: Use CHKPMEM */
     q_item = (MPIDI_CH3I_Acceptq_t *)
@@ -1299,7 +1299,7 @@ int MPIDI_CH3I_Acceptq_enqueue(MPIDI_VC_t * vc, int port_name_tag )
     acceptq_head = q_item;
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
     return mpi_errno;
 }
 
@@ -1314,9 +1314,9 @@ int MPIDI_CH3I_Acceptq_dequeue(MPIDI_VC_t ** vc, int port_name_tag)
 {
     int mpi_errno=MPI_SUCCESS;
     MPIDI_CH3I_Acceptq_t *q_item, *prev;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
 
     *vc = NULL;
     q_item = acceptq_head;
@@ -1350,7 +1350,7 @@ int MPIDI_CH3I_Acceptq_dequeue(MPIDI_VC_t ** vc, int port_name_tag)
 	      (MPL_DBG_FDEST,"vc=%p:Dequeuing accept connection with tag %d",
 	       *vc,port_name_tag));
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
     return mpi_errno;
 }
 
diff --git a/src/mpid/ch3/src/ch3u_recvq.c b/src/mpid/ch3/src/ch3u_recvq.c
index fc28c7d..8eced2f 100644
--- a/src/mpid/ch3/src/ch3u_recvq.c
+++ b/src/mpid/ch3/src/ch3u_recvq.c
@@ -216,9 +216,9 @@ int MPIDI_CH3U_Recvq_FU(int source, int tag, int context_id, MPI_Status *s)
     MPIR_Request * rreq;
     int found = 0;
     MPIDI_Message_match match, mask;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FU);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FU);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FU);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FU);
 
     rreq = recvq_unexpected_head;
 
@@ -270,7 +270,7 @@ int MPIDI_CH3U_Recvq_FU(int source, int tag, int context_id, MPI_Status *s)
 	found = 1;
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FU);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FU);
     return found;
 }
 
@@ -299,9 +299,9 @@ MPIR_Request * MPIDI_CH3U_Recvq_FDU(MPI_Request sreq_id,
     MPIR_Request * matching_prev_rreq;
     MPIR_Request * matching_cur_rreq;
     MPIDI_Message_match mask;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU);
 
     matching_prev_rreq = NULL;
     matching_cur_rreq = NULL;
@@ -361,7 +361,7 @@ MPIR_Request * MPIDI_CH3U_Recvq_FDU(MPI_Request sreq_id,
 	rreq = NULL;
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU);
     return rreq;
 }
 
@@ -379,9 +379,9 @@ MPIR_Request * MPIDI_CH3U_Recvq_FDU_matchonly(int source, int tag, int context_i
     MPIR_Request *rreq, *prev_rreq;
     MPIDI_Message_match match;
     MPIDI_Message_match mask;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_MATCHONLY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_MATCHONLY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_MATCHONLY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_MATCHONLY);
 
     /* Store how much time is spent traversing the queue */
     MPIR_T_PVAR_TIMER_START(RECVQ, time_matching_unexpectedq);
@@ -468,7 +468,7 @@ lock_exit:
 
     *foundp = found;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_MATCHONLY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_MATCHONLY);
     return rreq;
 }
 
@@ -499,9 +499,9 @@ MPIR_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
     MPIR_Request *rreq, *prev_rreq;
     MPIDI_Message_match match;
     MPIDI_Message_match mask;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
 
     /* Store how much time is spent traversing the queue */
     MPIR_T_PVAR_TIMER_START(RECVQ, time_matching_unexpectedq);
@@ -652,7 +652,7 @@ MPIR_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
     if (found)
         MPIR_T_PVAR_TIMER_END(RECVQ, time_matching_unexpectedq);
     
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
     return rreq;
 }
 
@@ -675,9 +675,9 @@ int MPIDI_CH3U_Recvq_DP(MPIR_Request * rreq)
     MPIR_Request * cur_rreq;
     MPIR_Request * prev_rreq;
     int dequeue_failed;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_DP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_DP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_DP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_DP);
 
     found = FALSE;
     prev_rreq = NULL;
@@ -715,7 +715,7 @@ int MPIDI_CH3U_Recvq_DP(MPIR_Request * rreq)
 
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_DP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_DP);
     return found;
 }
 
@@ -750,9 +750,9 @@ MPIR_Request * MPIDI_CH3U_Recvq_FDP_or_AEU(MPIDI_Message_match * match,
     MPIR_Request * prev_rreq;
     int channel_matched;
     int error_bit_masked = 0, proc_failure_bit_masked = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDP_OR_AEU);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDP_OR_AEU);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDP_OR_AEU);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDP_OR_AEU);
 
     /* Unset the error bit if it is set on the incoming packet so we don't
      * have to mask it every time. It will get reset at the end of the loop or
@@ -815,7 +815,7 @@ MPIR_Request * MPIDI_CH3U_Recvq_FDP_or_AEU(MPIDI_Message_match * match,
                         comm_ptr->revoked && MPIR_TAG_MASK_ERROR_BITS(match->parts.tag) != MPIR_SHRINK_TAG) {
             *foundp = FALSE;
             MPIDI_Request_create_null_rreq( rreq, mpi_errno, found=FALSE;goto lock_exit );
-            MPIU_Assert(mpi_errno == MPI_SUCCESS);
+            MPIR_Assert(mpi_errno == MPI_SUCCESS);
 
             MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, VERBOSE,
                 (MPL_DBG_FDEST, "RECEIVED MESSAGE FOR REVOKED COMM (tag=%d,src=%d,cid=%d)\n", MPIR_TAG_MASK_ERROR_BITS(match->parts.tag), match->parts.rank, comm_ptr->context_id));
@@ -829,7 +829,7 @@ MPIR_Request * MPIDI_CH3U_Recvq_FDP_or_AEU(MPIDI_Message_match * match,
         int mpi_errno ATTRIBUTE((unused)) = 0;
 	MPIDI_Request_create_rreq( rreq, mpi_errno, 
 				   found=FALSE;goto lock_exit );
-        MPIU_Assert(mpi_errno == 0);
+        MPIR_Assert(mpi_errno == 0);
         rreq->dev.recv_pending_count = 1;
         /* Reset the error bits if we unset it earlier. */
         if (error_bit_masked) MPIR_TAG_SET_ERROR_BIT(match->parts.tag);
@@ -856,7 +856,7 @@ MPIR_Request * MPIDI_CH3U_Recvq_FDP_or_AEU(MPIDI_Message_match * match,
 
     *foundp = found;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDP_OR_AEU);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDP_OR_AEU);
     return rreq;
 }
 
@@ -919,9 +919,9 @@ int MPIDI_CH3U_Clean_recvq(MPIR_Comm *comm_ptr)
     MPIR_Request *rreq, *prev_rreq = NULL;
     MPIDI_Message_match match;
     MPIDI_Message_match mask;
-    MPIDI_STATE_DECL(MPIDI_CH3U_CLEAN_RECVQ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPIDI_CH3U_CLEAN_RECVQ);
 
-    MPIDI_FUNC_ENTER(MPIDI_CH3U_CLEAN_RECVQ);
+    MPIR_FUNC_VERBOSE_ENTER(MPIDI_CH3U_CLEAN_RECVQ);
 
     MPIR_ERR_SETSIMPLE(error, MPIX_ERR_REVOKED, "**revoked");
 
@@ -1114,7 +1114,7 @@ int MPIDI_CH3U_Clean_recvq(MPIR_Comm *comm_ptr)
         rreq = rreq->dev.next;
     }
 
-    MPIDI_FUNC_EXIT(MPIDI_CH3U_CLEAN_RECVQ);
+    MPIR_FUNC_VERBOSE_EXIT(MPIDI_CH3U_CLEAN_RECVQ);
 
     return mpi_errno;
 }
@@ -1128,9 +1128,9 @@ int MPIDI_CH3U_Complete_posted_with_error(MPIDI_VC_t *vc)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *req, *prev_req;
     int error = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_COMPLETE_POSTED_WITH_ERROR);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_COMPLETE_POSTED_WITH_ERROR);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_COMPLETE_POSTED_WITH_ERROR);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_COMPLETE_POSTED_WITH_ERROR);
 
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
 
@@ -1152,7 +1152,7 @@ int MPIDI_CH3U_Complete_posted_with_error(MPIDI_VC_t *vc)
  fn_exit:
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_COMPLETE_POSTED_WITH_ERROR);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_COMPLETE_POSTED_WITH_ERROR);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/ch3u_request.c b/src/mpid/ch3/src/ch3u_request.c
index 2d3090c..2fec8f3 100644
--- a/src/mpid/ch3/src/ch3u_request.c
+++ b/src/mpid/ch3/src/ch3u_request.c
@@ -29,9 +29,9 @@
 #define FCNAME MPL_QUOTE(FUNCNAME)
 void MPID_Request_init(MPIR_Request *req)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_REQUEST_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_REQUEST_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_REQUEST_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_REQUEST_INIT);
     
     req->dev.datatype_ptr	   = NULL;
     req->dev.segment_ptr	   = NULL;
@@ -88,23 +88,23 @@ int MPIDI_CH3U_Request_load_send_iov(MPIR_Request * const sreq,
 {
     MPI_Aint last;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
-    MPIU_Assert(sreq->dev.segment_ptr != NULL);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
+    MPIR_Assert(sreq->dev.segment_ptr != NULL);
     last = sreq->dev.segment_size;
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,(MPL_DBG_FDEST,
      "pre-pv: first=%" PRIdPTR ", last=%" PRIdPTR ", iov_n=%d",
 		      sreq->dev.segment_first, last, *iov_n));
-    MPIU_Assert(sreq->dev.segment_first < last);
-    MPIU_Assert(last > 0);
-    MPIU_Assert(*iov_n > 0 && *iov_n <= MPL_IOV_LIMIT);
+    MPIR_Assert(sreq->dev.segment_first < last);
+    MPIR_Assert(last > 0);
+    MPIR_Assert(*iov_n > 0 && *iov_n <= MPL_IOV_LIMIT);
     MPIDU_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, 
 			     &last, iov, iov_n);
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,(MPL_DBG_FDEST,
     "post-pv: first=%" PRIdPTR ", last=%" PRIdPTR ", iov_n=%d",
 		      sreq->dev.segment_first, last, *iov_n));
-    MPIU_Assert(*iov_n > 0 && *iov_n <= MPL_IOV_LIMIT);
+    MPIR_Assert(*iov_n > 0 && *iov_n <= MPL_IOV_LIMIT);
     
     if (last == sreq->dev.segment_size)
     {
@@ -143,7 +143,7 @@ int MPIDI_CH3U_Request_load_send_iov(MPIR_Request * const sreq,
 
 	iov_data_copied = 0;
 	for (i = 0; i < *iov_n; i++) {
-	    MPIU_Memcpy((char*) sreq->dev.tmpbuf + iov_data_copied, 
+	    MPIR_Memcpy((char*) sreq->dev.tmpbuf + iov_data_copied,
 		   iov[i].MPL_IOV_BUF, iov[i].MPL_IOV_LEN);
 	    iov_data_copied += iov[i].MPL_IOV_LEN;
 	}
@@ -177,7 +177,7 @@ int MPIDI_CH3U_Request_load_send_iov(MPIR_Request * const sreq,
     }
     
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
     return mpi_errno;
 }
 
@@ -200,9 +200,9 @@ int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)
     MPI_Aint last;
     static intptr_t orig_segment_first = MPIDI_LOAD_RECV_IOV_ORIG_SEGMENT_FIRST_UNSET;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
 
     if (orig_segment_first == MPIDI_LOAD_RECV_IOV_ORIG_SEGMENT_FIRST_UNSET) {
         orig_segment_first = rreq->dev.segment_first;
@@ -228,7 +228,7 @@ int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)
 	    
 	    data_sz = rreq->dev.segment_size - rreq->dev.segment_first - 
 		rreq->dev.tmpbuf_off;
-	    MPIU_Assert(data_sz > 0);
+	    MPIR_Assert(data_sz > 0);
 	    tmpbuf_sz = rreq->dev.tmpbuf_sz - rreq->dev.tmpbuf_off;
 	    if (data_sz > tmpbuf_sz)
 	    {
@@ -240,7 +240,7 @@ int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)
 	    rreq->dev.iov[0].MPL_IOV_LEN = data_sz;
             rreq->dev.iov_offset = 0;
 	    rreq->dev.iov_count = 1;
-	    MPIU_Assert(rreq->dev.segment_first - orig_segment_first + data_sz +
+	    MPIR_Assert(rreq->dev.segment_first - orig_segment_first + data_sz +
 			rreq->dev.tmpbuf_off <= rreq->dev.recv_data_sz);
 	    if (rreq->dev.segment_first - orig_segment_first + data_sz + rreq->dev.tmpbuf_off ==
 		rreq->dev.recv_data_sz)
@@ -265,15 +265,15 @@ int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)
 	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,(MPL_DBG_FDEST,
    "pre-upv: first=%" PRIdPTR ", last=%" PRIdPTR ", iov_n=%d",
 			  rreq->dev.segment_first, last, rreq->dev.iov_count));
-	MPIU_Assert(rreq->dev.segment_first < last);
-	MPIU_Assert(last > 0);
+	MPIR_Assert(rreq->dev.segment_first < last);
+	MPIR_Assert(last > 0);
 	MPIDU_Segment_unpack_vector(rreq->dev.segment_ptr, 
 				   rreq->dev.segment_first,
 				   &last, &rreq->dev.iov[0], &rreq->dev.iov_count);
 	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,(MPL_DBG_FDEST,
    "post-upv: first=%" PRIdPTR ", last=%" PRIdPTR ", iov_n=%d, iov_offset=%lld",
 			  rreq->dev.segment_first, last, rreq->dev.iov_count, (long long)rreq->dev.iov_offset));
-	MPIU_Assert(rreq->dev.iov_count >= 0 && rreq->dev.iov_count <= 
+	MPIR_Assert(rreq->dev.iov_count >= 0 && rreq->dev.iov_count <=
 		    MPL_IOV_LIMIT);
 
 	/* --BEGIN ERROR HANDLING-- */
@@ -293,7 +293,7 @@ int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)
 	}
         else
         {
-            MPIU_Assert(rreq->dev.iov_offset < rreq->dev.iov_count);
+            MPIR_Assert(rreq->dev.iov_offset < rreq->dev.iov_count);
         }
 	/* --END ERROR HANDLING-- */
 
@@ -320,7 +320,7 @@ int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)
 	    /* Too little data would have been received using an IOV.  
 	       We will start receiving data into a SRBuf and unpacking it
 	       later. */
-	    MPIU_Assert(MPIDI_Request_get_srbuf_flag(rreq) == FALSE);
+	    MPIR_Assert(MPIDI_Request_get_srbuf_flag(rreq) == FALSE);
 	    
 	    MPIDI_CH3U_SRBuf_alloc(rreq, 
 			    rreq->dev.segment_size - rreq->dev.segment_first);
@@ -372,7 +372,7 @@ int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)
 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
 	    "updating rreq to read overflow data into the SRBuf and complete");
 	    rreq->dev.iov[0].MPL_IOV_LEN = data_sz;
-	    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
+	    MPIR_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
 	    /* Eventually, use OnFinal for this instead */
 	    rreq->dev.OnDataAvail = rreq->dev.OnFinal;
             orig_segment_first = MPIDI_LOAD_RECV_IOV_ORIG_SEGMENT_FIRST_UNSET;
@@ -391,7 +391,7 @@ int MPIDI_CH3U_Request_load_recv_iov(MPIR_Request * const rreq)
     }
     
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
     return mpi_errno;
 }
 
@@ -409,9 +409,9 @@ int MPIDI_CH3U_Request_unpack_srbuf(MPIR_Request * rreq)
     MPI_Aint last;
     int tmpbuf_last;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
 
     tmpbuf_last = (int)(rreq->dev.segment_first + rreq->dev.tmpbuf_sz);
     if (rreq->dev.segment_size < tmpbuf_last)
@@ -468,7 +468,7 @@ int MPIDI_CH3U_Request_unpack_srbuf(MPIR_Request * rreq)
 	rreq->dev.segment_first = last;
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
     return mpi_errno;
 }
 
@@ -489,10 +489,10 @@ int MPIDI_CH3U_Request_unpack_uebuf(MPIR_Request * rreq)
     MPIDU_Datatype * dt_ptr;
     intptr_t unpack_sz;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
-    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MEMCPY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
 
     MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, 
 			    dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
@@ -525,10 +525,10 @@ int MPIDI_CH3U_Request_unpack_uebuf(MPIR_Request * rreq)
 	       In other words, if we were to use Segment_unpack()
 	       would last = unpack?  If not we should return an error 
 	       (unless configured with --enable-fast) */
-	    MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
-	    MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, rreq->dev.tmpbuf,
+	    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MEMCPY);
+	    MPIR_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, rreq->dev.tmpbuf,
 		   unpack_sz);
-	    MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MEMCPY);
 	}
 	else
 	{
@@ -554,7 +554,7 @@ int MPIDI_CH3U_Request_unpack_uebuf(MPIR_Request * rreq)
 	}
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
     return mpi_errno;
 }
 
@@ -564,7 +564,7 @@ int MPID_Request_complete(MPIR_Request *req)
     int mpi_errno = MPI_SUCCESS;
     static int called_cnt = 0;
 
-    MPIU_Assert(called_cnt <= REQUEST_CB_DEPTH);
+    MPIR_Assert(called_cnt <= REQUEST_CB_DEPTH);
     called_cnt++;
 
     MPIDI_CH3U_Request_decrement_cc(req, &incomplete);
diff --git a/src/mpid/ch3/src/ch3u_rma_ops.c b/src/mpid/ch3/src/ch3u_rma_ops.c
index 6d20ba2..028a4ea 100644
--- a/src/mpid/ch3/src/ch3u_rma_ops.c
+++ b/src/mpid/ch3/src/ch3u_rma_ops.c
@@ -50,9 +50,9 @@ int MPIDI_CH3I_Put(const void *origin_addr, int origin_count, MPI_Datatype
     intptr_t data_sz;
     MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
     int made_progress = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PUT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PUT);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PUT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_PUT);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
@@ -202,7 +202,7 @@ int MPIDI_CH3I_Put(const void *origin_addr, int origin_count, MPI_Datatype
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PUT);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_PUT);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -227,9 +227,9 @@ int MPIDI_CH3I_Get(void *origin_addr, int origin_count, MPI_Datatype
     MPIDU_Datatype*dtp;
     MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
     int made_progress = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_GET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_GET);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_GET);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_GET);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
@@ -318,7 +318,7 @@ int MPIDI_CH3I_Get(void *origin_addr, int origin_count, MPI_Datatype
         MPIDU_Datatype_is_contig(target_datatype, &is_target_contig);
 
         MPIDU_Datatype_get_size_macro(target_datatype, target_type_size);
-        MPIU_Assign_trunc(target_data_sz, target_count * target_type_size, intptr_t);
+        MPIR_Assign_trunc(target_data_sz, target_count * target_type_size, intptr_t);
 
         /* Judge if we can use IMMED data response packet */
         if (MPIR_DATATYPE_IS_PREDEFINED(origin_datatype) &&
@@ -371,7 +371,7 @@ int MPIDI_CH3I_Get(void *origin_addr, int origin_count, MPI_Datatype
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_GET);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_GET);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -397,9 +397,9 @@ int MPIDI_CH3I_Accumulate(const void *origin_addr, int origin_count, MPI_Datatyp
     MPIDU_Datatype*dtp;
     MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
     int made_progress = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCUMULATE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_ACCUMULATE);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
@@ -491,19 +491,19 @@ int MPIDI_CH3I_Accumulate(const void *origin_addr, int origin_count, MPI_Datatyp
             MPIDU_Datatype_get_extent_macro(origin_datatype, predefined_dtp_extent);
         }
         else {
-            MPIU_Assert(origin_dtp->basic_type != MPI_DATATYPE_NULL);
+            MPIR_Assert(origin_dtp->basic_type != MPI_DATATYPE_NULL);
             MPIDU_Datatype_get_size_macro(origin_dtp->basic_type, predefined_dtp_size);
             predefined_dtp_count = data_sz / predefined_dtp_size;
             MPIDU_Datatype_get_extent_macro(origin_dtp->basic_type, predefined_dtp_extent);
         }
-        MPIU_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 &&
+        MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 &&
                     predefined_dtp_extent > 0);
 
         /* Calculate number of predefined elements in each stream unit, and
          * total number of stream units. */
         stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
         stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
-        MPIU_Assert(stream_elem_count > 0 && stream_unit_count > 0);
+        MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);
 
         for (i = 0; i < stream_unit_count; i++) {
             if (origin_dtp != NULL) {
@@ -582,7 +582,7 @@ int MPIDI_CH3I_Accumulate(const void *origin_addr, int origin_count, MPI_Datatyp
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_ACCUMULATE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -610,9 +610,9 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
     MPIDU_Datatype*dtp;
     MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
     int made_progress = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
@@ -715,7 +715,7 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
 
         if (is_empty_origin == FALSE) {
             MPIDU_Datatype_get_size_macro(origin_datatype, origin_type_size);
-            MPIU_Assign_trunc(orig_data_sz, origin_count * origin_type_size, intptr_t);
+            MPIR_Assign_trunc(orig_data_sz, origin_count * origin_type_size, intptr_t);
         }
         else {
             /* If origin buffer is empty, set origin data size to 0 */
@@ -731,19 +731,19 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
             MPIDU_Datatype_get_extent_macro(target_datatype, predefined_dtp_extent);
         }
         else {
-            MPIU_Assert(target_dtp->basic_type != MPI_DATATYPE_NULL);
+            MPIR_Assert(target_dtp->basic_type != MPI_DATATYPE_NULL);
             MPIDU_Datatype_get_size_macro(target_dtp->basic_type, predefined_dtp_size);
             predefined_dtp_count = target_data_sz / predefined_dtp_size;
             MPIDU_Datatype_get_extent_macro(target_dtp->basic_type, predefined_dtp_extent);
         }
-        MPIU_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 &&
+        MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 &&
                     predefined_dtp_extent > 0);
 
         /* Calculate number of predefined elements in each stream unit, and
          * total number of stream units. */
         stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
         stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
-        MPIU_Assert(stream_elem_count > 0 && stream_unit_count > 0);
+        MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);
 
         for (i = 0; i < stream_unit_count; i++) {
             if (origin_dtp != NULL) {
@@ -834,7 +834,7 @@ int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -854,15 +854,15 @@ int MPID_Put(const void *origin_addr, int origin_count, MPI_Datatype
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_PUT);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_PUT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_PUT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_PUT);
 
     mpi_errno = MPIDI_CH3I_Put(origin_addr, origin_count, origin_datatype,
                                target_rank, target_disp, target_count, target_datatype,
                                win_ptr, NULL);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_PUT);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_PUT);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -881,15 +881,15 @@ int MPID_Get(void *origin_addr, int origin_count, MPI_Datatype
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_GET);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_GET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_GET);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_GET);
 
     mpi_errno = MPIDI_CH3I_Get(origin_addr, origin_count, origin_datatype,
                                target_rank, target_disp, target_count, target_datatype,
                                win_ptr, NULL);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_GET);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_GET);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -908,15 +908,15 @@ int MPID_Accumulate(const void *origin_addr, int origin_count, MPI_Datatype
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_ACCUMULATE);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_ACCUMULATE);
 
     mpi_errno = MPIDI_CH3I_Accumulate(origin_addr, origin_count, origin_datatype,
                                       target_rank, target_disp, target_count, target_datatype,
                                       op, win_ptr, NULL);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_ACCUMULATE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -937,8 +937,8 @@ int MPID_Get_accumulate(const void *origin_addr, int origin_count,
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_GET_ACCUMULATE);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_GET_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_GET_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_GET_ACCUMULATE);
 
     mpi_errno = MPIDI_CH3I_Get_accumulate(origin_addr, origin_count, origin_datatype,
                                           result_addr, result_count, result_datatype,
@@ -946,7 +946,7 @@ int MPID_Get_accumulate(const void *origin_addr, int origin_count,
                                           target_datatype, op, win_ptr, NULL);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_GET_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_GET_ACCUMULATE);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -969,9 +969,9 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
     MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
     int made_progress = 0;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_COMPARE_AND_SWAP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMPARE_AND_SWAP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_COMPARE_AND_SWAP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_COMPARE_AND_SWAP);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
@@ -1048,7 +1048,7 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
         /* REQUIRE: All datatype arguments must be of the same, builtin
          * type and counts must be 1. */
         MPIDU_Datatype_get_size_macro(datatype, type_size);
-        MPIU_Assert(type_size <= sizeof(MPIDI_CH3_CAS_Immed_u));
+        MPIR_Assert(type_size <= sizeof(MPIDI_CH3_CAS_Immed_u));
 
         src = (void *) origin_addr, dest = (void *) (&(cas_pkt->origin_data));
         mpi_errno = immed_copy(src, dest, type_size);
@@ -1081,7 +1081,7 @@ int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_COMPARE_AND_SWAP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_COMPARE_AND_SWAP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1103,9 +1103,9 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
     MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
     int made_progress = 0;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_FETCH_AND_OP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_FETCH_AND_OP);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_FETCH_AND_OP);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_FETCH_AND_OP);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
@@ -1170,7 +1170,7 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
         /************** Setting packet struct areas in operation ****************/
 
         MPIDU_Datatype_get_size_macro(datatype, type_size);
-        MPIU_Assert(type_size <= sizeof(MPIDI_CH3_FOP_Immed_u));
+        MPIR_Assert(type_size <= sizeof(MPIDI_CH3_FOP_Immed_u));
 
         MPIDU_Datatype_is_contig(datatype, &is_contig);
 
@@ -1223,7 +1223,7 @@ int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_FETCH_AND_OP);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_FETCH_AND_OP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
diff --git a/src/mpid/ch3/src/ch3u_rma_pkthandler.c b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
index 3923ffa..099ad51 100644
--- a/src/mpid/ch3/src/ch3u_rma_pkthandler.c
+++ b/src/mpid/ch3/src/ch3u_rma_pkthandler.c
@@ -188,8 +188,8 @@ static int MPIDI_CH3_ExtPktHandler_Accumulate(MPIDI_CH3_Pkt_flags_t flags,
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_EXTPKTHANDLER_ACCUMULATE);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_EXTPKTHANDLER_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_EXTPKTHANDLER_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_EXTPKTHANDLER_ACCUMULATE);
 
     if ((flags & MPIDI_CH3_PKT_FLAG_RMA_STREAM) && is_derived_dt) {
         (*ext_hdr_sz) = sizeof(MPIDI_CH3_Ext_pkt_accum_stream_derived_t);
@@ -217,7 +217,7 @@ static int MPIDI_CH3_ExtPktHandler_Accumulate(MPIDI_CH3_Pkt_flags_t flags,
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_EXTPKTHANDLER_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_EXTPKTHANDLER_ACCUMULATE);
     return mpi_errno;
   fn_fail:
     if ((*ext_hdr_ptr) != NULL)
@@ -236,11 +236,11 @@ static int MPIDI_CH3_ExtPktHandler_GetAccumulate(MPIDI_CH3_Pkt_flags_t flags,
                                                  MPI_Aint * ext_hdr_sz)
 {
     /* Check if get_accum still reuses accum' extended packet header. */
-    MPIU_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_stream_derived_t) ==
+    MPIR_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_stream_derived_t) ==
                 sizeof(MPIDI_CH3_Ext_pkt_get_accum_stream_derived_t));
-    MPIU_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_derived_t) ==
+    MPIR_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_derived_t) ==
                 sizeof(MPIDI_CH3_Ext_pkt_get_accum_derived_t));
-    MPIU_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_stream_t) ==
+    MPIR_Assert(sizeof(MPIDI_CH3_Ext_pkt_accum_stream_t) ==
                 sizeof(MPIDI_CH3_Ext_pkt_get_accum_stream_t));
 
     return MPIDI_CH3_ExtPktHandler_Accumulate(flags, is_derived_dt, ext_hdr_ptr, ext_hdr_sz);
@@ -268,15 +268,15 @@ int MPIDI_CH3_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIR_Win *win_ptr;
     int acquire_lock_fail = 0;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received put pkt");
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_put);
 
-    MPIU_Assert(put_pkt->target_win_handle != MPI_WIN_NULL);
+    MPIR_Assert(put_pkt->target_win_handle != MPI_WIN_NULL);
     MPIR_Win_get_ptr(put_pkt->target_win_handle, win_ptr);
 
     mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen, &acquire_lock_fail, &req);
@@ -292,12 +292,12 @@ int MPIDI_CH3_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         MPI_Aint type_size;
 
         /* Immed packet type is used when target datatype is predefined datatype. */
-        MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(put_pkt->datatype));
+        MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(put_pkt->datatype));
 
         MPIDU_Datatype_get_size_macro(put_pkt->datatype, type_size);
 
         /* copy data from packet header to target buffer */
-        MPIU_Memcpy(put_pkt->addr, put_pkt->info.data, put_pkt->count * type_size);
+        MPIR_Memcpy(put_pkt->addr, put_pkt->info.data, put_pkt->count * type_size);
 
         /* trigger final action */
         mpi_errno = finish_op_on_target(win_ptr, vc, FALSE /* has no response data */ ,
@@ -309,14 +309,14 @@ int MPIDI_CH3_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         *rreqp = NULL;
     }
     else {
-        MPIU_Assert(pkt->type == MPIDI_CH3_PKT_PUT);
+        MPIR_Assert(pkt->type == MPIDI_CH3_PKT_PUT);
 
         /* get start location of data and length of data */
         data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
         data_buf = (char *) pkt + sizeof(MPIDI_CH3_Pkt_t);
 
         req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-        MPIU_Object_set_ref(req, 1);
+        MPIR_Object_set_ref(req, 1);
 
         req->dev.user_buf = put_pkt->addr;
         req->dev.user_count = put_pkt->count;
@@ -334,7 +334,7 @@ int MPIDI_CH3_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
             MPIDU_Datatype_get_size_macro(put_pkt->datatype, type_size);
 
             req->dev.recv_data_sz = type_size * put_pkt->count;
-            MPIU_Assert(req->dev.recv_data_sz > 0);
+            MPIR_Assert(req->dev.recv_data_sz > 0);
 
             mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
             MPIR_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
@@ -380,8 +380,8 @@ int MPIDI_CH3_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
              * channel copy it */
             if (data_len >= req->dev.ext_hdr_sz + put_pkt->info.dataloop_size) {
                 /* Copy extended header */
-                MPIU_Memcpy(req->dev.ext_hdr_ptr, data_buf, req->dev.ext_hdr_sz);
-                MPIU_Memcpy(req->dev.dataloop, data_buf + req->dev.ext_hdr_sz,
+                MPIR_Memcpy(req->dev.ext_hdr_ptr, data_buf, req->dev.ext_hdr_sz);
+                MPIR_Memcpy(req->dev.dataloop, data_buf + req->dev.ext_hdr_sz,
                             put_pkt->info.dataloop_size);
 
                 *buflen = sizeof(MPIDI_CH3_Pkt_t) + req->dev.ext_hdr_sz +
@@ -421,7 +421,7 @@ int MPIDI_CH3_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_put);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_PUT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -444,15 +444,15 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     int mpi_errno = MPI_SUCCESS;
     MPI_Aint type_size;
     int acquire_lock_fail = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received get pkt");
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_get);
 
-    MPIU_Assert(get_pkt->target_win_handle != MPI_WIN_NULL);
+    MPIR_Assert(get_pkt->target_win_handle != MPI_WIN_NULL);
     MPIR_Win_get_ptr(get_pkt->target_win_handle, win_ptr);
 
     mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen, &acquire_lock_fail, &req);
@@ -477,7 +477,7 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     win_ptr->at_completion_counter++;
 
     if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
-        MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_pkt->datatype));
+        MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_pkt->datatype));
     }
 
     if (MPIR_DATATYPE_IS_PREDEFINED(get_pkt->datatype)) {
@@ -515,7 +515,7 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         MPIDU_Datatype_is_contig(get_pkt->datatype, &is_contig);
 
         if (get_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_IMMED_RESP) {
-            MPIU_Assign_trunc(len, get_pkt->count * type_size, size_t);
+            MPIR_Assign_trunc(len, get_pkt->count * type_size, size_t);
             void *src = (void *) (get_pkt->addr), *dest = (void *) (get_resp_pkt->info.data);
             mpi_errno = immed_copy(src, dest, len);
             if (mpi_errno != MPI_SUCCESS)
@@ -607,8 +607,8 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
          * channel copy it */
         if (data_len >= req->dev.ext_hdr_sz + get_pkt->info.dataloop_size) {
             /* Copy extended header */
-            MPIU_Memcpy(req->dev.ext_hdr_ptr, data_buf, req->dev.ext_hdr_sz);
-            MPIU_Memcpy(req->dev.dataloop, data_buf + req->dev.ext_hdr_sz,
+            MPIR_Memcpy(req->dev.ext_hdr_ptr, data_buf, req->dev.ext_hdr_sz);
+            MPIR_Memcpy(req->dev.dataloop, data_buf + req->dev.ext_hdr_sz,
                         get_pkt->info.dataloop_size);
 
             *buflen = sizeof(MPIDI_CH3_Pkt_t) + req->dev.ext_hdr_sz + get_pkt->info.dataloop_size;
@@ -634,7 +634,7 @@ int MPIDI_CH3_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     }
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_get);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -658,15 +658,15 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     int mpi_errno = MPI_SUCCESS;
     MPI_Aint type_size;
     MPI_Aint stream_elem_count, total_len;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received accumulate pkt");
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_acc);
 
-    MPIU_Assert(accum_pkt->target_win_handle != MPI_WIN_NULL);
+    MPIR_Assert(accum_pkt->target_win_handle != MPI_WIN_NULL);
     MPIR_Win_get_ptr(accum_pkt->target_win_handle, win_ptr);
 
     mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen, &acquire_lock_fail, &req);
@@ -680,7 +680,7 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     if (pkt->type == MPIDI_CH3_PKT_ACCUMULATE_IMMED) {
         /* Immed packet type is used when target datatype is predefined datatype. */
-        MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(accum_pkt->datatype));
+        MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(accum_pkt->datatype));
 
         if (win_ptr->shm_allocated == TRUE)
             MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
@@ -703,10 +703,10 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         *rreqp = NULL;
     }
     else {
-        MPIU_Assert(pkt->type == MPIDI_CH3_PKT_ACCUMULATE);
+        MPIR_Assert(pkt->type == MPIDI_CH3_PKT_ACCUMULATE);
 
         req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-        MPIU_Object_set_ref(req, 1);
+        MPIR_Object_set_ref(req, 1);
         *rreqp = req;
 
         req->dev.user_count = accum_pkt->count;
@@ -750,7 +750,7 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
                 MPIDU_Datatype_get_extent_macro(accum_pkt->datatype, extent);
 
-                MPIU_Assert(!MPIDI_Request_get_srbuf_flag(req));
+                MPIR_Assert(!MPIDI_Request_get_srbuf_flag(req));
                 /* allocate a SRBuf for receiving stream unit */
                 MPIDI_CH3U_SRBuf_alloc(req, MPIDI_CH3U_SRBuf_size);
                 /* --BEGIN ERROR HANDLING-- */
@@ -772,7 +772,7 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                 stream_elem_count = MPIDI_CH3U_SRBuf_size / extent;
 
                 req->dev.recv_data_sz = MPL_MIN(total_len, stream_elem_count * type_size);
-                MPIU_Assert(req->dev.recv_data_sz > 0);
+                MPIR_Assert(req->dev.recv_data_sz > 0);
 
                 mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
                 MPIR_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
@@ -807,8 +807,8 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
             if (data_len >= req->dev.ext_hdr_sz + accum_pkt->info.dataloop_size) {
                 /* Copy extended header */
-                MPIU_Memcpy(req->dev.ext_hdr_ptr, data_buf, req->dev.ext_hdr_sz);
-                MPIU_Memcpy(req->dev.dataloop, data_buf + req->dev.ext_hdr_sz,
+                MPIR_Memcpy(req->dev.ext_hdr_ptr, data_buf, req->dev.ext_hdr_sz);
+                MPIR_Memcpy(req->dev.dataloop, data_buf + req->dev.ext_hdr_sz,
                             accum_pkt->info.dataloop_size);
 
                 *buflen = sizeof(MPIDI_CH3_Pkt_t) + req->dev.ext_hdr_sz +
@@ -845,7 +845,7 @@ int MPIDI_CH3_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_acc);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -869,15 +869,15 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     int acquire_lock_fail = 0;
     int mpi_errno = MPI_SUCCESS;
     MPI_Aint stream_elem_count, total_len;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETACCUMULATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETACCUMULATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETACCUMULATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETACCUMULATE);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received accumulate pkt");
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_get_accum);
 
-    MPIU_Assert(get_accum_pkt->target_win_handle != MPI_WIN_NULL);
+    MPIR_Assert(get_accum_pkt->target_win_handle != MPI_WIN_NULL);
     MPIR_Win_get_ptr(get_accum_pkt->target_win_handle, win_ptr);
 
     mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen, &acquire_lock_fail, &req);
@@ -903,7 +903,7 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         *rreqp = NULL;
 
         /* Immed packet type is used when target datatype is predefined datatype. */
-        MPIU_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_accum_pkt->datatype));
+        MPIR_Assert(MPIR_DATATYPE_IS_PREDEFINED(get_accum_pkt->datatype));
 
         resp_req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
         resp_req->dev.target_win_handle = get_accum_pkt->target_win_handle;
@@ -920,7 +920,7 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
         /* Calculate the length of reponse data, ensure that it fits into immed packet. */
         MPIDU_Datatype_get_size_macro(get_accum_pkt->datatype, type_size);
-        MPIU_Assign_trunc(len, get_accum_pkt->count * type_size, size_t);
+        MPIR_Assign_trunc(len, get_accum_pkt->count * type_size, size_t);
 
         MPIDI_Pkt_init(get_accum_resp_pkt, MPIDI_CH3_PKT_GET_ACCUM_RESP_IMMED);
         get_accum_resp_pkt->request_handle = get_accum_pkt->request_handle;
@@ -976,10 +976,10 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     else {
         int is_derived_dt = 0;
 
-        MPIU_Assert(pkt->type == MPIDI_CH3_PKT_GET_ACCUM);
+        MPIR_Assert(pkt->type == MPIDI_CH3_PKT_GET_ACCUM);
 
         req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-        MPIU_Object_set_ref(req, 1);
+        MPIR_Object_set_ref(req, 1);
         *rreqp = req;
 
         req->dev.user_count = get_accum_pkt->count;
@@ -1037,7 +1037,7 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                 else {
                     MPIDU_Datatype_get_extent_macro(get_accum_pkt->datatype, extent);
 
-                    MPIU_Assert(!MPIDI_Request_get_srbuf_flag(req));
+                    MPIR_Assert(!MPIDI_Request_get_srbuf_flag(req));
                     /* allocate a SRBuf for receiving stream unit */
                     MPIDI_CH3U_SRBuf_alloc(req, MPIDI_CH3U_SRBuf_size);
                     /* --BEGIN ERROR HANDLING-- */
@@ -1058,7 +1058,7 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
                     stream_elem_count = MPIDI_CH3U_SRBuf_size / extent;
 
                     req->dev.recv_data_sz = MPL_MIN(total_len, stream_elem_count * type_size);
-                    MPIU_Assert(req->dev.recv_data_sz > 0);
+                    MPIR_Assert(req->dev.recv_data_sz > 0);
 
                     mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
                     MPIR_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
@@ -1094,8 +1094,8 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
             if (data_len >= req->dev.ext_hdr_sz + get_accum_pkt->info.dataloop_size) {
                 /* Copy extended header */
-                MPIU_Memcpy(req->dev.ext_hdr_ptr, data_buf, req->dev.ext_hdr_sz);
-                MPIU_Memcpy(req->dev.dataloop, data_buf + req->dev.ext_hdr_sz,
+                MPIR_Memcpy(req->dev.ext_hdr_ptr, data_buf, req->dev.ext_hdr_sz);
+                MPIR_Memcpy(req->dev.dataloop, data_buf + req->dev.ext_hdr_sz,
                             get_accum_pkt->info.dataloop_size);
 
                 *buflen = sizeof(MPIDI_CH3_Pkt_t) + req->dev.ext_hdr_sz +
@@ -1132,7 +1132,7 @@ int MPIDI_CH3_PktHandler_GetAccumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_get_accum);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETACCUMULATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETACCUMULATE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1156,21 +1156,21 @@ int MPIDI_CH3_PktHandler_CAS(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIR_Request *rreq = NULL;
     MPI_Aint len;
     int acquire_lock_fail = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received CAS pkt");
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_cas);
 
-    MPIU_Assert(cas_pkt->target_win_handle != MPI_WIN_NULL);
+    MPIR_Assert(cas_pkt->target_win_handle != MPI_WIN_NULL);
     MPIR_Win_get_ptr(cas_pkt->target_win_handle, win_ptr);
 
     mpi_errno = check_piggyback_lock(win_ptr, vc, pkt, buflen, &acquire_lock_fail, &rreq);
     if (mpi_errno != MPI_SUCCESS)
         MPIR_ERR_POP(mpi_errno);
-    MPIU_Assert(rreq == NULL);  /* CAS should not have request because all data
+    MPIR_Assert(rreq == NULL);  /* CAS should not have request because all data
                                  * can fit in packet header */
 
     if (acquire_lock_fail) {
@@ -1196,16 +1196,16 @@ int MPIDI_CH3_PktHandler_CAS(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
     /* Copy old value into the response packet */
     MPIDU_Datatype_get_size_macro(cas_pkt->datatype, len);
-    MPIU_Assert(len <= sizeof(MPIDI_CH3_CAS_Immed_u));
+    MPIR_Assert(len <= sizeof(MPIDI_CH3_CAS_Immed_u));
 
     if (win_ptr->shm_allocated == TRUE)
         MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
 
-    MPIU_Memcpy((void *) &cas_resp_pkt->info.data, cas_pkt->addr, len);
+    MPIR_Memcpy((void *) &cas_resp_pkt->info.data, cas_pkt->addr, len);
 
     /* Compare and replace if equal */
     if (MPIR_Compare_equal(&cas_pkt->compare_data, cas_pkt->addr, cas_pkt->datatype)) {
-        MPIU_Memcpy(cas_pkt->addr, &cas_pkt->origin_data, len);
+        MPIR_Memcpy(cas_pkt->addr, &cas_pkt->origin_data, len);
     }
 
     if (win_ptr->shm_allocated == TRUE)
@@ -1244,7 +1244,7 @@ int MPIDI_CH3_PktHandler_CAS(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_cas);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_CAS);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1266,9 +1266,9 @@ int MPIDI_CH3_PktHandler_CASResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     MPI_Aint len;
     MPIR_Win *win_ptr;
     int target_rank = cas_resp_pkt->target_rank;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_CASRESP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_CASRESP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_CASRESP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_CASRESP);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received CAS response pkt");
 
@@ -1295,7 +1295,7 @@ int MPIDI_CH3_PktHandler_CASResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
     MPIDU_Datatype_get_size_macro(req->dev.datatype, len);
 
-    MPIU_Memcpy(req->dev.user_buf, (void *) &cas_resp_pkt->info.data, len);
+    MPIR_Memcpy(req->dev.user_buf, (void *) &cas_resp_pkt->info.data, len);
 
     mpi_errno = MPID_Request_complete(req);
     if (mpi_errno != MPI_SUCCESS) {
@@ -1307,7 +1307,7 @@ int MPIDI_CH3_PktHandler_CASResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_cas_resp);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_CASRESP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_CASRESP);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1330,9 +1330,9 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     int acquire_lock_fail = 0;
     MPIR_Win *win_ptr = NULL;
     MPI_Aint type_size;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received FOP pkt");
 
@@ -1423,7 +1423,7 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
             MPIR_ERR_POP(mpi_errno);
     }
     else {
-        MPIU_Assert(pkt->type == MPIDI_CH3_PKT_FOP);
+        MPIR_Assert(pkt->type == MPIDI_CH3_PKT_FOP);
 
         MPIR_Request *req = NULL;
         char *data_buf = NULL;
@@ -1437,7 +1437,7 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
             is_empty_origin = TRUE;
 
         req = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
-        MPIU_Object_set_ref(req, 1);
+        MPIR_Object_set_ref(req, 1);
         MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_FOP_RECV);
         *rreqp = req;
 
@@ -1470,7 +1470,7 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
             }
 
             req->dev.recv_data_sz = type_size;
-            MPIU_Assert(req->dev.recv_data_sz > 0);
+            MPIR_Assert(req->dev.recv_data_sz > 0);
 
             mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
             MPIR_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
@@ -1493,7 +1493,7 @@ int MPIDI_CH3_PktHandler_FOP(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_fop);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1519,9 +1519,9 @@ int MPIDI_CH3_PktHandler_FOPResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     char *data_buf = NULL;
     int complete = 0;
     int target_rank = fop_resp_pkt->target_rank;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOPRESP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOPRESP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOPRESP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOPRESP);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received FOP response pkt");
 
@@ -1556,14 +1556,14 @@ int MPIDI_CH3_PktHandler_FOPResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     *rreqp = req;
 
     if (fop_resp_pkt->type == MPIDI_CH3_PKT_FOP_RESP_IMMED) {
-        MPIU_Memcpy(req->dev.user_buf, fop_resp_pkt->info.data, req->dev.recv_data_sz);
+        MPIR_Memcpy(req->dev.user_buf, fop_resp_pkt->info.data, req->dev.recv_data_sz);
 
         /* return the number of bytes processed in this function */
         *buflen = sizeof(MPIDI_CH3_Pkt_t);
         complete = 1;
     }
     else {
-        MPIU_Assert(fop_resp_pkt->type == MPIDI_CH3_PKT_FOP_RESP);
+        MPIR_Assert(fop_resp_pkt->type == MPIDI_CH3_PKT_FOP_RESP);
 
         mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
         MPIR_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
@@ -1583,7 +1583,7 @@ int MPIDI_CH3_PktHandler_FOPResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_fop_resp);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOPRESP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_FOPRESP);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1608,9 +1608,9 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPI_Aint type_size;
     MPIR_Win *win_ptr;
     int target_rank = get_accum_resp_pkt->target_rank;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET_ACCUM_RESP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET_ACCUM_RESP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET_ACCUM_RESP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET_ACCUM_RESP);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received Get-Accumulate response pkt");
 
@@ -1644,13 +1644,13 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     if (get_accum_resp_pkt->type == MPIDI_CH3_PKT_GET_ACCUM_RESP_IMMED) {
         req->dev.recv_data_sz = type_size * req->dev.user_count;
 
-        MPIU_Memcpy(req->dev.user_buf, get_accum_resp_pkt->info.data, req->dev.recv_data_sz);
+        MPIR_Memcpy(req->dev.user_buf, get_accum_resp_pkt->info.data, req->dev.recv_data_sz);
         /* return the number of bytes processed in this function */
         *buflen = sizeof(MPIDI_CH3_Pkt_t);
         complete = 1;
     }
     else {
-        MPIU_Assert(pkt->type == MPIDI_CH3_PKT_GET_ACCUM_RESP);
+        MPIR_Assert(pkt->type == MPIDI_CH3_PKT_GET_ACCUM_RESP);
 
         MPI_Datatype basic_type;
         MPI_Aint basic_type_extent, basic_type_size;
@@ -1663,7 +1663,7 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
             basic_type = req->dev.datatype;
         }
         else {
-            MPIU_Assert(req->dev.datatype_ptr != NULL);
+            MPIR_Assert(req->dev.datatype_ptr != NULL);
             basic_type = req->dev.datatype_ptr->basic_type;
         }
 
@@ -1723,7 +1723,7 @@ int MPIDI_CH3_PktHandler_Get_AccumResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_get_accum_resp);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET_ACCUM_RESP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GET_ACCUM_RESP);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1741,9 +1741,9 @@ int MPIDI_CH3_PktHandler_Lock(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIR_Win *win_ptr = NULL;
     int lock_type;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCK);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received lock pkt");
 
@@ -1756,7 +1756,7 @@ int MPIDI_CH3_PktHandler_Lock(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     if (lock_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED)
         lock_type = MPI_LOCK_SHARED;
     else {
-        MPIU_Assert(lock_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
+        MPIR_Assert(lock_pkt->flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE);
         lock_type = MPI_LOCK_EXCLUSIVE;
     }
 
@@ -1774,13 +1774,13 @@ int MPIDI_CH3_PktHandler_Lock(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         mpi_errno = enqueue_lock_origin(win_ptr, vc, pkt, buflen, &req);
         if (mpi_errno != MPI_SUCCESS)
             MPIR_ERR_POP(mpi_errno);
-        MPIU_Assert(req == NULL);
+        MPIR_Assert(req == NULL);
     }
 
     *rreqp = NULL;
   fn_fail:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_lock);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCK);
     return mpi_errno;
 }
 
@@ -1802,9 +1802,9 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     MPI_Aint type_size;
     MPIR_Win *win_ptr;
     int target_rank = get_resp_pkt->target_rank;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETRESP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETRESP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETRESP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETRESP);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received get response pkt");
 
@@ -1838,14 +1838,14 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     *rreqp = req;
 
     if (get_resp_pkt->type == MPIDI_CH3_PKT_GET_RESP_IMMED) {
-        MPIU_Memcpy(req->dev.user_buf, get_resp_pkt->info.data, req->dev.recv_data_sz);
+        MPIR_Memcpy(req->dev.user_buf, get_resp_pkt->info.data, req->dev.recv_data_sz);
 
         /* return the number of bytes processed in this function */
         *buflen = sizeof(MPIDI_CH3_Pkt_t);
         complete = 1;
     }
     else {
-        MPIU_Assert(get_resp_pkt->type == MPIDI_CH3_PKT_GET_RESP);
+        MPIR_Assert(get_resp_pkt->type == MPIDI_CH3_PKT_GET_RESP);
 
         mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len, &complete);
         MPIR_ERR_CHKANDJUMP1(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|postrecv",
@@ -1866,7 +1866,7 @@ int MPIDI_CH3_PktHandler_GetResp(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_get_resp);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETRESP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_GETRESP);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -1883,9 +1883,9 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIR_Win *win_ptr = NULL;
     int target_rank = lock_ack_pkt->target_rank;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received lock ack pkt");
 
@@ -1897,11 +1897,11 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         MPIR_Win_get_ptr(lock_ack_pkt->source_win_handle, win_ptr);
     }
     else {
-        MPIU_Assert(lock_ack_pkt->request_handle != MPI_REQUEST_NULL);
+        MPIR_Assert(lock_ack_pkt->request_handle != MPI_REQUEST_NULL);
 
         MPIR_Request *req_ptr = NULL;
         MPIR_Request_get_ptr(lock_ack_pkt->request_handle, req_ptr);
-        MPIU_Assert(req_ptr->dev.source_win_handle != MPI_REQUEST_NULL);
+        MPIR_Assert(req_ptr->dev.source_win_handle != MPI_REQUEST_NULL);
         MPIR_Win_get_ptr(req_ptr->dev.source_win_handle, win_ptr);
     }
 
@@ -1913,7 +1913,7 @@ int MPIDI_CH3_PktHandler_LockAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIDI_CH3_Progress_signal_completion();
 
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_lock_ack);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKACK);
   fn_exit:
     return MPI_SUCCESS;
   fn_fail:
@@ -1932,9 +1932,9 @@ int MPIDI_CH3_PktHandler_LockOpAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     int target_rank = lock_op_ack_pkt->target_rank;
     MPIDI_CH3_Pkt_flags_t flags = lock_op_ack_pkt->flags;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKOPACK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKOPACK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKOPACK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKOPACK);
 
     *buflen = sizeof(MPIDI_CH3_Pkt_t);
 
@@ -1942,11 +1942,11 @@ int MPIDI_CH3_PktHandler_LockOpAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         MPIR_Win_get_ptr(lock_op_ack_pkt->source_win_handle, win_ptr);
     }
     else {
-        MPIU_Assert(lock_op_ack_pkt->request_handle != MPI_REQUEST_NULL);
+        MPIR_Assert(lock_op_ack_pkt->request_handle != MPI_REQUEST_NULL);
 
         MPIR_Request *req_ptr = NULL;
         MPIR_Request_get_ptr(lock_op_ack_pkt->request_handle, req_ptr);
-        MPIU_Assert(req_ptr->dev.source_win_handle != MPI_REQUEST_NULL);
+        MPIR_Assert(req_ptr->dev.source_win_handle != MPI_REQUEST_NULL);
         MPIR_Win_get_ptr(req_ptr->dev.source_win_handle, win_ptr);
     }
 
@@ -1959,7 +1959,7 @@ int MPIDI_CH3_PktHandler_LockOpAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
         MPIR_ERR_POP(mpi_errno);
 
     if (flags & MPIDI_CH3_PKT_FLAG_RMA_ACK) {
-        MPIU_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
+        MPIR_Assert(flags & MPIDI_CH3_PKT_FLAG_RMA_LOCK_GRANTED);
         mpi_errno = MPIDI_CH3I_RMA_Handle_ack(win_ptr, target_rank);
         if (mpi_errno != MPI_SUCCESS)
             MPIR_ERR_POP(mpi_errno);
@@ -1968,7 +1968,7 @@ int MPIDI_CH3_PktHandler_LockOpAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     *rreqp = NULL;
     MPIDI_CH3_Progress_signal_completion();
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKOPACK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_LOCKOPACK);
   fn_exit:
     return MPI_SUCCESS;
   fn_fail:
@@ -1986,9 +1986,9 @@ int MPIDI_CH3_PktHandler_Ack(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIR_Win *win_ptr = NULL;
     int mpi_errno = MPI_SUCCESS;
     int target_rank = ack_pkt->target_rank;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACK);
 
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received shared lock ops done pkt");
 
@@ -2007,7 +2007,7 @@ int MPIDI_CH3_PktHandler_Ack(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     MPIDI_CH3_Progress_signal_completion();
 
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_ack);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACK);
   fn_exit:
     return MPI_SUCCESS;
   fn_fail:
@@ -2026,16 +2026,16 @@ int MPIDI_CH3_PktHandler_DecrAtCnt(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     MPIDI_CH3_Pkt_decr_at_counter_t *decr_at_cnt_pkt = &pkt->decr_at_cnt;
     MPIR_Win *win_ptr;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_DECRATCNT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_DECRATCNT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_DECRATCNT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_DECRATCNT);
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_decr_at_cnt);
 
     MPIR_Win_get_ptr(decr_at_cnt_pkt->target_win_handle, win_ptr);
 
     win_ptr->at_completion_counter--;
-    MPIU_Assert(win_ptr->at_completion_counter >= 0);
+    MPIR_Assert(win_ptr->at_completion_counter >= 0);
 
     *buflen = sizeof(MPIDI_CH3_Pkt_t);
     *rreqp = NULL;
@@ -2050,7 +2050,7 @@ int MPIDI_CH3_PktHandler_DecrAtCnt(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_decr_at_cnt);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_DECRATCNT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_DECRATCNT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -2068,9 +2068,9 @@ int MPIDI_CH3_PktHandler_Unlock(MPIDI_VC_t * vc ATTRIBUTE((unused)),
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3_Pkt_unlock_t *unlock_pkt = &pkt->unlock;
     MPIR_Win *win_ptr = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_UNLOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_UNLOCK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_UNLOCK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_UNLOCK);
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received unlock pkt");
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_unlock);
@@ -2092,7 +2092,7 @@ int MPIDI_CH3_PktHandler_Unlock(MPIDI_VC_t * vc ATTRIBUTE((unused)),
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_unlock);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_UNLOCK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_UNLOCK);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -2111,9 +2111,9 @@ int MPIDI_CH3_PktHandler_Flush(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3_Pkt_flush_t *flush_pkt = &pkt->flush;
     MPIR_Win *win_ptr = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FLUSH);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_FLUSH);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_FLUSH);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_FLUSH);
     MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received flush pkt");
 
     MPIR_T_PVAR_TIMER_START(RMA, rma_rmapkt_flush);
@@ -2129,7 +2129,7 @@ int MPIDI_CH3_PktHandler_Flush(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
 
   fn_exit:
     MPIR_T_PVAR_TIMER_END(RMA, rma_rmapkt_flush);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_FLUSH);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_FLUSH);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
diff --git a/src/mpid/ch3/src/ch3u_rma_progress.c b/src/mpid/ch3/src/ch3u_rma_progress.c
index 3b6b2bc..1b561d0 100644
--- a/src/mpid/ch3/src/ch3u_rma_progress.c
+++ b/src/mpid/ch3/src/ch3u_rma_progress.c
@@ -137,9 +137,9 @@ static inline int check_and_switch_target_state(MPIR_Win * win_ptr, MPIDI_RMA_Ta
                 /* if we reach WIN_UNLOCK and there is still operation existing
                  * in pending list, this operation must be the only operation
                  * and it is prepared to piggyback LOCK and UNLOCK. */
-                MPIU_Assert(MPIR_CVAR_CH3_RMA_DELAY_ISSUING_FOR_PIGGYBACKING);
-                MPIU_Assert(target->pending_net_ops_list_head->next == NULL);
-                MPIU_Assert(target->pending_net_ops_list_head->piggyback_lock_candidate);
+                MPIR_Assert(MPIR_CVAR_CH3_RMA_DELAY_ISSUING_FOR_PIGGYBACKING);
+                MPIR_Assert(target->pending_net_ops_list_head->next == NULL);
+                MPIR_Assert(target->pending_net_ops_list_head->piggyback_lock_candidate);
             }
         }
         break;
@@ -277,11 +277,11 @@ static inline int issue_ops_target(MPIR_Win * win_ptr, MPIDI_RMA_Target_t * targ
         if (first_op) {
             /* piggyback on first OP. */
             if (target->access_state == MPIDI_RMA_LOCK_CALLED) {
-                MPIU_Assert(curr_op->piggyback_lock_candidate);
+                MPIR_Assert(curr_op->piggyback_lock_candidate);
                 if (target->lock_type == MPI_LOCK_SHARED)
                     flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_SHARED;
                 else {
-                    MPIU_Assert(target->lock_type == MPI_LOCK_EXCLUSIVE);
+                    MPIR_Assert(target->lock_type == MPI_LOCK_EXCLUSIVE);
                     flags |= MPIDI_CH3_PKT_FLAG_RMA_LOCK_EXCLUSIVE;
                 }
                 target->access_state = MPIDI_RMA_LOCK_ISSUED;
diff --git a/src/mpid/ch3/src/ch3u_rma_reqops.c b/src/mpid/ch3/src/ch3u_rma_reqops.c
index 920d66a..9398184 100644
--- a/src/mpid/ch3/src/ch3u_rma_reqops.c
+++ b/src/mpid/ch3/src/ch3u_rma_reqops.c
@@ -23,9 +23,9 @@ int MPID_Rput(const void *origin_addr, int origin_count,
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     intptr_t data_sz;
     MPIR_Request *ureq;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_RPUT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RPUT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RPUT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RPUT);
 
     /* request-based RMA operations are only valid within a passive epoch */
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
@@ -42,7 +42,7 @@ int MPID_Rput(const void *origin_addr, int origin_count,
     ureq->kind = MPIR_REQUEST_KIND__RMA;
 
     /* This request is referenced by user and ch3 by default. */
-    MPIU_Object_set_ref(ureq, 2);
+    MPIR_Object_set_ref(ureq, 2);
 
     /* Enqueue or perform the RMA operation */
     if (target_rank != MPI_PROC_NULL && data_sz != 0) {
@@ -64,7 +64,7 @@ int MPID_Rput(const void *origin_addr, int origin_count,
     *request = ureq;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RPUT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RPUT);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -86,9 +86,9 @@ int MPID_Rget(void *origin_addr, int origin_count,
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     intptr_t data_sz;
     MPIR_Request *ureq;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_RGET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RGET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RGET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RGET);
 
     /* request-based RMA operations are only valid within a passive epoch */
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
@@ -105,7 +105,7 @@ int MPID_Rget(void *origin_addr, int origin_count,
     ureq->kind = MPIR_REQUEST_KIND__RMA;
 
     /* This request is referenced by user and ch3 by default. */
-    MPIU_Object_set_ref(ureq, 2);
+    MPIR_Object_set_ref(ureq, 2);
 
     /* Enqueue or perform the RMA operation */
     if (target_rank != MPI_PROC_NULL && data_sz != 0) {
@@ -127,7 +127,7 @@ int MPID_Rget(void *origin_addr, int origin_count,
     *request = ureq;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RGET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RGET);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -149,9 +149,9 @@ int MPID_Raccumulate(const void *origin_addr, int origin_count,
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     intptr_t data_sz;
     MPIR_Request *ureq;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_RACCUMULATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RACCUMULATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RACCUMULATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RACCUMULATE);
 
     /* request-based RMA operations are only valid within a passive epoch */
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
@@ -166,7 +166,7 @@ int MPID_Raccumulate(const void *origin_addr, int origin_count,
     ureq->kind = MPIR_REQUEST_KIND__RMA;
 
     /* This request is referenced by user and ch3 by default. */
-    MPIU_Object_set_ref(ureq, 2);
+    MPIR_Object_set_ref(ureq, 2);
 
     MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz, dtp, dt_true_lb);
 
@@ -190,7 +190,7 @@ int MPID_Raccumulate(const void *origin_addr, int origin_count,
     *request = ureq;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RACCUMULATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RACCUMULATE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -213,9 +213,9 @@ int MPID_Rget_accumulate(const void *origin_addr, int origin_count,
     MPI_Aint dt_true_lb ATTRIBUTE((unused));
     intptr_t data_sz, trg_data_sz;
     MPIR_Request *ureq;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_RGET_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RGET_ACCUMULATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RGET_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RGET_ACCUMULATE);
 
     /* request-based RMA operations are only valid within a passive epoch */
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
@@ -230,7 +230,7 @@ int MPID_Rget_accumulate(const void *origin_addr, int origin_count,
     ureq->kind = MPIR_REQUEST_KIND__RMA;
 
     /* This request is referenced by user and ch3 by default. */
-    MPIU_Object_set_ref(ureq, 2);
+    MPIR_Object_set_ref(ureq, 2);
 
     /* Note that GACC is only a no-op if no data goes in both directions */
     MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz, dtp, dt_true_lb);
@@ -257,7 +257,7 @@ int MPID_Rget_accumulate(const void *origin_addr, int origin_count,
     *request = ureq;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RGET_ACCUMULATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RGET_ACCUMULATE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 10a5248..80a482a 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -335,9 +335,9 @@ static inline int flush_local_all(MPIR_Win * win_ptr)
     int local_completed = 0;
     MPIDI_RMA_Target_t *curr_target = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_FLUSH_LOCAL_ALL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_FLUSH_LOCAL_ALL);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_FLUSH_LOCAL_ALL);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_FLUSH_LOCAL_ALL);
 
     /* Set sync_flag in sync struct. */
     for (i = 0; i < win_ptr->num_slots; i++) {
@@ -368,7 +368,7 @@ static inline int flush_local_all(MPIR_Win * win_ptr)
     } while (!local_completed);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_FLUSH_LOCAL_ALL);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_FLUSH_LOCAL_ALL);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -386,9 +386,9 @@ static inline int flush_all(MPIR_Win * win_ptr)
     int remote_completed = 0;
     MPIDI_RMA_Target_t *curr_target = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_FLUSH_ALL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_FLUSH_ALL);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_FLUSH_ALL);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_FLUSH_ALL);
 
     /* Set sync_flag in sync struct. */
     for (i = 0; i < win_ptr->num_slots; i++) {
@@ -418,7 +418,7 @@ static inline int flush_all(MPIR_Win * win_ptr)
     } while (!remote_completed);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_FLUSH_ALL);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_FLUSH_ALL);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -436,15 +436,15 @@ static int fence_barrier_complete(MPIR_Request * sreq)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
 
-    MPIDI_STATE_DECL(MPID_STATE_FENCE_BARRIER_COMPLETE);
-    MPIDI_FUNC_ENTER(MPID_STATE_FENCE_BARRIER_COMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_FENCE_BARRIER_COMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_FENCE_BARRIER_COMPLETE);
 
     MPIR_Win_get_ptr(sreq->dev.source_win_handle, win_ptr);
-    MPIU_Assert(win_ptr != NULL);
+    MPIR_Assert(win_ptr != NULL);
 
     /* decrement incomplete ibarrier request counter */
     win_ptr->sync_request_cnt--;
-    MPIU_Assert(win_ptr->sync_request_cnt >= 0);
+    MPIR_Assert(win_ptr->sync_request_cnt >= 0);
 
     if (win_ptr->sync_request_cnt == 0) {
         if (win_ptr->states.access_state == MPIDI_RMA_FENCE_ISSUED) {
@@ -460,7 +460,7 @@ static int fence_barrier_complete(MPIR_Request * sreq)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_FENCE_BARRIER_COMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_FENCE_BARRIER_COMPLETE);
     return mpi_errno;
 
   fn_fail:
@@ -486,10 +486,10 @@ int MPID_Win_fence(int assert, MPIR_Win * win_ptr)
     int scalable_fence_enabled = 0;
     int *rma_target_marks = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_FENCE);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_FENCE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_FENCE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_FENCE);
 
     MPIR_ERR_CHKANDJUMP((win_ptr->states.access_state != MPIDI_RMA_NONE &&
                          win_ptr->states.access_state != MPIDI_RMA_FENCE_ISSUED &&
@@ -557,7 +557,7 @@ int MPID_Win_fence(int assert, MPIR_Win * win_ptr)
 
     /* Perform basic algorithm by calling reduce-scatter */
     if (!scalable_fence_enabled) {
-        MPIU_CHKLMEM_MALLOC(rma_target_marks, int *, comm_size * sizeof(int),
+        MPIR_CHKLMEM_MALLOC(rma_target_marks, int *, comm_size * sizeof(int),
                             mpi_errno, "rma_target_marks");
         for (i = 0; i < comm_size; i++)
             rma_target_marks[i] = 0;
@@ -581,7 +581,7 @@ int MPID_Win_fence(int assert, MPIR_Win * win_ptr)
 
         win_ptr->at_completion_counter -= comm_size;
         win_ptr->at_completion_counter += rma_target_marks[0];
-        MPIU_Assert(win_ptr->at_completion_counter >= 0);
+        MPIR_Assert(win_ptr->at_completion_counter >= 0);
 
         win_ptr->states.access_state = MPIDI_RMA_FENCE_GRANTED;
     }
@@ -692,8 +692,8 @@ int MPID_Win_fence(int assert, MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_FENCE);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_FENCE);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -710,10 +710,10 @@ int MPID_Win_post(MPIR_Group * post_grp_ptr, int assert, MPIR_Win * win_ptr)
 {
     int *post_ranks_in_win_grp;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(3);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_POST);
+    MPIR_CHKLMEM_DECL(3);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_POST);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_POST);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_POST);
 
     /* Note that here we cannot distinguish if this exposure epoch is overlapped
      * with an exposure epoch of FENCE (which is not allowed), since FENCE may be
@@ -745,15 +745,15 @@ int MPID_Win_post(MPIR_Group * post_grp_ptr, int assert, MPIR_Win * win_ptr)
         win_comm_ptr = win_ptr->comm_ptr;
         rank = win_ptr->comm_ptr->rank;
 
-        MPIU_CHKLMEM_MALLOC(post_ranks_in_win_grp, int *,
+        MPIR_CHKLMEM_MALLOC(post_ranks_in_win_grp, int *,
                             post_grp_size * sizeof(int), mpi_errno, "post_ranks_in_win_grp");
         mpi_errno = fill_ranks_in_win_grp(win_ptr, post_grp_ptr, post_ranks_in_win_grp);
         if (mpi_errno != MPI_SUCCESS)
             MPIR_ERR_POP(mpi_errno);
 
-        MPIU_CHKLMEM_MALLOC(req, MPI_Request *, post_grp_size * sizeof(MPI_Request),
+        MPIR_CHKLMEM_MALLOC(req, MPI_Request *, post_grp_size * sizeof(MPI_Request),
                             mpi_errno, "req");
-        MPIU_CHKLMEM_MALLOC(status, MPI_Status *, post_grp_size * sizeof(MPI_Status),
+        MPIR_CHKLMEM_MALLOC(status, MPI_Status *, post_grp_size * sizeof(MPI_Status),
                             mpi_errno, "status");
 
         /* Send a 0-byte message to the source processes */
@@ -790,8 +790,8 @@ int MPID_Win_post(MPIR_Group * post_grp_ptr, int assert, MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_POST);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_POST);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -808,14 +808,14 @@ static int start_req_complete(MPIR_Request * req)
     int mpi_errno = MPI_SUCCESS;
     MPIR_Win *win_ptr = NULL;
 
-    MPIDI_STATE_DECL(MPID_STATE_START_REQ_COMPLETE);
-    MPIDI_FUNC_ENTER(MPID_STATE_START_REQ_COMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_START_REQ_COMPLETE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_START_REQ_COMPLETE);
 
     MPIR_Win_get_ptr(req->dev.source_win_handle, win_ptr);
-    MPIU_Assert(win_ptr != NULL);
+    MPIR_Assert(win_ptr != NULL);
 
     win_ptr->sync_request_cnt--;
-    MPIU_Assert(win_ptr->sync_request_cnt >= 0);
+    MPIR_Assert(win_ptr->sync_request_cnt >= 0);
 
     if (win_ptr->sync_request_cnt == 0) {
         win_ptr->states.access_state = MPIDI_RMA_PSCW_GRANTED;
@@ -825,7 +825,7 @@ static int start_req_complete(MPIR_Request * req)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_START_REQ_COMPLETE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_START_REQ_COMPLETE);
     return mpi_errno;
 
   fn_fail:
@@ -839,11 +839,11 @@ static int start_req_complete(MPIR_Request * req)
 int MPID_Win_start(MPIR_Group * group_ptr, int assert, MPIR_Win * win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKLMEM_DECL(2);
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_START);
+    MPIR_CHKLMEM_DECL(2);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_START);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_START);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_START);
 
     /* Note that here we cannot distinguish if this access epoch is overlapped
      * with an access epoch of FENCE (which is not allowed), since FENCE may be
@@ -857,7 +857,7 @@ int MPID_Win_start(MPIR_Group * group_ptr, int assert, MPIR_Win * win_ptr)
 
     win_ptr->start_grp_size = group_ptr->size;
 
-    MPIU_CHKPMEM_MALLOC(win_ptr->start_ranks_in_win_grp, int *,
+    MPIR_CHKPMEM_MALLOC(win_ptr->start_ranks_in_win_grp, int *,
                         win_ptr->start_grp_size * sizeof(int),
                         mpi_errno, "win_ptr->start_ranks_in_win_grp");
 
@@ -877,9 +877,9 @@ int MPID_Win_start(MPIR_Group * group_ptr, int assert, MPIR_Win * win_ptr)
         /* post IRECVs */
         if (win_ptr->shm_allocated == TRUE) {
             int node_comm_size = comm_ptr->node_comm->local_size;
-            MPIU_CHKLMEM_MALLOC(intra_start_req, MPI_Request *,
+            MPIR_CHKLMEM_MALLOC(intra_start_req, MPI_Request *,
                                 node_comm_size * sizeof(MPI_Request), mpi_errno, "intra_start_req");
-            MPIU_CHKLMEM_MALLOC(intra_start_status, MPI_Status *,
+            MPIR_CHKLMEM_MALLOC(intra_start_status, MPI_Status *,
                                 node_comm_size * sizeof(MPI_Status),
                                 mpi_errno, "intra_start_status");
         }
@@ -945,11 +945,11 @@ int MPID_Win_start(MPIR_Group * group_ptr, int assert, MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_START);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_START);
     return mpi_errno;
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -964,9 +964,9 @@ int MPID_Win_complete(MPIR_Win * win_ptr)
     int mpi_errno = MPI_SUCCESS;
     int i, dst, rank = win_ptr->comm_ptr->rank;
     MPIDI_RMA_Target_t *curr_target;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_COMPLETE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_COMPLETE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_COMPLETE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_COMPLETE);
 
     /* Access epochs on the same window must be disjoint. */
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PSCW_ISSUED &&
@@ -990,7 +990,7 @@ int MPID_Win_complete(MPIR_Win * win_ptr)
         dst = win_ptr->start_ranks_in_win_grp[i];
         if (dst == rank) {
             win_ptr->at_completion_counter--;
-            MPIU_Assert(win_ptr->at_completion_counter >= 0);
+            MPIR_Assert(win_ptr->at_completion_counter >= 0);
             continue;
         }
 
@@ -1033,7 +1033,7 @@ int MPID_Win_complete(MPIR_Win * win_ptr)
     win_ptr->start_ranks_in_win_grp = NULL;
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_COMPLETE);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_COMPLETE);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1050,9 +1050,9 @@ int MPID_Win_complete(MPIR_Win * win_ptr)
 int MPID_Win_wait(MPIR_Win * win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_WAIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_WAIT);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_WAIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_WAIT);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.exposure_state != MPIDI_RMA_PSCW_EXPO,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
@@ -1073,7 +1073,7 @@ int MPID_Win_wait(MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_WAIT);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_WAIT);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1089,9 +1089,9 @@ int MPID_Win_wait(MPIR_Win * win_ptr)
 int MPID_Win_test(MPIR_Win * win_ptr, int *flag)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_TEST);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_TEST);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_TEST);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_TEST);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.exposure_state != MPIDI_RMA_PSCW_EXPO,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
@@ -1113,7 +1113,7 @@ int MPID_Win_test(MPIR_Win * win_ptr, int *flag)
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_TEST);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_TEST);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1140,9 +1140,9 @@ int MPID_Win_lock(int lock_type, int dest, int assert, MPIR_Win * win_ptr)
     MPIDI_RMA_Target_t *target = NULL;
     MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_LOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_LOCK);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_LOCK);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_LOCK);
 
     /* Note that here we cannot distinguish if this access epoch is overlapped
      * with an access epoch of FENCE (which is not allowed), since FENCE may be
@@ -1230,7 +1230,7 @@ int MPID_Win_lock(int lock_type, int dest, int assert, MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_LOCK);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_LOCK);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1249,9 +1249,9 @@ int MPID_Win_unlock(int dest, MPIR_Win * win_ptr)
     MPIDI_RMA_Target_t *target = NULL;
     enum MPIDI_RMA_sync_types sync_flag;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_UNLOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_UNLOCK);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_UNLOCK);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_UNLOCK);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET,
                         mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");
@@ -1328,7 +1328,7 @@ int MPID_Win_unlock(int dest, MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_UNLOCK);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_UNLOCK);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1348,9 +1348,9 @@ int MPID_Win_flush(int dest, MPIR_Win * win_ptr)
     int rank = win_ptr->comm_ptr->rank;
     MPIDI_RMA_Target_t *target = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_FLUSH);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_FLUSH);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_FLUSH);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_FLUSH);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
@@ -1419,7 +1419,7 @@ int MPID_Win_flush(int dest, MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_FLUSH);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_FLUSH);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1439,9 +1439,9 @@ int MPID_Win_flush_local(int dest, MPIR_Win * win_ptr)
     int rank = win_ptr->comm_ptr->rank;
     MPIDI_RMA_Target_t *target = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_FLUSH_LOCAL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_FLUSH_LOCAL);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_FLUSH_LOCAL);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_FLUSH_LOCAL);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
@@ -1495,7 +1495,7 @@ int MPID_Win_flush_local(int dest, MPIR_Win * win_ptr)
     } while (!local_completed);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_FLUSH_LOCAL);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_FLUSH_LOCAL);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1512,9 +1512,9 @@ int MPID_Win_lock_all(int assert, MPIR_Win * win_ptr)
 {
     int i, rank = win_ptr->comm_ptr->rank;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_LOCK_ALL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_LOCK_ALL);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_LOCK_ALL);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_LOCK_ALL);
 
     /* Note that here we cannot distinguish if this access epoch is overlapped
      * with an access epoch of FENCE (which is not allowed), since FENCE may be
@@ -1534,7 +1534,7 @@ int MPID_Win_lock_all(int assert, MPIR_Win * win_ptr)
 
     win_ptr->lock_all_assert = assert;
 
-    MPIU_Assert(win_ptr->outstanding_locks == 0);
+    MPIR_Assert(win_ptr->outstanding_locks == 0);
 
     /* Acquire the lock on myself and the lock on processes on SHM.
      * No need to create a target for them. */
@@ -1574,7 +1574,7 @@ int MPID_Win_lock_all(int assert, MPIR_Win * win_ptr)
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_LOCK_ALL);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_LOCK_ALL);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1595,9 +1595,9 @@ int MPID_Win_unlock_all(MPIR_Win * win_ptr)
     MPIDI_RMA_Target_t *curr_target = NULL;
     enum MPIDI_RMA_sync_types sync_flag;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_UNLOCK_ALL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_UNLOCK_ALL);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_UNLOCK_ALL);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_UNLOCK_ALL);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
@@ -1706,7 +1706,7 @@ int MPID_Win_unlock_all(MPIR_Win * win_ptr)
     win_ptr->lock_all_assert = 0;
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_UNLOCK_ALL);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_UNLOCK_ALL);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1722,9 +1722,9 @@ int MPID_Win_unlock_all(MPIR_Win * win_ptr)
 int MPID_Win_flush_all(MPIR_Win * win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPIDI_STATE_MPID_WIN_FLUSH_ALL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPIDI_STATE_MPID_WIN_FLUSH_ALL);
 
-    MPIDI_RMA_FUNC_ENTER(MPIDI_STATE_MPID_WIN_FLUSH_ALL);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPIDI_STATE_MPID_WIN_FLUSH_ALL);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
@@ -1742,7 +1742,7 @@ int MPID_Win_flush_all(MPIR_Win * win_ptr)
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPIDI_STATE_MPID_WIN_FLUSH_ALL);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPIDI_STATE_MPID_WIN_FLUSH_ALL);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1758,9 +1758,9 @@ int MPID_Win_flush_all(MPIR_Win * win_ptr)
 int MPID_Win_flush_local_all(MPIR_Win * win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_FLUSH_LOCAL_ALL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_FLUSH_LOCAL_ALL);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_FLUSH_LOCAL_ALL);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_FLUSH_LOCAL_ALL);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
@@ -1778,7 +1778,7 @@ int MPID_Win_flush_local_all(MPIR_Win * win_ptr)
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_FLUSH_LOCAL_ALL);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_FLUSH_LOCAL_ALL);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -1794,9 +1794,9 @@ int MPID_Win_flush_local_all(MPIR_Win * win_ptr)
 int MPID_Win_sync(MPIR_Win * win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_SYNC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_SYNC);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_SYNC);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_SYNC);
 
     MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
                         win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
@@ -1807,7 +1807,7 @@ int MPID_Win_sync(MPIR_Win * win_ptr)
     OPA_read_write_barrier();
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_SYNC);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_SYNC);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
diff --git a/src/mpid/ch3/src/ch3u_win_fns.c b/src/mpid/ch3/src/ch3u_win_fns.c
index fac96df..f3324e0 100644
--- a/src/mpid/ch3/src/ch3u_win_fns.c
+++ b/src/mpid/ch3/src/ch3u_win_fns.c
@@ -17,9 +17,9 @@ extern MPIR_T_pvar_timer_t PVAR_TIMER_rma_wincreate_allgather ATTRIBUTE((unused)
 int MPIDI_Win_fns_init(MPIDI_CH3U_Win_fns_t * win_fns)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FNS_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_WIN_FNS_INIT);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FNS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_WIN_FNS_INIT);
 
     win_fns->create = MPIDI_CH3U_Win_create;
     win_fns->allocate = MPIDI_CH3U_Win_allocate;
@@ -28,7 +28,7 @@ int MPIDI_Win_fns_init(MPIDI_CH3U_Win_fns_t * win_fns)
     win_fns->gather_info = MPIDI_CH3U_Win_gather_info;
     win_fns->shared_query = MPIDI_CH3U_Win_shared_query;
 
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FNS_INIT);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_WIN_FNS_INIT);
 
     return mpi_errno;
 }
@@ -44,11 +44,11 @@ int MPIDI_CH3U_Win_gather_info(void *base, MPI_Aint size, int disp_unit,
     int mpi_errno = MPI_SUCCESS, i, k, comm_size, rank;
     MPI_Aint *tmp_buf;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIU_CHKPMEM_DECL(1);
-    MPIU_CHKLMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_GATHER_INFO);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_GATHER_INFO);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_WIN_GATHER_INFO);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3U_WIN_GATHER_INFO);
 
     comm_size = (*win_ptr)->comm_ptr->local_size;
     rank = (*win_ptr)->comm_ptr->rank;
@@ -56,19 +56,19 @@ int MPIDI_CH3U_Win_gather_info(void *base, MPI_Aint size, int disp_unit,
     MPIR_T_PVAR_TIMER_START(RMA, rma_wincreate_allgather);
     /* allocate memory for the base addresses, disp_units, and
      * completion counters of all processes */
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->basic_info_table, MPIDI_Win_basic_info_t *,
+    MPIR_CHKPMEM_MALLOC((*win_ptr)->basic_info_table, MPIDI_Win_basic_info_t *,
                         comm_size * sizeof(MPIDI_Win_basic_info_t),
                         mpi_errno, "(*win_ptr)->basic_info_table");
 
     /* get the addresses of the windows, window objects, and completion
      * counters of all processes.  allocate temp. buffer for communication */
-    MPIU_CHKLMEM_MALLOC(tmp_buf, MPI_Aint *, 4 * comm_size * sizeof(MPI_Aint),
+    MPIR_CHKLMEM_MALLOC(tmp_buf, MPI_Aint *, 4 * comm_size * sizeof(MPI_Aint),
                         mpi_errno, "tmp_buf");
 
     /* FIXME: This needs to be fixed for heterogeneous systems */
     /* FIXME: If we wanted to validate the transfer as within range at the
      * origin, we'd also need the window size. */
-    tmp_buf[4 * rank] = MPIU_PtrToAint(base);
+    tmp_buf[4 * rank] = MPIR_Ptr_to_aint(base);
     tmp_buf[4 * rank + 1] = size;
     tmp_buf[4 * rank + 2] = (MPI_Aint) disp_unit;
     tmp_buf[4 * rank + 3] = (MPI_Aint) (*win_ptr)->handle;
@@ -83,19 +83,19 @@ int MPIDI_CH3U_Win_gather_info(void *base, MPI_Aint size, int disp_unit,
 
     k = 0;
     for (i = 0; i < comm_size; i++) {
-        (*win_ptr)->basic_info_table[i].base_addr = MPIU_AintToPtr(tmp_buf[k++]);
+        (*win_ptr)->basic_info_table[i].base_addr = MPIR_Aint_to_ptr(tmp_buf[k++]);
         (*win_ptr)->basic_info_table[i].size = tmp_buf[k++];
         (*win_ptr)->basic_info_table[i].disp_unit = (int) tmp_buf[k++];
         (*win_ptr)->basic_info_table[i].win_handle = (MPI_Win) tmp_buf[k++];
     }
 
   fn_exit:
-    MPIU_CHKLMEM_FREEALL();
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_WIN_GATHER_INFO);
+    MPIR_CHKLMEM_FREEALL();
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3U_WIN_GATHER_INFO);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -109,9 +109,9 @@ int MPIDI_CH3U_Win_create(void *base, MPI_Aint size, int disp_unit, MPIR_Info *
                           MPIR_Comm * comm_ptr, MPIR_Win ** win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_CREATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_CREATE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_WIN_CREATE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3U_WIN_CREATE);
 
     mpi_errno = MPIDI_CH3U_Win_fns.gather_info(base, size, disp_unit, info, comm_ptr, win_ptr);
     if (mpi_errno != MPI_SUCCESS)
@@ -127,7 +127,7 @@ int MPIDI_CH3U_Win_create(void *base, MPI_Aint size, int disp_unit, MPIR_Info *
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_WIN_CREATE);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3U_WIN_CREATE);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -143,16 +143,16 @@ int MPIDI_CH3U_Win_create(void *base, MPI_Aint size, int disp_unit, MPIR_Info *
 int MPIDI_CH3U_Win_create_dynamic(MPIR_Info * info, MPIR_Comm * comm_ptr, MPIR_Win ** win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_CREATE_DYNAMIC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_CREATE_DYNAMIC);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_WIN_CREATE_DYNAMIC);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3U_WIN_CREATE_DYNAMIC);
 
     mpi_errno = MPIDI_CH3U_Win_fns.gather_info(MPI_BOTTOM, 0, 1, info, comm_ptr, win_ptr);
     if (mpi_errno != MPI_SUCCESS)
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_WIN_CREATE_DYNAMIC);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3U_WIN_CREATE_DYNAMIC);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -168,13 +168,13 @@ int MPID_Win_attach(MPIR_Win * win, void *base, MPI_Aint size)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_ATTACH);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_ATTACH);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_ATTACH);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_ATTACH);
 
     /* no op, all of memory is exposed */
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_ATTACH);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_ATTACH);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -191,13 +191,13 @@ int MPID_Win_detach(MPIR_Win * win, const void *base)
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_DETACH);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_DETACH);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_DETACH);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_DETACH);
 
     /* no op, all of memory is exposed */
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_DETACH);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_DETACH);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -214,9 +214,9 @@ int MPIDI_CH3U_Win_allocate(MPI_Aint size, int disp_unit, MPIR_Info * info,
                             MPIR_Comm * comm_ptr, void *baseptr, MPIR_Win ** win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE);
 
     if ((*win_ptr)->info_args.alloc_shm == TRUE) {
         if (MPIDI_CH3U_Win_fns.allocate_shm != NULL) {
@@ -233,7 +233,7 @@ int MPIDI_CH3U_Win_allocate(MPI_Aint size, int disp_unit, MPIR_Info * info,
         MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -249,13 +249,13 @@ int MPIDI_CH3U_Win_allocate_no_shm(MPI_Aint size, int disp_unit, MPIR_Info * inf
 {
     void **base_pp = (void **) baseptr;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
 
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE_NO_SHM);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE_NO_SHM);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE_NO_SHM);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE_NO_SHM);
 
     if (size > 0) {
-        MPIU_CHKPMEM_MALLOC(*base_pp, void *, size, mpi_errno, "(*win_ptr)->base");
+        MPIR_CHKPMEM_MALLOC(*base_pp, void *, size, mpi_errno, "(*win_ptr)->base");
         MPL_VG_MEM_INIT(*base_pp, size);
     }
     else if (size == 0) {
@@ -273,11 +273,11 @@ int MPIDI_CH3U_Win_allocate_no_shm(MPI_Aint size, int disp_unit, MPIR_Info * inf
     }
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE_NO_SHM);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE_NO_SHM);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -292,15 +292,15 @@ int MPIDI_CH3U_Win_shared_query(MPIR_Win * win_ptr, int target_rank, MPI_Aint *
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_SHARED_QUERY);
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_WIN_SHARED_QUERY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_SHARED_QUERY);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3U_WIN_SHARED_QUERY);
 
     *(void **) baseptr = win_ptr->base;
     *size = win_ptr->size;
     *disp_unit = win_ptr->disp_unit;
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_WIN_SHARED_QUERY);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3U_WIN_SHARED_QUERY);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
   fn_fail:
@@ -316,9 +316,9 @@ int MPIDI_CH3U_Win_shared_query(MPIR_Win * win_ptr, int target_rank, MPI_Aint *
 int MPID_Win_set_info(MPIR_Win * win, MPIR_Info * info)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_SET_INFO);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_SET_INFO);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_WIN_SET_INFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_WIN_SET_INFO);
 
     /********************************************************/
     /************** check for info no_locks *****************/
@@ -381,7 +381,7 @@ int MPID_Win_set_info(MPIR_Win * win, MPIR_Info * info)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_SET_INFO);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_WIN_SET_INFO);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -395,12 +395,12 @@ int MPID_Win_set_info(MPIR_Win * win, MPIR_Info * info)
 int MPID_Win_get_info(MPIR_Win * win, MPIR_Info ** info_used)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_GET_INFO);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_GET_INFO);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_WIN_GET_INFO);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_WIN_GET_INFO);
 
     /* Allocate an empty info object */
-    mpi_errno = MPIU_Info_alloc(info_used);
+    mpi_errno = MPIR_Info_alloc(info_used);
     if (mpi_errno != MPI_SUCCESS) {
         MPIR_ERR_POP(mpi_errno);
     }
@@ -475,7 +475,7 @@ int MPID_Win_get_info(MPIR_Win * win, MPIR_Info ** info_used)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_GET_INFO);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_WIN_GET_INFO);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpid_abort.c b/src/mpid/ch3/src/mpid_abort.c
index 8cb5d37..032b5a1 100644
--- a/src/mpid/ch3/src/mpid_abort.c
+++ b/src/mpid/ch3/src/mpid_abort.c
@@ -36,9 +36,9 @@ int MPID_Abort(MPIR_Comm * comm, int mpi_errno, int exit_code,
     int rank;
     char msg[MPI_MAX_ERROR_STRING] = "";
     char error_str[MPI_MAX_ERROR_STRING + 100];
-    MPIDI_STATE_DECL(MPID_STATE_MPID_ABORT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_ABORT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ABORT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_ABORT);
 
     if (error_msg == NULL) {
 	/* Create a default error message */
@@ -80,7 +80,7 @@ int MPID_Abort(MPIR_Comm * comm, int mpi_errno, int exit_code,
     MPIDU_Ftb_finalize();
     
 #ifdef HAVE_DEBUGGER_SUPPORT
-    MPIR_DebuggerSetAborting( error_msg );
+    MPIR_Debugger_set_aborting( error_msg );
 #endif
 
     /* Dumping the error message in MPICH and passing the same
@@ -106,7 +106,7 @@ int MPID_Abort(MPIR_Comm * comm, int mpi_errno, int exit_code,
 
     /* pmi_abort should not return but if it does, exit here.  If it does,
        add the function exit code before calling the final exit.  */
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_ABORT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_ABORT);
     MPL_exit(exit_code);
 
     return MPI_ERR_INTERN;
diff --git a/src/mpid/ch3/src/mpid_aint.c b/src/mpid/ch3/src/mpid_aint.c
index e5f6af7..fe60ebe 100644
--- a/src/mpid/ch3/src/mpid_aint.c
+++ b/src/mpid/ch3/src/mpid_aint.c
@@ -24,12 +24,12 @@ MPI_Aint MPID_Aint_add(MPI_Aint base, MPI_Aint disp)
 {
     MPI_Aint result;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_AINT_ADD);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_AINT_ADD);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_AINT_ADD);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_AINT_ADD);
 
-    result =  MPIU_VOID_PTR_CAST_TO_MPI_AINT ((char*)MPIU_AINT_CAST_TO_VOID_PTR(base) + disp);
+    result =  MPIR_VOID_PTR_CAST_TO_MPI_AINT ((char*)MPIR_AINT_CAST_TO_VOID_PTR(base) + disp);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_AINT_ADD);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_AINT_ADD);
     return result;
 }
 
@@ -51,11 +51,11 @@ MPI_Aint MPID_Aint_diff(MPI_Aint addr1, MPI_Aint addr2)
 {
     MPI_Aint result;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_AINT_DIFF);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_AINT_DIFF);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_AINT_DIFF);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_AINT_DIFF);
 
-    result =  MPIU_PTR_DISP_CAST_TO_MPI_AINT ((char*)MPIU_AINT_CAST_TO_VOID_PTR(addr1) - (char*)MPIU_AINT_CAST_TO_VOID_PTR(addr2));
+    result =  MPIR_PTR_DISP_CAST_TO_MPI_AINT ((char*)MPIR_AINT_CAST_TO_VOID_PTR(addr1) - (char*)MPIR_AINT_CAST_TO_VOID_PTR(addr2));
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_AINT_DIFF);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_AINT_DIFF);
     return result;
 }
diff --git a/src/mpid/ch3/src/mpid_cancel_recv.c b/src/mpid/ch3/src/mpid_cancel_recv.c
index 69da3a1..146bd1f 100644
--- a/src/mpid/ch3/src/mpid_cancel_recv.c
+++ b/src/mpid/ch3/src/mpid_cancel_recv.c
@@ -15,11 +15,11 @@ int MPID_Cancel_recv(MPIR_Request * rreq)
     int netmod_cancelled = TRUE;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_RECV);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_RECV);
     
-    MPIU_Assert(rreq->kind == MPIR_REQUEST_KIND__RECV);
+    MPIR_Assert(rreq->kind == MPIR_REQUEST_KIND__RECV);
     
     /* If the netmod has its own cancel_recv function, we need to call
        it here. ANYSOURCE cancels (netmod and otherwise) are handled by
@@ -52,7 +52,7 @@ int MPID_Cancel_recv(MPIR_Request * rreq)
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_CANCEL_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_RECV);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpid_cancel_send.c b/src/mpid/ch3/src/mpid_cancel_send.c
index 2e6d7ff..251c068 100644
--- a/src/mpid/ch3/src/mpid_cancel_send.c
+++ b/src/mpid/ch3/src/mpid_cancel_send.c
@@ -20,11 +20,11 @@ int MPID_Cancel_send(MPIR_Request * sreq)
     int proto;
     int flag;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CANCEL_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_SEND);
     
-    MPIU_Assert(sreq->kind == MPIR_REQUEST_KIND__SEND);
+    MPIR_Assert(sreq->kind == MPIR_REQUEST_KIND__SEND);
 
     MPIDI_Request_cancel_pending(sreq, &flag);
     if (flag)
@@ -61,7 +61,7 @@ int MPID_Cancel_send(MPIR_Request * sreq)
 	MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
 	if (rreq)
 	{
-	    MPIU_Assert(rreq->dev.partner_request == sreq);
+	    MPIR_Assert(rreq->dev.partner_request == sreq);
 	    
 	    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
              "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x",
@@ -144,7 +144,7 @@ int MPID_Cancel_send(MPIR_Request * sreq)
 		       safe to reset ref_count and cc */
                     MPIR_cc_set(&sreq->cc, 0);
                     /* FIXME should be a decr and assert, not a set */
-		    MPIU_Object_set_ref(sreq, 1);
+		    MPIR_Object_set_ref(sreq, 1);
 		    goto fn_exit;
 		}
 	    }
@@ -159,7 +159,7 @@ int MPID_Cancel_send(MPIR_Request * sreq)
 		   reset ref_count and cc */
                 MPIR_cc_set(&sreq->cc, 0);
                 /* FIXME should be a decr and assert, not a set */
-		MPIU_Object_set_ref(sreq, 1);
+		MPIR_Object_set_ref(sreq, 1);
 		goto fn_exit;
 	    }
 	}
@@ -218,7 +218,7 @@ int MPID_Cancel_send(MPIR_Request * sreq)
        should be adequate. */
  fn_fail:
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_CANCEL_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_SEND);
     return mpi_errno;
 }
 
diff --git a/src/mpid/ch3/src/mpid_comm_disconnect.c b/src/mpid/ch3/src/mpid_comm_disconnect.c
index de414e6..f4fa049 100644
--- a/src/mpid/ch3/src/mpid_comm_disconnect.c
+++ b/src/mpid/ch3/src/mpid_comm_disconnect.c
@@ -25,9 +25,9 @@
 int MPID_Comm_disconnect(MPIR_Comm *comm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_DISCONNECT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_DISCONNECT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_DISCONNECT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_DISCONNECT);
 
     /* Check to make sure the communicator hasn't already been revoked */
     if (comm_ptr->revoked) {
@@ -48,7 +48,7 @@ int MPID_Comm_disconnect(MPIR_Comm *comm_ptr)
 
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_DISCONNECT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_DISCONNECT);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpid_comm_failure_ack.c b/src/mpid/ch3/src/mpid_comm_failure_ack.c
index 037611e..8ea5b20 100644
--- a/src/mpid/ch3/src/mpid_comm_failure_ack.c
+++ b/src/mpid/ch3/src/mpid_comm_failure_ack.c
@@ -13,9 +13,9 @@
 int MPID_Comm_failure_ack(MPIR_Comm *comm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_FAILURE_ACK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_FAILURE_ACK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_FAILURE_ACK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_FAILURE_ACK);
 
     /* Update the list of failed processes that we know about locally.
      * This part could technically be turned off and be a correct
@@ -32,7 +32,7 @@ int MPID_Comm_failure_ack(MPIR_Comm *comm_ptr)
     comm_ptr->dev.anysource_enabled = 1;
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_FAILURE_ACK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_FAILURE_ACK);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -46,9 +46,9 @@ int MPID_Comm_failure_get_acked(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Group *failed_group, *comm_group;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);
 
     /* Get the group of all failed processes */
     MPIDI_CH3U_Check_for_failed_procs();
@@ -67,7 +67,7 @@ int MPID_Comm_failure_get_acked(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr)
     MPIR_Group_release(failed_group);
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c b/src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c
index bae3891..3d35432 100644
--- a/src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c
+++ b/src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c
@@ -93,9 +93,9 @@ int MPID_Comm_get_all_failed_procs(MPIR_Comm *comm_ptr, MPIR_Group **failed_grou
     int i, j, bitarray_size;
     int *bitarray, *remote_bitarray;
     MPIR_Group *local_fail;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_GET_ALL_FAILED_PROCS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_GET_ALL_FAILED_PROCS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_GET_ALL_FAILED_PROCS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_GET_ALL_FAILED_PROCS);
 
     /* Kick the progress engine in case it's been a while so we get all the
      * latest updates about failures */
@@ -156,7 +156,7 @@ int MPID_Comm_get_all_failed_procs(MPIR_Comm *comm_ptr, MPIR_Group **failed_grou
     MPL_free(remote_bitarray);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_GET_ALL_FAILED_PROCS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_GET_ALL_FAILED_PROCS);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpid_comm_revoke.c b/src/mpid/ch3/src/mpid_comm_revoke.c
index 56e6abd..23cd53c 100644
--- a/src/mpid/ch3/src/mpid_comm_revoke.c
+++ b/src/mpid/ch3/src/mpid_comm_revoke.c
@@ -28,9 +28,9 @@ int MPID_Comm_revoke(MPIR_Comm *comm_ptr, int is_remote)
     MPIR_Request *request;
     MPIDI_CH3_Pkt_t upkt;
     MPIDI_CH3_Pkt_revoke_t *revoke_pkt = &upkt.revoke;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_REVOKE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_REVOKE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_REVOKE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_REVOKE);
 
     if (0 == comm_ptr->revoked) {
         /* Mark the communicator as revoked locally */
@@ -98,6 +98,6 @@ int MPID_Comm_revoke(MPIR_Comm *comm_ptr, int is_remote)
         }
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_REVOKE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_REVOKE);
     return MPI_SUCCESS;
 }
diff --git a/src/mpid/ch3/src/mpid_comm_spawn_multiple.c b/src/mpid/ch3/src/mpid_comm_spawn_multiple.c
index 62b333a..c702d2b 100644
--- a/src/mpid/ch3/src/mpid_comm_spawn_multiple.c
+++ b/src/mpid/ch3/src/mpid_comm_spawn_multiple.c
@@ -39,9 +39,9 @@ int MPID_Comm_spawn_multiple(int count, char *array_of_commands[],
 			     int array_of_errcodes[]) 
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
 
     /* Check to make sure the communicator hasn't already been revoked */
     if (comm_ptr->revoked) {
@@ -64,6 +64,6 @@ int MPID_Comm_spawn_multiple(int count, char *array_of_commands[],
     
 fn_fail:
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_SPAWN_MULTIPLE);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_finalize.c b/src/mpid/ch3/src/mpid_finalize.c
index 49ba490..0bf8586 100644
--- a/src/mpid/ch3/src/mpid_finalize.c
+++ b/src/mpid/ch3/src/mpid_finalize.c
@@ -18,9 +18,9 @@
 int MPID_Finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_FINALIZE);
 
     /*
      * Wait for all posted receives to complete.  For now we are not doing 
@@ -152,7 +152,7 @@ int MPID_Finalize(void)
     MPIDU_Ftb_finalize();
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_FINALIZE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpid_imrecv.c b/src/mpid/ch3/src/mpid_imrecv.c
index 23be8f2..d8c9e67 100644
--- a/src/mpid/ch3/src/mpid_imrecv.c
+++ b/src/mpid/ch3/src/mpid_imrecv.c
@@ -27,8 +27,8 @@ int MPID_Imrecv(void *buf, int count, MPI_Datatype datatype,
         goto fn_exit;
     }
 
-    MPIU_Assert(message != NULL);
-    MPIU_Assert(message->kind == MPIR_REQUEST_KIND__MPROBE);
+    MPIR_Assert(message != NULL);
+    MPIR_Assert(message->kind == MPIR_REQUEST_KIND__MPROBE);
 
     /* promote the request object to be a "real" recv request */
     message->kind = MPIR_REQUEST_KIND__RECV;
@@ -76,7 +76,7 @@ int MPID_Imrecv(void *buf, int count, MPI_Datatype datatype,
 
         if (MPIR_Request_is_complete(rreq)) {
             /* is it ever possible to have (cc==0 && recv_pending>0) ? */
-            MPIU_Assert(!recv_pending);
+            MPIR_Assert(!recv_pending);
 
             /* All of the data has arrived, we need to copy the data and
                then free the buffer. */
@@ -93,7 +93,7 @@ int MPID_Imrecv(void *buf, int count, MPI_Datatype datatype,
         {
             /* there should never be outstanding completion events for an unexpected
              * recv without also having a "pending recv" */
-            MPIU_Assert(recv_pending);
+            MPIR_Assert(recv_pending);
             /* The data is still being transfered across the net.  We'll
                leave it to the progress engine to handle once the
                entire message has arrived. */
diff --git a/src/mpid/ch3/src/mpid_init.c b/src/mpid/ch3/src/mpid_init.c
index 2fd4cca..f4cbadd 100644
--- a/src/mpid/ch3/src/mpid_init.c
+++ b/src/mpid/ch3/src/mpid_init.c
@@ -78,9 +78,9 @@ static int set_eager_threshold(MPIR_Comm *comm_ptr, MPIR_Info *info, void *state
 {
     int mpi_errno = MPI_SUCCESS;
     char *endptr;
-    MPID_MPI_STATE_DECL(MPID_STATE_MPIDI_CH3_SET_EAGER_THRESHOLD);
+    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SET_EAGER_THRESHOLD);
 
-    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SET_EAGER_THRESHOLD);
+    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIDI_CH3_SET_EAGER_THRESHOLD);
 
     comm_ptr->dev.eager_max_msg_sz = strtol(info->value, &endptr, 0);
 
@@ -89,7 +89,7 @@ static int set_eager_threshold(MPIR_Comm *comm_ptr, MPIR_Info *info, void *state
                          info->key);
 
  fn_exit:
-    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SET_EAGER_THRESHOLD);
+    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIDI_CH3_SET_EAGER_THRESHOLD);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -111,9 +111,9 @@ int MPID_Init(int *argc, char ***argv, int requested, int *provided,
     MPIR_Comm * comm;
     int p;
     int val;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_INIT);
 
     /* initialization routine for ch3u_comm.c */
     mpi_errno = MPIDI_CH3I_Comm_init();
@@ -326,7 +326,7 @@ int MPID_Init(int *argc, char ***argv, int requested, int *provided,
 	}
 
 	MPIR_Process.comm_parent = comm;
-	MPIU_Assert(MPIR_Process.comm_parent != NULL);
+	MPIR_Assert(MPIR_Process.comm_parent != NULL);
 	MPL_strncpy(comm->name, "MPI_COMM_PARENT", MPI_MAX_OBJECT_NAME);
         
 	/* FIXME: Check that this intercommunicator gets freed in MPI_Finalize
@@ -354,7 +354,7 @@ int MPID_Init(int *argc, char ***argv, int requested, int *provided,
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_INIT);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
diff --git a/src/mpid/ch3/src/mpid_iprobe.c b/src/mpid/ch3/src/mpid_iprobe.c
index 4911c9b..7e68081 100644
--- a/src/mpid/ch3/src/mpid_iprobe.c
+++ b/src/mpid/ch3/src/mpid_iprobe.c
@@ -19,9 +19,9 @@ int MPID_Iprobe(int source, int tag, MPIR_Comm *comm, int context_offset,
     const int context = comm->recvcontext_id + context_offset;
     int found = 0;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_IPROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_IPROBE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_IPROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_IPROBE);
 
     if (source == MPI_PROC_NULL)
     {
@@ -109,7 +109,7 @@ int MPID_Iprobe(int source, int tag, MPIR_Comm *comm, int context_offset,
     *flag = found;
 
  fn_exit:    
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_IPROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_IPROBE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpid_irecv.c b/src/mpid/ch3/src/mpid_irecv.c
index 55b7bc3..05afa12 100644
--- a/src/mpid/ch3/src/mpid_irecv.c
+++ b/src/mpid/ch3/src/mpid_irecv.c
@@ -17,9 +17,9 @@ int MPID_Irecv(void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int
     MPIR_Request * rreq;
     int found;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_IRECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_IRECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_IRECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_IRECV);
 
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
 			"rank=%d, tag=%d, context=%d", 
@@ -83,7 +83,7 @@ int MPID_Irecv(void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int
 
             if (MPIR_Request_is_complete(rreq)) {
                 /* is it ever possible to have (cc==0 && recv_pending>0) ? */
-                MPIU_Assert(!recv_pending);
+                MPIR_Assert(!recv_pending);
 
                 /* All of the data has arrived, we need to copy the data and 
                    then free the buffer. */
@@ -100,7 +100,7 @@ int MPID_Irecv(void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int
 	    {
                 /* there should never be outstanding completion events for an unexpected
                  * recv without also having a "pending recv" */
-                MPIU_Assert(recv_pending);
+                MPIR_Assert(recv_pending);
 		/* The data is still being transfered across the net.  We'll 
 		   leave it to the progress engine to handle once the
 		   entire message has arrived. */
@@ -171,6 +171,6 @@ int MPID_Irecv(void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int
  fn_fail:
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER,VERBOSE,"IRECV errno: 0x%08x", mpi_errno);
     MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER,VERBOSE,"(class: %d)", MPIR_ERR_GET_CLASS(mpi_errno));
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_IRECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_IRECV);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_irsend.c b/src/mpid/ch3/src/mpid_irsend.c
index 1105509..58071b1 100644
--- a/src/mpid/ch3/src/mpid_irsend.c
+++ b/src/mpid/ch3/src/mpid_irsend.c
@@ -30,9 +30,9 @@ int MPID_Irsend(const void * buf, int count, MPI_Datatype datatype, int rank, in
     MPID_Seqnum_t seqnum;
 #endif    
     int mpi_errno = MPI_SUCCESS;    
-    MPIDI_STATE_DECL(MPID_STATE_MPID_IRSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_IRSEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_IRSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_IRSEND);
 
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
                 "rank=%d, tag=%d, context=%d", 
@@ -70,7 +70,7 @@ int MPID_Irsend(const void * buf, int count, MPI_Datatype datatype, int rank, in
     
     if (rank == MPI_PROC_NULL)
     {
-	MPIU_Object_set_ref(sreq, 1);
+	MPIR_Object_set_ref(sreq, 1);
         MPIR_cc_set(&sreq->cc, 0);
 	goto fn_exit;
     }
@@ -156,6 +156,6 @@ int MPID_Irsend(const void * buf, int count, MPI_Datatype datatype, int rank, in
 		  );
     
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_IRSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_IRSEND);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_isend.c b/src/mpid/ch3/src/mpid_isend.c
index ad91ad1..34d5e03 100644
--- a/src/mpid/ch3/src/mpid_isend.c
+++ b/src/mpid/ch3/src/mpid_isend.c
@@ -42,9 +42,9 @@ int MPID_Isend(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank
 #endif    
     int eager_threshold = -1;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_ISEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_ISEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_ISEND);
 
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
                   "rank=%d, tag=%d, context=%d", 
@@ -83,7 +83,7 @@ int MPID_Isend(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank
 
     if (rank == MPI_PROC_NULL)
     {
-	MPIU_Object_set_ref(sreq, 1);
+	MPIR_Object_set_ref(sreq, 1);
         MPIR_cc_set(&sreq->cc, 0);
 	goto fn_exit;
     }
@@ -188,6 +188,6 @@ int MPID_Isend(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank
 		  );
     
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_ISEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_ISEND);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_issend.c b/src/mpid/ch3/src/mpid_issend.c
index 5397b24..f8ea6f8 100644
--- a/src/mpid/ch3/src/mpid_issend.c
+++ b/src/mpid/ch3/src/mpid_issend.c
@@ -29,9 +29,9 @@ int MPID_Issend(const void * buf, int count, MPI_Datatype datatype, int rank, in
 #endif    
     int eager_threshold = -1;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_ISSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_ISSEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_ISSEND);
 
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
                  "rank=%d, tag=%d, context=%d", 
@@ -69,7 +69,7 @@ int MPID_Issend(const void * buf, int count, MPI_Datatype datatype, int rank, in
     
     if (rank == MPI_PROC_NULL)
     {
-	MPIU_Object_set_ref(sreq, 1);
+	MPIR_Object_set_ref(sreq, 1);
         MPIR_cc_set(&sreq->cc, 0);
 	goto fn_exit;
     }
@@ -132,6 +132,6 @@ int MPID_Issend(const void * buf, int count, MPI_Datatype datatype, int rank, in
 		  )
     
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_ISSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_ISSEND);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_port.c b/src/mpid/ch3/src/mpid_port.c
index 7940c8d..e306338 100644
--- a/src/mpid/ch3/src/mpid_port.c
+++ b/src/mpid/ch3/src/mpid_port.c
@@ -44,9 +44,9 @@ static MPIDI_PortFns portFns = { 0, 0, 0, 0 };
 int MPID_Open_port(MPIR_Info *info_ptr, char *port_name)
 {
     int mpi_errno=MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIR_OPEN_PORT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIR_OPEN_PORT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIR_OPEN_PORT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIR_OPEN_PORT);
 
     /* Check to see if we need to setup channel-specific functions
        for handling the port operations */
@@ -71,7 +71,7 @@ int MPID_Open_port(MPIR_Info *info_ptr, char *port_name)
     }
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIR_OPEN_PORT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIR_OPEN_PORT);
     return mpi_errno;
 }
 
@@ -95,9 +95,9 @@ Input Parameters:
 int MPID_Close_port(const char *port_name)
 {
     int mpi_errno=MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_CLOSE_PORT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CLOSE_PORT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CLOSE_PORT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CLOSE_PORT);
 
     /* Check to see if we need to setup channel-specific functions
        for handling the port operations */
@@ -120,7 +120,7 @@ int MPID_Close_port(const char *port_name)
     }
 
  fn_fail:	
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_CLOSE_PORT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CLOSE_PORT);
     return mpi_errno;
 }
 
@@ -132,9 +132,9 @@ int MPID_Comm_accept(const char * port_name, MPIR_Info * info, int root,
 		     MPIR_Comm * comm, MPIR_Comm ** newcomm_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_ACCEPT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_ACCEPT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_ACCEPT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_ACCEPT);
 
     /* Check to see if we need to setup channel-specific functions
        for handling the port operations */
@@ -158,7 +158,7 @@ int MPID_Comm_accept(const char * port_name, MPIR_Info * info, int root,
     }
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_ACCEPT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_ACCEPT);
     return mpi_errno;
 }
 
@@ -170,9 +170,9 @@ int MPID_Comm_connect(const char * port_name, MPIR_Info * info, int root,
 		      MPIR_Comm * comm, MPIR_Comm ** newcomm_ptr)
 {
     int mpi_errno=MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_CONNECT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_CONNECT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_CONNECT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_CONNECT);
 
     /* Check to see if we need to setup channel-specific functions
        for handling the port operations */
@@ -196,7 +196,7 @@ int MPID_Comm_connect(const char * port_name, MPIR_Info * info, int root,
     }
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_CONNECT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_CONNECT);
     return mpi_errno;
 }
 
@@ -289,9 +289,9 @@ static int MPIDI_Open_port(MPIR_Info *info_ptr, char *port_name)
     int port_name_tag = 0; /* this tag is added to the business card,
                               which is then returned as the port name */
     int myRank = MPIR_Process.comm_world->rank;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_OPEN_PORT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_OPEN_PORT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_OPEN_PORT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_OPEN_PORT);
 
     mpi_errno = get_port_name_tag(&port_name_tag);
     MPIR_ERR_CHKANDJUMP(mpi_errno,mpi_errno,MPI_ERR_OTHER,"**argstr_port_name_tag");
@@ -313,7 +313,7 @@ static int MPIDI_Open_port(MPIR_Info *info_ptr, char *port_name)
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, VERBOSE, (MPL_DBG_FDEST, "port_name = %s", port_name));
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_OPEN_PORT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_OPEN_PORT);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -330,9 +330,9 @@ static int MPIDI_Close_port(const char *port_name)
 {
     int mpi_errno = MPI_SUCCESS;
     int port_name_tag;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CLOSE_PORT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CLOSE_PORT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CLOSE_PORT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CLOSE_PORT);
 
     mpi_errno = MPIDI_GetTagFromPort(port_name, &port_name_tag);
     MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,"**argstr_port_name_tag");
@@ -340,7 +340,7 @@ static int MPIDI_Close_port(const char *port_name)
     free_port_name_tag(port_name_tag);
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CLOSE_PORT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CLOSE_PORT);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpid_probe.c b/src/mpid/ch3/src/mpid_probe.c
index d7279bf..3b3dde5 100644
--- a/src/mpid/ch3/src/mpid_probe.c
+++ b/src/mpid/ch3/src/mpid_probe.c
@@ -16,9 +16,9 @@ int MPID_Probe(int source, int tag, MPIR_Comm * comm, int context_offset,
     MPID_Progress_state progress_state;
     const int context = comm->recvcontext_id + context_offset;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_PROBE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_PROBE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_PROBE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_PROBE);
 
     if (source == MPI_PROC_NULL)
     {
@@ -97,7 +97,7 @@ int MPID_Probe(int source, int tag, MPIR_Comm * comm, int context_offset,
     MPIDI_CH3_Progress_end(&progress_state);
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_PROBE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_PROBE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpid_recv.c b/src/mpid/ch3/src/mpid_recv.c
index 88f3311..19e5861 100644
--- a/src/mpid/ch3/src/mpid_recv.c
+++ b/src/mpid/ch3/src/mpid_recv.c
@@ -24,9 +24,9 @@ int MPID_Recv(void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int t
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request * rreq;
     int found;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_RECV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RECV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RECV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RECV);
 
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
                       "rank=%d, tag=%d, context=%d", rank, tag,
@@ -87,7 +87,7 @@ int MPID_Recv(void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int t
 
             if (MPIR_Request_is_complete(rreq)) {
                 /* is it ever possible to have (cc==0 && recv_pending>0) ? */
-                MPIU_Assert(!recv_pending);
+                MPIR_Assert(!recv_pending);
 
                 /* All of the data has arrived, we need to unpack the data and 
                    then free the buffer and the request. */
@@ -112,7 +112,7 @@ int MPID_Recv(void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int t
 	    {
                 /* there should never be outstanding completion events for an unexpected
                  * recv without also having a "pending recv" */
-                MPIU_Assert(recv_pending);
+                MPIR_Assert(recv_pending);
 
 		/* The data is still being transfered across the net.  
 		   We'll leave it to the progress engine to handle once the
@@ -198,6 +198,6 @@ int MPID_Recv(void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int t
     });
 
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RECV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RECV);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_rma.c b/src/mpid/ch3/src/mpid_rma.c
index 79f115b..e104d09 100644
--- a/src/mpid/ch3/src/mpid_rma.c
+++ b/src/mpid/ch3/src/mpid_rma.c
@@ -63,9 +63,9 @@ int MPID_Win_create(void *base, MPI_Aint size, int disp_unit, MPIR_Info * info,
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_CREATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_CREATE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_CREATE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_CREATE);
 
     /* Check to make sure the communicator hasn't already been revoked */
     if (comm_ptr->revoked) {
@@ -84,7 +84,7 @@ int MPID_Win_create(void *base, MPI_Aint size, int disp_unit, MPIR_Info * info,
         MPIR_ERR_POP(mpi_errno);
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_CREATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_WIN_CREATE);
     return mpi_errno;
 }
 
@@ -97,9 +97,9 @@ int MPID_Win_allocate(MPI_Aint size, int disp_unit, MPIR_Info * info,
                       MPIR_Comm * comm_ptr, void *baseptr, MPIR_Win ** win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_ALLOCATE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_ALLOCATE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_ALLOCATE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_ALLOCATE);
 
     mpi_errno =
         win_init(size, disp_unit, MPI_WIN_FLAVOR_ALLOCATE, MPI_WIN_UNIFIED, info, comm_ptr,
@@ -114,7 +114,7 @@ int MPID_Win_allocate(MPI_Aint size, int disp_unit, MPIR_Info * info,
     }
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_ALLOCATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_WIN_ALLOCATE);
     return mpi_errno;
 }
 
@@ -127,9 +127,9 @@ int MPID_Win_create_dynamic(MPIR_Info * info, MPIR_Comm * comm_ptr, MPIR_Win **
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_CREATE_DYNAMIC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_CREATE_DYNAMIC);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_CREATE_DYNAMIC);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_CREATE_DYNAMIC);
 
     mpi_errno = win_init(0 /* spec defines size to be 0 */ ,
                          1 /* spec defines disp_unit to be 1 */ ,
@@ -146,7 +146,7 @@ int MPID_Win_create_dynamic(MPIR_Info * info, MPIR_Comm * comm_ptr, MPIR_Win **
     }
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_CREATE_DYNAMIC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_WIN_CREATE_DYNAMIC);
     return mpi_errno;
 }
 
@@ -159,13 +159,13 @@ int MPID_Win_create_dynamic(MPIR_Info * info, MPIR_Comm * comm_ptr, MPIR_Win **
 void *MPID_Alloc_mem(size_t size, MPIR_Info * info_ptr)
 {
     void *ap = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_ALLOC_MEM);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_ALLOC_MEM);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ALLOC_MEM);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_ALLOC_MEM);
 
     ap = MPIDI_CH3I_Alloc_mem(size, info_ptr);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_ALLOC_MEM);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_ALLOC_MEM);
     return ap;
 }
 
@@ -177,14 +177,14 @@ void *MPID_Alloc_mem(size_t size, MPIR_Info * info_ptr)
 int MPID_Free_mem(void *ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_FREE_MEM);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_FREE_MEM);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_FREE_MEM);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_FREE_MEM);
 
     MPIDI_CH3I_Free_mem(ptr);
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_FREE_MEM);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_FREE_MEM);
     return mpi_errno;
 }
 
@@ -198,9 +198,9 @@ int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPIR_Info * info, MPI
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);
 
     mpi_errno =
         win_init(size, disp_unit, MPI_WIN_FLAVOR_SHARED, MPI_WIN_UNIFIED, info, comm_ptr, win_ptr);
@@ -213,7 +213,7 @@ int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPIR_Info * info, MPI
         MPIR_ERR_POP(mpi_errno);
 
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);
     return mpi_errno;
 }
 
@@ -225,9 +225,9 @@ int MPID_Win_shared_query(MPIR_Win * win, int rank, MPI_Aint * size, int *disp_u
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_SHARED_QUERY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_SHARED_QUERY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_WIN_SHARED_QUERY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_WIN_SHARED_QUERY);
 
     mpi_errno = MPIDI_CH3U_Win_fns.shared_query(win, rank, size, disp_unit, baseptr);
     if (mpi_errno != MPI_SUCCESS) {
@@ -235,7 +235,7 @@ int MPID_Win_shared_query(MPIR_Win * win, int rank, MPI_Aint * size, int *disp_u
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_SHARED_QUERY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_WIN_SHARED_QUERY);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -252,10 +252,10 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     int i;
     MPIR_Comm *win_comm_ptr;
     int win_target_pool_size;
-    MPIU_CHKPMEM_DECL(5);
-    MPIDI_STATE_DECL(MPID_STATE_WIN_INIT);
+    MPIR_CHKPMEM_DECL(5);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WIN_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_WIN_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WIN_INIT);
 
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
     if (initRMAoptions) {
@@ -267,7 +267,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     }
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
 
-    *win_ptr = (MPIR_Win *) MPIU_Handle_obj_alloc(&MPIR_Win_mem);
+    *win_ptr = (MPIR_Win *) MPIR_Handle_obj_alloc(&MPIR_Win_mem);
     MPIR_ERR_CHKANDJUMP1(!(*win_ptr), mpi_errno, MPI_ERR_OTHER, "**nomem",
                          "**nomem %s", "MPIR_Win_mem");
 
@@ -275,7 +275,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     if (mpi_errno)
         MPIR_ERR_POP(mpi_errno);
 
-    MPIU_Object_set_ref(*win_ptr, 1);
+    MPIR_Object_set_ref(*win_ptr, 1);
 
     /* (*win_ptr)->errhandler is set by upper level; */
     /* (*win_ptr)->base is set by caller; */
@@ -322,7 +322,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     if (mpi_errno != MPI_SUCCESS)
         MPIR_ERR_POP(mpi_errno);
 
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->op_pool_start, MPIDI_RMA_Op_t *,
+    MPIR_CHKPMEM_MALLOC((*win_ptr)->op_pool_start, MPIDI_RMA_Op_t *,
                         sizeof(MPIDI_RMA_Op_t) * MPIR_CVAR_CH3_RMA_OP_WIN_POOL_SIZE, mpi_errno,
                         "RMA op pool");
     (*win_ptr)->op_pool_head = NULL;
@@ -333,7 +333,7 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
 
     win_target_pool_size =
         MPL_MIN(MPIR_CVAR_CH3_RMA_TARGET_WIN_POOL_SIZE, MPIR_Comm_size(win_comm_ptr));
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->target_pool_start, MPIDI_RMA_Target_t *,
+    MPIR_CHKPMEM_MALLOC((*win_ptr)->target_pool_start, MPIDI_RMA_Target_t *,
                         sizeof(MPIDI_RMA_Target_t) * win_target_pool_size, mpi_errno,
                         "RMA target pool");
     (*win_ptr)->target_pool_head = NULL;
@@ -343,13 +343,13 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     }
 
     (*win_ptr)->num_slots = MPL_MIN(MPIR_CVAR_CH3_RMA_SLOTS_SIZE, MPIR_Comm_size(win_comm_ptr));
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->slots, MPIDI_RMA_Slot_t *,
+    MPIR_CHKPMEM_MALLOC((*win_ptr)->slots, MPIDI_RMA_Slot_t *,
                         sizeof(MPIDI_RMA_Slot_t) * (*win_ptr)->num_slots, mpi_errno, "RMA slots");
     for (i = 0; i < (*win_ptr)->num_slots; i++) {
         (*win_ptr)->slots[i].target_list_head = NULL;
     }
 
-    MPIU_CHKPMEM_MALLOC((*win_ptr)->target_lock_entry_pool_start,
+    MPIR_CHKPMEM_MALLOC((*win_ptr)->target_lock_entry_pool_start,
                         MPIDI_RMA_Target_lock_entry_t *,
                         sizeof(MPIDI_RMA_Target_lock_entry_t) *
                         MPIR_CVAR_CH3_RMA_TARGET_LOCK_ENTRY_WIN_POOL_SIZE, mpi_errno,
@@ -380,9 +380,9 @@ static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model,
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_WIN_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WIN_INIT);
     return mpi_errno;
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
diff --git a/src/mpid/ch3/src/mpid_rsend.c b/src/mpid/ch3/src/mpid_rsend.c
index 794fef9..445e31b 100644
--- a/src/mpid/ch3/src/mpid_rsend.c
+++ b/src/mpid/ch3/src/mpid_rsend.c
@@ -32,9 +32,9 @@ int MPID_Rsend(const void * buf, int count, MPI_Datatype datatype, int rank, int
     MPID_Seqnum_t seqnum;
 #endif    
     int mpi_errno = MPI_SUCCESS;    
-    MPIDI_STATE_DECL(MPID_STATE_MPID_RSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RSEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RSEND);
 
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
 					"rank=%d, tag=%d, context=%d", 
@@ -159,6 +159,6 @@ int MPID_Rsend(const void * buf, int count, MPI_Datatype datatype, int rank, int
 		  );
     
   fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RSEND);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_send.c b/src/mpid/ch3/src/mpid_send.c
index c438d72..3389423 100644
--- a/src/mpid/ch3/src/mpid_send.c
+++ b/src/mpid/ch3/src/mpid_send.c
@@ -30,9 +30,9 @@ int MPID_Send(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank,
 #endif    
     int eager_threshold = -1;
     int mpi_errno = MPI_SUCCESS;    
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEND);
 
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
                 "rank=%d, tag=%d, context=%d", 
@@ -190,6 +190,6 @@ int MPID_Send(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank,
     }
 		  );
     
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEND);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_ssend.c b/src/mpid/ch3/src/mpid_ssend.c
index 94f9f1b..7133f26 100644
--- a/src/mpid/ch3/src/mpid_ssend.c
+++ b/src/mpid/ch3/src/mpid_ssend.c
@@ -29,9 +29,9 @@ int MPID_Ssend(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank
 #endif    
     int eager_threshold = -1;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SSEND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SSEND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SSEND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SSEND);
 
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
               "rank=%d, tag=%d, context=%d", 
@@ -124,6 +124,6 @@ int MPID_Ssend(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank
             MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE,
 			   "request allocated, handle=0x%08x", sreq->handle);}});
     
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SSEND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SSEND);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_startall.c b/src/mpid/ch3/src/mpid_startall.c
index e45d66c..e4701b0 100644
--- a/src/mpid/ch3/src/mpid_startall.c
+++ b/src/mpid/ch3/src/mpid_startall.c
@@ -30,7 +30,7 @@
 	FAIL_;								\
     }									\
 									\
-    MPIU_Object_set_ref((sreq_), 1);					\
+    MPIR_Object_set_ref((sreq_), 1);					\
     MPIR_cc_set(&(sreq_)->cc, 0);                                       \
     (sreq_)->kind = MPIR_REQUEST_KIND__PREQUEST_SEND;					\
     (sreq_)->comm = comm;						\
@@ -57,9 +57,9 @@ int MPID_Startall(int count, MPIR_Request * requests[])
     int i;
     int rc;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_STARTALL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_STARTALL);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_STARTALL);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_STARTALL);
 
     for (i = 0; i < count; i++)
     {
@@ -147,7 +147,7 @@ int MPID_Startall(int count, MPIR_Request * requests[])
 	/* --END ERROR HANDLING-- */
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_STARTALL);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_STARTALL);
     return mpi_errno;
 }
 
@@ -167,9 +167,9 @@ int MPID_Send_init(const void * buf, int count, MPI_Datatype datatype, int rank,
 {
     MPIR_Request * sreq;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEND_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEND_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEND_INIT);
 
     MPIDI_Request_create_psreq(sreq, mpi_errno, goto fn_exit);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
@@ -181,7 +181,7 @@ int MPID_Send_init(const void * buf, int count, MPI_Datatype datatype, int rank,
     *request = sreq;
 
   fn_exit:    
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEND_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEND_INIT);
     return mpi_errno;
 }
 
@@ -197,9 +197,9 @@ int MPID_Ssend_init(const void * buf, int count, MPI_Datatype datatype, int rank
 {
     MPIR_Request * sreq;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SSEND_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SSEND_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SSEND_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SSEND_INIT);
 
     MPIDI_Request_create_psreq(sreq, mpi_errno, goto fn_exit);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);
@@ -211,7 +211,7 @@ int MPID_Ssend_init(const void * buf, int count, MPI_Datatype datatype, int rank
     *request = sreq;
 
   fn_exit:    
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SSEND_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SSEND_INIT);
     return mpi_errno;
 }
 
@@ -227,9 +227,9 @@ int MPID_Rsend_init(const void * buf, int count, MPI_Datatype datatype, int rank
 {
     MPIR_Request * sreq;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_RSEND_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RSEND_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RSEND_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RSEND_INIT);
 
     MPIDI_Request_create_psreq(sreq, mpi_errno, goto fn_exit);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_RSEND);
@@ -241,7 +241,7 @@ int MPID_Rsend_init(const void * buf, int count, MPI_Datatype datatype, int rank
     *request = sreq;
 
   fn_exit:    
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RSEND_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RSEND_INIT);
     return mpi_errno;
 }
 
@@ -257,9 +257,9 @@ int MPID_Bsend_init(const void * buf, int count, MPI_Datatype datatype, int rank
 {
     MPIR_Request * sreq;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_BSEND_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_BSEND_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_BSEND_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_BSEND_INIT);
 
     MPIDI_Request_create_psreq(sreq, mpi_errno, goto fn_exit);
     MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_BSEND);
@@ -271,7 +271,7 @@ int MPID_Bsend_init(const void * buf, int count, MPI_Datatype datatype, int rank
     *request = sreq;
 
   fn_exit:    
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_BSEND_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_BSEND_INIT);
     return mpi_errno;
 }
 
@@ -295,9 +295,9 @@ int MPID_Recv_init(void * buf, int count, MPI_Datatype datatype, int rank, int t
 {
     MPIR_Request * rreq;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_RECV_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RECV_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RECV_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RECV_INIT);
     
     rreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
     if (rreq == NULL)
@@ -308,7 +308,7 @@ int MPID_Recv_init(void * buf, int count, MPI_Datatype datatype, int rank, int t
 	goto fn_exit;
     }
     
-    MPIU_Object_set_ref(rreq, 1);
+    MPIR_Object_set_ref(rreq, 1);
     rreq->kind = MPIR_REQUEST_KIND__PREQUEST_RECV;
     rreq->comm = comm;
     MPIR_cc_set(&rreq->cc, 0);
@@ -329,6 +329,6 @@ int MPID_Recv_init(void * buf, int count, MPI_Datatype datatype, int rank, int t
     *request = rreq;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RECV_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RECV_INIT);
     return mpi_errno;
 }
diff --git a/src/mpid/ch3/src/mpid_vc.c b/src/mpid/ch3/src/mpid_vc.c
index a852f95..ce3db9d 100644
--- a/src/mpid/ch3/src/mpid_vc.c
+++ b/src/mpid/ch3/src/mpid_vc.c
@@ -94,24 +94,24 @@ int MPIDI_VCRT_Create(int size, struct MPIDI_VCRT **vcrt_ptr)
 {
     MPIDI_VCRT_t * vcrt;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_VCRT_CREATE);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_VCRT_CREATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_VCRT_CREATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_VCRT_CREATE);
 
-    MPIU_CHKPMEM_MALLOC(vcrt, MPIDI_VCRT_t *, sizeof(MPIDI_VCRT_t) + (size - 1) * sizeof(MPIDI_VC_t *),	mpi_errno, "**nomem");
+    MPIR_CHKPMEM_MALLOC(vcrt, MPIDI_VCRT_t *, sizeof(MPIDI_VCRT_t) + (size - 1) * sizeof(MPIDI_VC_t *),	mpi_errno, "**nomem");
     vcrt->handle = HANDLE_SET_KIND(0, HANDLE_KIND_INVALID);
-    MPIU_Object_set_ref(vcrt, 1);
+    MPIR_Object_set_ref(vcrt, 1);
     vcrt->size = size;
     *vcrt_ptr = vcrt;
 
  fn_exit:
-    MPIU_CHKPMEM_COMMIT();
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_VCRT_CREATE);
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_VCRT_CREATE);
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -131,12 +131,12 @@ int MPIDI_VCRT_Create(int size, struct MPIDI_VCRT **vcrt_ptr)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDI_VCRT_Add_ref(struct MPIDI_VCRT *vcrt)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_VCRT_ADD_REF);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_VCRT_ADD_REF);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_VCRT_ADD_REF);
-    MPIU_Object_add_ref(vcrt);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_VCRT_ADD_REF);
+    MPIR_Object_add_ref(vcrt);
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_REFCOUNT,TYPICAL,(MPL_DBG_FDEST, "Incr VCRT %p ref count",vcrt));
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_VCRT_ADD_REF);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_VCRT_ADD_REF);
     return MPI_SUCCESS;
 }
 
@@ -156,11 +156,11 @@ int MPIDI_VCRT_Release(struct MPIDI_VCRT *vcrt, int isDisconnect )
 {
     int in_use;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_VCRT_RELEASE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_VCRT_RELEASE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_VCRT_RELEASE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_VCRT_RELEASE);
 
-    MPIU_Object_release_ref(vcrt, &in_use);
+    MPIR_Object_release_ref(vcrt, &in_use);
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_REFCOUNT,TYPICAL,(MPL_DBG_FDEST, "Decr VCRT %p ref count",vcrt));
     
     /* If this VC reference table is no longer in use, we can
@@ -184,17 +184,17 @@ int MPIDI_VCRT_Release(struct MPIDI_VCRT *vcrt, int isDisconnect )
             /* probably not, need to do something like the following instead: */
 #if 0
             if (isDisconnect) {
-                MPIU_Assert(in_use);
+                MPIR_Assert(in_use);
                 /* FIXME this is still bogus, the VCRT may contain a mix of
                  * dynamic and non-dynamic VCs, so the ref_count isn't
                  * guaranteed to have started at 2.  The best thing to do might
                  * be to avoid overloading the reference counting this way and
                  * use a separate check for dynamic VCs (another flag? compare
                  * PGs?) */
-                MPIU_Object_release_ref(vc, &in_use);
+                MPIR_Object_release_ref(vc, &in_use);
             }
 #endif
-	    if (isDisconnect && MPIU_Object_get_ref(vc) == 1) {
+	    if (isDisconnect && MPIR_Object_get_ref(vc) == 1) {
 		MPIDI_VC_release_ref(vc, &in_use);
 	    }
 
@@ -244,7 +244,7 @@ int MPIDI_VCRT_Release(struct MPIDI_VCRT *vcrt, int isDisconnect )
     }
 
  fn_exit:    
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_VCRT_RELEASE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_VCRT_RELEASE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -270,16 +270,16 @@ int MPIDI_VCRT_Release(struct MPIDI_VCRT *vcrt, int isDisconnect )
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDI_VCR_Dup(MPIDI_VCR orig_vcr, MPIDI_VCR * new_vcr)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_VCR_DUP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_VCR_DUP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_VCR_DUP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_VCR_DUP);
 
     /* We are allowed to create a vc that belongs to no process group 
      as part of the initial connect/accept action, so in that case,
      ignore the pg ref count update */
     /* XXX DJG FIXME-MT should we be checking this? */
     /* we probably need a test-and-incr operation or equivalent to avoid races */
-    if (MPIU_Object_get_ref(orig_vcr) == 0 && orig_vcr->pg) {
+    if (MPIR_Object_get_ref(orig_vcr) == 0 && orig_vcr->pg) {
 	MPIDI_VC_add_ref( orig_vcr );
 	MPIDI_VC_add_ref( orig_vcr );
 	MPIDI_PG_add_ref( orig_vcr->pg );
@@ -289,7 +289,7 @@ int MPIDI_VCR_Dup(MPIDI_VCR orig_vcr, MPIDI_VCR * new_vcr)
     }
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_REFCOUNT,TYPICAL,(MPL_DBG_FDEST,"Incr VCR %p ref count",orig_vcr));
     *new_vcr = orig_vcr;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_VCR_DUP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_VCR_DUP);
     return MPI_SUCCESS;
 }
 
@@ -302,9 +302,9 @@ int MPIDI_VCR_Dup(MPIDI_VCR orig_vcr, MPIDI_VCR * new_vcr)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPID_Comm_get_lpid(MPIR_Comm *comm_ptr, int idx, int * lpid_ptr, MPL_bool is_remote)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPID_VCR_GET_LPID);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_VCR_GET_LPID);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_VCR_GET_LPID);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_VCR_GET_LPID);
 
     if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM)
         *lpid_ptr = comm_ptr->dev.vcrt->vcr_table[idx]->lpid;
@@ -313,7 +313,7 @@ int MPID_Comm_get_lpid(MPIR_Comm *comm_ptr, int idx, int * lpid_ptr, MPL_bool is
     else
         *lpid_ptr = comm_ptr->dev.local_vcrt->vcr_table[idx]->lpid;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_VCR_GET_LPID);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_VCR_GET_LPID);
     return MPI_SUCCESS;
 }
 
@@ -335,11 +335,11 @@ int MPID_GPID_GetAllInComm( MPIR_Comm *comm_ptr, int local_size,
     int *gpid = (int*)&local_gpids[0];
     int lastPGID = -1, pgid;
     MPIDI_VCR vc;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_GPID_GETALLINCOMM);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_GPID_GETALLINCOMM);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_GPID_GETALLINCOMM);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_GPID_GETALLINCOMM);
 
-    MPIU_Assert(comm_ptr->local_size == local_size);
+    MPIR_Assert(comm_ptr->local_size == local_size);
     
     *singlePG = 1;
     for (i=0; i<comm_ptr->local_size; i++) {
@@ -361,7 +361,7 @@ int MPID_GPID_GetAllInComm( MPIR_Comm *comm_ptr, int local_size,
                          pgid, vc->pg_rank));
     }
     
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_GPID_GETALLINCOMM);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_GPID_GETALLINCOMM);
     return mpi_errno;
 }
 
@@ -621,7 +621,7 @@ int MPIDI_VC_Init( MPIDI_VC_t *vc, MPIDI_PG_t *pg, int rank )
 {
     vc->state = MPIDI_VC_STATE_INACTIVE;
     vc->handle  = HANDLE_SET_MPI_KIND(0, MPIR_VCONN);
-    MPIU_Object_set_ref(vc, 0);
+    MPIR_Object_set_ref(vc, 0);
     vc->pg      = pg;
     vc->pg_rank = rank;
     vc->lpid    = lpid_counter++;
@@ -639,11 +639,11 @@ int MPIDI_VC_Init( MPIDI_VC_t *vc, MPIDI_PG_t *pg, int rank )
 #endif
     /* FIXME: We need a better abstraction for initializing the thread state 
        for an object */
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
     {
         int err;
         MPID_Thread_mutex_create(&vc->pobj_mutex,&err);
-        MPIU_Assert(err == 0);
+        MPIR_Assert(err == 0);
     }
 #endif /* MPICH_THREAD_GRANULARITY */
     MPIDI_CH3_VC_Init(vc);
@@ -679,7 +679,7 @@ int MPID_Get_max_node_id(MPIR_Comm *comm, MPID_Node_id_t *max_id_p)
 {
     /* easiest way to implement this is to track it at PG create/destroy time */
     *max_id_p = g_max_node_id;
-    MPIU_Assert(*max_id_p >= 0);
+    MPIR_Assert(*max_id_p >= 0);
     return MPI_SUCCESS;
 }
 
@@ -693,18 +693,18 @@ static int publish_node_id(MPIDI_PG_t *pg, int our_pg_rank)
     char *key;
     int key_max_sz;
     char *kvs_name;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     /* set MPIU_hostname */
     ret = gethostname(MPIU_hostname, MAX_HOSTNAME_LEN);
-    MPIR_ERR_CHKANDJUMP2(ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock_gethost", "**sock_gethost %s %d", MPIU_Strerror(errno), errno);
+    MPIR_ERR_CHKANDJUMP2(ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock_gethost", "**sock_gethost %s %d", MPIR_Strerror(errno), errno);
     MPIU_hostname[MAX_HOSTNAME_LEN-1] = '\0';
 
     /* Allocate space for pmi key */
     pmi_errno = PMI_KVS_Get_key_length_max(&key_max_sz);
     MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
 
-    MPIU_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");
+    MPIR_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");
 
     mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -726,7 +726,7 @@ static int publish_node_id(MPIDI_PG_t *pg, int our_pg_rank)
     }
     
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -771,10 +771,10 @@ static int parse_mapping(char *map_str, mapping_type_t *type, map_block_t **map,
     char *c = map_str, *d;
     int num_blocks = 0;
     int i;
-    MPIU_CHKPMEM_DECL(1);
-    MPIDI_STATE_DECL(MPID_STATE_PARSE_MAPPING);
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_PARSE_MAPPING);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_PARSE_MAPPING);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_PARSE_MAPPING);
 
     /* parse string of the form:
        '(' <format> ',' '(' <num> ',' <num> ',' <num> ')' {',' '(' <num> ',' <num> ',' <num> ')'} ')'
@@ -816,7 +816,7 @@ static int parse_mapping(char *map_str, mapping_type_t *type, map_block_t **map,
         ++d;
     }
 
-    MPIU_CHKPMEM_MALLOC(*map, map_block_t *, sizeof(map_block_t) * num_blocks, mpi_errno, "map");
+    MPIR_CHKPMEM_MALLOC(*map, map_block_t *, sizeof(map_block_t) * num_blocks, mpi_errno, "map");
 
     /* parse block descriptors */
     for (i = 0; i < num_blocks; ++i) {
@@ -853,13 +853,13 @@ static int parse_mapping(char *map_str, mapping_type_t *type, map_block_t **map,
     expect_and_skip_c(c, ')');
 
     *nblocks = num_blocks;
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_PARSE_MAPPING);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_PARSE_MAPPING);
     return mpi_errno;
 fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     /* --END ERROR HANDLING-- */
 }
@@ -1030,7 +1030,7 @@ int MPIDI_Populate_vc_node_ids(MPIDI_PG_t *pg, int our_pg_rank)
     int odd_even_cliques = 0;
     int pmi_version = MPIDI_CH3I_DEFAULT_PMI_VERSION;
     int pmi_subversion = MPIDI_CH3I_DEFAULT_PMI_SUBVERSION;
-    MPIU_CHKLMEM_DECL(4);
+    MPIR_CHKLMEM_DECL(4);
 
     /* See if the user wants to override our default values */
     MPL_env2int("PMI_VERSION", &pmi_version);
@@ -1089,17 +1089,17 @@ int MPIDI_Populate_vc_node_ids(MPIDI_PG_t *pg, int our_pg_rank)
     if (our_pg_rank == -1) {
         /* FIXME this routine can't handle the dynamic process case at this
            time.  This will require more support from the process manager. */
-        MPIU_Assert(0);
+        MPIR_Assert(0);
     }
 
     /* Allocate space for pmi key and value */
     pmi_errno = PMI_KVS_Get_key_length_max(&key_max_sz);
     MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
-    MPIU_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");
+    MPIR_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");
 
     pmi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
     MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %d", pmi_errno);
-    MPIU_CHKLMEM_MALLOC(value, char *, val_max_sz, mpi_errno, "value");
+    MPIR_CHKLMEM_MALLOC(value, char *, val_max_sz, mpi_errno, "value");
 
     mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
@@ -1131,8 +1131,8 @@ int MPIDI_Populate_vc_node_ids(MPIDI_PG_t *pg, int our_pg_rank)
 
     /* Allocate temporary structures.  These would need to be persistent if
        we somehow were able to support dynamic processes via this method. */
-    MPIU_CHKLMEM_MALLOC(node_names, char **, pg->size * sizeof(char*), mpi_errno, "node_names");
-    MPIU_CHKLMEM_MALLOC(node_name_buf, char *, pg->size * key_max_sz * sizeof(char), mpi_errno, "node_name_buf");
+    MPIR_CHKLMEM_MALLOC(node_names, char **, pg->size * sizeof(char*), mpi_errno, "node_names");
+    MPIR_CHKLMEM_MALLOC(node_name_buf, char *, pg->size * key_max_sz * sizeof(char), mpi_errno, "node_name_buf");
 
     /* Gather hostnames */
     for (i = 0; i < pg->size; ++i)
@@ -1145,7 +1145,7 @@ int MPIDI_Populate_vc_node_ids(MPIDI_PG_t *pg, int our_pg_rank)
 
     for (i = 0; i < pg->size; ++i)
     {
-        MPIU_Assert(g_max_node_id < pg->size);
+        MPIR_Assert(g_max_node_id < pg->size);
         if (i == our_pg_rank)
         {
             /* This is us, no need to perform a get */
@@ -1188,7 +1188,7 @@ odd_even_cliques:
 #endif
 
 fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
 fn_fail:
     goto fn_exit;
diff --git a/src/mpid/ch3/src/mpidi_pg.c b/src/mpid/ch3/src/mpidi_pg.c
index ac619a6..dba77eb 100644
--- a/src/mpid/ch3/src/mpidi_pg.c
+++ b/src/mpid/ch3/src/mpidi_pg.c
@@ -82,9 +82,9 @@ int MPIDI_PG_Finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_PG_t *pg, *pgNext;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_FINALIZE);
 
     /* Print the state of the process groups */
     if (verbose) {
@@ -118,11 +118,11 @@ int MPIDI_PG_Finalize(void)
 	   fails to use MPI_Comm_disconnect on communicators that
 	   were created with the dynamic process routines.*/
         /* XXX DJG FIXME-MT should we be checking this? */
-	if (MPIU_Object_get_ref(pg) == 0 || 1) {
+	if (MPIR_Object_get_ref(pg) == 0 || 1) {
 	    if (pg == MPIDI_Process.my_pg)
 		MPIDI_Process.my_pg = NULL;
 
-	    MPIU_Object_set_ref(pg, 0); /* satisfy assertions in PG_Destroy */
+	    MPIR_Object_set_ref(pg, 0); /* satisfy assertions in PG_Destroy */
 	    MPIDI_PG_Destroy( pg );
 	}
 	pg     = pgNext;
@@ -155,7 +155,7 @@ int MPIDI_PG_Finalize(void)
     }
 #endif
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_FINALIZE);
     return mpi_errno;
 }
 
@@ -179,13 +179,13 @@ int MPIDI_PG_Create(int vct_sz, void * pg_id, MPIDI_PG_t ** pg_ptr)
     MPIDI_PG_t * pg = NULL, *pgnext;
     int p;
     int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(2);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_CREATE);
+    MPIR_CHKPMEM_DECL(2);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_CREATE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_CREATE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_CREATE);
     
-    MPIU_CHKPMEM_MALLOC(pg,MPIDI_PG_t*,sizeof(MPIDI_PG_t),mpi_errno,"pg");
-    MPIU_CHKPMEM_MALLOC(pg->vct,MPIDI_VC_t *,sizeof(MPIDI_VC_t)*vct_sz,
+    MPIR_CHKPMEM_MALLOC(pg,MPIDI_PG_t*,sizeof(MPIDI_PG_t),mpi_errno,"pg");
+    MPIR_CHKPMEM_MALLOC(pg->vct,MPIDI_VC_t *,sizeof(MPIDI_VC_t)*vct_sz,
 			mpi_errno,"pg->vct");
 
     if (verbose) {
@@ -197,7 +197,7 @@ int MPIDI_PG_Create(int vct_sz, void * pg_id, MPIDI_PG_t ** pg_ptr)
     /* The reference count indicates the number of vc's that are or 
        have been in use and not disconnected. It starts at zero,
        except for MPI_COMM_WORLD. */
-    MPIU_Object_set_ref(pg, 0);
+    MPIR_Object_set_ref(pg, 0);
     pg->size = vct_sz;
     pg->id   = pg_id;
     pg->finalize = 0;
@@ -251,11 +251,11 @@ int MPIDI_PG_Create(int vct_sz, void * pg_id, MPIDI_PG_t ** pg_ptr)
     *pg_ptr = pg;
     
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_CREATE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_CREATE);
     return mpi_errno;
     
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -269,11 +269,11 @@ int MPIDI_PG_Destroy(MPIDI_PG_t * pg)
     MPIDI_PG_t * pg_cur;
     int i;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_DESTROY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_DESTROY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_DESTROY);
 
-    MPIU_Assert(MPIU_Object_get_ref(pg) == 0);
+    MPIR_Assert(MPIR_Object_get_ref(pg) == 0);
 
     pg_prev = NULL;
     pg_cur = MPIDI_PG_list;
@@ -304,7 +304,7 @@ int MPIDI_PG_Destroy(MPIDI_PG_t * pg)
 		      counts IS required and missing one is a bug.)
                    2) There is a real bug lurking out there somewhere and we
                       just haven't hit it in the tests yet.  */
-                /*MPIU_Assert(MPIU_Object_get_ref(pg->vct[i]) == 0);*/
+                /*MPIR_Assert(MPIR_Object_get_ref(pg->vct[i]) == 0);*/
 
                 MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_DISCONNECT, VERBOSE, (MPL_DBG_FDEST, "about to free pg->vct=%p which contains vc=%p", pg->vct, &pg->vct[i]));
 
@@ -347,7 +347,7 @@ int MPIDI_PG_Destroy(MPIDI_PG_t * pg)
 		  "**dev|pg_not_found", "**dev|pg_not_found %p", pg);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_DESTROY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_DESTROY);
     return mpi_errno;
   fn_fail:
     goto fn_exit;
@@ -361,9 +361,9 @@ int MPIDI_PG_Find(void * id, MPIDI_PG_t ** pg_ptr)
 {
     MPIDI_PG_t * pg;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_FIND);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_FIND);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_FIND);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_FIND);
     
     pg = MPIDI_PG_list;
     while (pg != NULL)
@@ -380,7 +380,7 @@ int MPIDI_PG_Find(void * id, MPIDI_PG_t ** pg_ptr)
     *pg_ptr = NULL;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_FIND);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_FIND);
     return mpi_errno;
 }
 
@@ -444,9 +444,9 @@ int MPIDI_PG_Get_iterator(MPIDI_PG_iterator *iter)
 int MPIDI_PG_To_string(MPIDI_PG_t *pg_ptr, char **str_ptr, int *lenStr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_TO_STRING);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_TO_STRING);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_TO_STRING);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_TO_STRING);
 
     /* Replace this with the new string */
     if (pg_ptr->connInfoToString) {
@@ -458,7 +458,7 @@ int MPIDI_PG_To_string(MPIDI_PG_t *pg_ptr, char **str_ptr, int *lenStr)
 
     /*printf( "PgToString: Pg string is %s\n", *str_ptr ); fflush(stdout);*/
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_TO_STRING);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_TO_STRING);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -483,9 +483,9 @@ int MPIDI_PG_Create_from_string(const char * str, MPIDI_PG_t ** pg_pptr,
     const char *p;
     int vct_sz;
     MPIDI_PG_t *existing_pg, *pg_ptr=0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_CREATE_FROM_STRING);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_CREATE_FROM_STRING);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_CREATE_FROM_STRING);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_CREATE_FROM_STRING);
 
     /*printf( "PgCreateFromString: Creating pg from %s\n", str ); 
       fflush(stdout); */
@@ -522,7 +522,7 @@ int MPIDI_PG_Create_from_string(const char * str, MPIDI_PG_t ** pg_pptr,
     (*pg_ptr->connInfoFromString)( str, pg_ptr );
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_CREATE_FROM_STRING);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_CREATE_FROM_STRING);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -591,9 +591,9 @@ int MPIDI_PG_SetConnInfo( int rank, const char *connString )
     int mpi_errno = MPI_SUCCESS;
     int len;
     char key[PMI2_MAX_KEYLEN];
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_SetConnInfo);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_SetConnInfo);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_SetConnInfo);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_SetConnInfo);
 
     len = MPL_snprintf(key, sizeof(key), "P%d-businesscard", rank);
     MPIR_ERR_CHKANDJUMP1(len < 0 || len > sizeof(key), mpi_errno, MPI_ERR_OTHER, "**snprintf", "**snprintf %d", len);
@@ -605,7 +605,7 @@ int MPIDI_PG_SetConnInfo( int rank, const char *connString )
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_SetConnInfo);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_SetConnInfo);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -614,11 +614,11 @@ int MPIDI_PG_SetConnInfo( int rank, const char *connString )
     int pmi_errno;
     int len;
     char key[128];
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_SetConnInfo);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_SetConnInfo);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_SetConnInfo);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_SetConnInfo);
 
-    MPIU_Assert(pg_world->connData);
+    MPIR_Assert(pg_world->connData);
     
     len = MPL_snprintf(key, sizeof(key), "P%d-businesscard", rank);
     if (len < 0 || len > sizeof(key)) {
@@ -642,7 +642,7 @@ int MPIDI_PG_SetConnInfo( int rank, const char *connString )
 			     "**pmi_barrier %d", pmi_errno);
     }
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_SetConnInfo);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_SetConnInfo);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -795,7 +795,7 @@ static int connToStringKVS( char **buf_p, int *slen, MPIDI_PG_t *pg )
 	}
     }
 
-    MPIU_Assert(len <= curSlen);
+    MPIR_Assert(len <= curSlen);
 
     *buf_p = string;
     *slen  = len;
@@ -918,11 +918,11 @@ static int connToString( char **buf_p, int *slen, MPIDI_PG_t *pg )
     int mpi_errno = MPI_SUCCESS;
     char *str = NULL, *pg_id;
     int  i, len=0;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
     MPIDI_ConnInfo *connInfo = (MPIDI_ConnInfo *)pg->connData;
 
     /* Create this from the string array */
-    MPIU_CHKPMEM_MALLOC(str, char *, connInfo->toStringLen, mpi_errno, "str");
+    MPIR_CHKPMEM_MALLOC(str, char *, connInfo->toStringLen, mpi_errno, "str");
 
 #if defined(MPICH_DEBUG_MEMINIT)
     memset(str, 0, connInfo->toStringLen);
@@ -937,7 +937,7 @@ static int connToString( char **buf_p, int *slen, MPIDI_PG_t *pg )
     /* XXX DJG TODO figure out what this little bit is all about. */
     if (strstr( pg_id, "singinit_kvs" ) == pg_id) {
 #ifdef USE_PMI2_API
-        MPIU_Assertp(0); /* don't know what to do here for pmi2 yet.  DARIUS */
+        MPIR_Assertp(0); /* don't know what to do here for pmi2 yet.  DARIUS */
 #else
 	PMI_KVS_Get_my_name( pg->id, 256 );
 #endif
@@ -967,10 +967,10 @@ static int connToString( char **buf_p, int *slen, MPIDI_PG_t *pg )
     *slen = len;
 
 fn_exit:
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
     return mpi_errno;
 fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
     
 }
@@ -1126,9 +1126,9 @@ int MPIDI_PG_GetConnString( MPIDI_PG_t *pg, int rank, char *val, int vallen )
 int MPIDI_PG_Dup_vcr( MPIDI_PG_t *pg, int rank, MPIDI_VC_t **vc_p )
 {
     MPIDI_VC_t *vc;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_DUP_VCR);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_DUP_VCR);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_DUP_VCR);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_DUP_VCR);
 
     vc = &pg->vct[rank];
     /* Increase the reference count of the vc.  If the reference count 
@@ -1136,14 +1136,14 @@ int MPIDI_PG_Dup_vcr( MPIDI_PG_t *pg, int rank, MPIDI_VC_t **vc_p )
        process group *and* the reference count of the vc (this
        allows us to distinquish between Comm_free and Comm_disconnect) */
     /* FIXME-MT: This should be a fetch and increment for thread-safety */
-    if (MPIU_Object_get_ref(vc) == 0) {
+    if (MPIR_Object_get_ref(vc) == 0) {
 	MPIDI_PG_add_ref(pg);
 	MPIDI_VC_add_ref(vc);
     }
     MPIDI_VC_add_ref(vc);
     *vc_p = vc;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_DUP_VCR);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_DUP_VCR);
     return MPI_SUCCESS;
 }
 
@@ -1164,9 +1164,9 @@ int MPIDI_PG_Close_VCs( void )
 {
     MPIDI_PG_t * pg = MPIDI_PG_list;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_CLOSE_VCS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_CLOSE_VCS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_CLOSE_VCS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_CLOSE_VCS);
 
     while (pg) {
 	int i, inuse, n, i_start;
@@ -1187,7 +1187,7 @@ int MPIDI_PG_Close_VCs( void )
 	    /* If the VC is myself then skip the close message */
 	    if (pg == MPIDI_Process.my_pg && i == MPIDI_Process.my_pg_rank) {
                 /* XXX DJG FIXME-MT should we be checking this? */
-                if (MPIU_Object_get_ref(vc) != 0) {
+                if (MPIR_Object_get_ref(vc) != 0) {
                     MPIDI_PG_release_ref(pg, &inuse);
                 }
 		continue;
@@ -1200,7 +1200,7 @@ int MPIDI_PG_Close_VCs( void )
 	    } else if (vc->state == MPIDI_VC_STATE_INACTIVE ||
                        vc->state == MPIDI_VC_STATE_MORIBUND) {
                 /* XXX DJG FIXME-MT should we be checking this? */
-                if (MPIU_Object_get_ref(vc) != 0) {
+                if (MPIR_Object_get_ref(vc) != 0) {
 		    /* FIXME: If the reference count for the vc is not 0,
 		       something is wrong */
                     MPIDI_PG_release_ref(pg, &inuse);
@@ -1226,7 +1226,7 @@ int MPIDI_PG_Close_VCs( void )
        handles any close requests that this code generates) */
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_CLOSE_VCS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_CLOSE_VCS);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1247,10 +1247,10 @@ int MPIU_PG_Printall( FILE *fp )
     while (pg) {
         /* XXX DJG FIXME-MT should we be checking this? */
 	fprintf( fp, "size = %d, refcount = %d, id = %s\n", 
-		 pg->size, MPIU_Object_get_ref(pg), (char *)pg->id );
+		 pg->size, MPIR_Object_get_ref(pg), (char *)pg->id );
 	for (i=0; i<pg->size; i++) {
 	    fprintf( fp, "\tVCT rank = %d, refcount = %d, lpid = %d, state = %d \n", 
-		     pg->vct[i].pg_rank, MPIU_Object_get_ref(&pg->vct[i]),
+		     pg->vct[i].pg_rank, MPIR_Object_get_ref(&pg->vct[i]),
 		     pg->vct[i].lpid, (int)pg->vct[i].state );
 	}
 	fflush(fp);
diff --git a/src/mpid/ch3/src/mpidi_rma.c b/src/mpid/ch3/src/mpidi_rma.c
index 3682605..cb98e8e 100644
--- a/src/mpid/ch3/src/mpidi_rma.c
+++ b/src/mpid/ch3/src/mpidi_rma.c
@@ -90,13 +90,13 @@ int MPIDI_RMA_init(void)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIU_CHKPMEM_DECL(3);
+    MPIR_CHKPMEM_DECL(3);
 
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_RMA_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_RMA_INIT);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_RMA_INIT);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_RMA_INIT);
 
-    MPIU_CHKPMEM_MALLOC(global_rma_op_pool_start, MPIDI_RMA_Op_t *,
+    MPIR_CHKPMEM_MALLOC(global_rma_op_pool_start, MPIDI_RMA_Op_t *,
                         sizeof(MPIDI_RMA_Op_t) * MPIR_CVAR_CH3_RMA_OP_GLOBAL_POOL_SIZE,
                         mpi_errno, "RMA op pool");
     for (i = 0; i < MPIR_CVAR_CH3_RMA_OP_GLOBAL_POOL_SIZE; i++) {
@@ -104,7 +104,7 @@ int MPIDI_RMA_init(void)
         MPL_DL_APPEND(global_rma_op_pool_head, &(global_rma_op_pool_start[i]));
     }
 
-    MPIU_CHKPMEM_MALLOC(global_rma_target_pool_start, MPIDI_RMA_Target_t *,
+    MPIR_CHKPMEM_MALLOC(global_rma_target_pool_start, MPIDI_RMA_Target_t *,
                         sizeof(MPIDI_RMA_Target_t) * MPIR_CVAR_CH3_RMA_TARGET_GLOBAL_POOL_SIZE,
                         mpi_errno, "RMA target pool");
     for (i = 0; i < MPIR_CVAR_CH3_RMA_TARGET_GLOBAL_POOL_SIZE; i++) {
@@ -113,11 +113,11 @@ int MPIDI_RMA_init(void)
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_RMA_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_RMA_INIT);
     return mpi_errno;
 
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_fail;
 }
 
@@ -128,14 +128,14 @@ int MPIDI_RMA_init(void)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 void MPIDI_RMA_finalize(void)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_RMA_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_RMA_FINALIZE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_RMA_FINALIZE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_RMA_FINALIZE);
 
     MPL_free(global_rma_op_pool_start);
     MPL_free(global_rma_target_pool_start);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_RMA_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_RMA_FINALIZE);
 }
 
 
@@ -149,9 +149,9 @@ int MPID_Win_free(MPIR_Win ** win_ptr)
     int in_use;
     MPIR_Comm *comm_ptr;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_FREE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_WIN_FREE);
 
-    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_FREE);
+    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPID_WIN_FREE);
 
     MPIR_ERR_CHKANDJUMP(((*win_ptr)->states.access_state != MPIDI_RMA_NONE &&
                          (*win_ptr)->states.access_state != MPIDI_RMA_FENCE_ISSUED &&
@@ -190,7 +190,7 @@ int MPID_Win_free(MPIR_Win ** win_ptr)
     }
 
     /* dequeue window from the global list */
-    MPIU_Assert((*win_ptr)->active == FALSE);
+    MPIR_Assert((*win_ptr)->active == FALSE);
     MPL_DL_DELETE(MPIDI_RMA_Win_inactive_list_head, (*win_ptr));
 
     if (MPIDI_RMA_Win_inactive_list_head == NULL && MPIDI_RMA_Win_active_list_head == NULL) {
@@ -213,7 +213,7 @@ int MPID_Win_free(MPIR_Win ** win_ptr)
     MPL_free((*win_ptr)->slots);
     MPL_free((*win_ptr)->target_lock_entry_pool_start);
 
-    MPIU_Assert((*win_ptr)->current_target_lock_data_bytes == 0);
+    MPIR_Assert((*win_ptr)->current_target_lock_data_bytes == 0);
 
     /* Free the attached buffer for windows created with MPI_Win_allocate() */
     if ((*win_ptr)->create_flavor == MPI_WIN_FLAVOR_ALLOCATE ||
@@ -223,13 +223,13 @@ int MPID_Win_free(MPIR_Win ** win_ptr)
         }
     }
 
-    MPIU_Object_release_ref(*win_ptr, &in_use);
+    MPIR_Object_release_ref(*win_ptr, &in_use);
     /* MPI windows don't have reference count semantics, so this should always be true */
-    MPIU_Assert(!in_use);
-    MPIU_Handle_obj_free(&MPIR_Win_mem, *win_ptr);
+    MPIR_Assert(!in_use);
+    MPIR_Handle_obj_free(&MPIR_Win_mem, *win_ptr);
 
   fn_exit:
-    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPID_WIN_FREE);
+    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPID_WIN_FREE);
     return mpi_errno;
 
   fn_fail:
diff --git a/src/mpid/ch3/util/ftb/ftb.c b/src/mpid/ch3/util/ftb/ftb.c
index b7746a0..f17e13f 100644
--- a/src/mpid/ch3/util/ftb/ftb.c
+++ b/src/mpid/ch3/util/ftb/ftb.c
@@ -23,7 +23,7 @@ static FTB_event_info_t event_info[] = {
 };
 
 #ifdef DEBUG_MPIDU_FTB
-#define CHECK_FTB_ERROR(x) do { MPIU_Assertp(x); } while(0)
+#define CHECK_FTB_ERROR(x) do { MPIR_Assertp(x); } while(0)
 #else
 #define CHECK_FTB_ERROR(x) (void)x
 #endif
@@ -37,9 +37,9 @@ int MPIDU_Ftb_init(void)
     int mpi_errno = MPI_SUCCESS;
     int ret;
     FTB_client_t ci;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_FTB_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_FTB_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_FTB_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_FTB_INIT);
 
     MPL_strncpy(ci.event_space, "ftb.mpi.mpich", sizeof(ci.event_space));
     MPL_strncpy(ci.client_name, "mpich " MPICH_VERSION, sizeof(ci.client_name));
@@ -61,7 +61,7 @@ int MPIDU_Ftb_init(void)
     MPIR_ERR_CHKANDJUMP(ret, mpi_errno, MPI_ERR_OTHER, "**ftb_declare_publishable_events");
 
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_FTB_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_FTB_INIT);
     return mpi_errno;
 fn_fail:
     goto fn_exit;
@@ -80,16 +80,16 @@ void MPIDU_Ftb_publish(const char *event_name, const char *event_payload)
 {
     FTB_event_properties_t event_prop;
     FTB_event_handle_t event_handle;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_FTB_PUBLISH);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_FTB_PUBLISH);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_FTB_PUBLISH);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_FTB_PUBLISH);
 
     event_prop.event_type = 1;
     MPL_strncpy(event_prop.event_payload, event_payload, sizeof(event_prop.event_payload));
     
     CHECK_FTB_ERROR(FTB_Publish(client_handle, event_name, &event_prop, &event_handle));
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_FTB_PUBLISH);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_FTB_PUBLISH);
     return;
 }
 
@@ -132,13 +132,13 @@ void MPIDU_Ftb_publish_me(const char *event_name)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 void MPIDU_Ftb_finalize(void)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_FTB_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_FTB_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_FTB_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_FTB_FINALIZE);
 
     CHECK_FTB_ERROR(FTB_Disconnect(client_handle));
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_FTB_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_FTB_FINALIZE);
     return;
 }
 
diff --git a/src/mpid/ch3/util/sock/ch3u_connect_sock.c b/src/mpid/ch3/util/sock/ch3u_connect_sock.c
index 602bc81..8feaf3b 100644
--- a/src/mpid/ch3/util/sock/ch3u_connect_sock.c
+++ b/src/mpid/ch3/util/sock/ch3u_connect_sock.c
@@ -165,12 +165,12 @@ int MPIDI_CH3I_Connection_alloc(MPIDI_CH3I_Connection_t ** connp)
     MPIDI_CH3I_Connection_t * conn = NULL;
     int id_sz;
     int pmi_errno;
-    MPIU_CHKPMEM_DECL(2);
-    MPIDI_STATE_DECL(MPID_STATE_CONNECTION_ALLOC);
+    MPIR_CHKPMEM_DECL(2);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_ALLOC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CONNECTION_ALLOC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_ALLOC);
 
-    MPIU_CHKPMEM_MALLOC(conn,MPIDI_CH3I_Connection_t*,
+    MPIR_CHKPMEM_MALLOC(conn,MPIDI_CH3I_Connection_t*,
 			sizeof(MPIDI_CH3I_Connection_t),mpi_errno,"conn");
 
     /* FIXME: This size is unchanging, so get it only once (at most); 
@@ -184,16 +184,16 @@ int MPIDI_CH3I_Connection_alloc(MPIDI_CH3I_Connection_t ** connp)
 			     "**pmi_get_id_length_max",
 			     "**pmi_get_id_length_max %d", pmi_errno);
 #endif
-    MPIU_CHKPMEM_MALLOC(conn->pg_id,char*,id_sz + 1,mpi_errno,"conn->pg_id");
+    MPIR_CHKPMEM_MALLOC(conn->pg_id,char*,id_sz + 1,mpi_errno,"conn->pg_id");
     conn->pg_id[0] = 0;           /* Be careful about pg_id in case a later 
 				     error */
     *connp = conn;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_CONNECTION_ALLOC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_ALLOC);
     return mpi_errno;
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -211,19 +211,19 @@ int MPIDI_CH3I_Connect_to_root_sock(const char * port_name,
     int mpi_errno = MPI_SUCCESS;
     MPIDI_VC_t * vc;
     MPIDI_CH3I_VC *vcch;
-    MPIU_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(1);
     char host_description[MAX_HOST_DESCRIPTION_LEN];
     int port, port_name_tag;
     MPIDU_Sock_ifaddr_t ifaddr;
     int hasIfaddr = 0;
     MPIDI_CH3I_Connection_t * conn;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_CONNECT_TO_ROOT_SOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_CONNECT_TO_ROOT_SOCK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_CONNECT_TO_ROOT_SOCK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_CONNECT_TO_ROOT_SOCK);
 
     /* First, create a new vc (we may use this to pass to a generic
        connection routine) */
-    MPIU_CHKPMEM_MALLOC(vc,MPIDI_VC_t *,sizeof(MPIDI_VC_t),mpi_errno,"vc");
+    MPIR_CHKPMEM_MALLOC(vc,MPIDI_VC_t *,sizeof(MPIDI_VC_t),mpi_errno,"vc");
     /* FIXME - where does this vc get freed? */
 
     *new_vc = vc;
@@ -311,10 +311,10 @@ int MPIDI_CH3I_Connect_to_root_sock(const char * port_name,
     /* --END ERROR HANDLING-- */
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_CONNECT_TO_ROOT_SOCK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_CONNECT_TO_ROOT_SOCK);
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -342,9 +342,9 @@ int MPIDU_Sock_get_conninfo_from_bc( const char *bc,
 #if !defined(HAVE_WINDOWS_H) && defined(HAVE_INET_PTON)
     char ifname[256];
 #endif
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_CONNINFO_FROM_BC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_CONNINFO_FROM_BC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_GET_CONNINFO_FROM_BC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_GET_CONNINFO_FROM_BC);
 
     str_errno = MPL_str_get_string_arg(bc, MPIDI_CH3I_HOST_DESCRIPTION_KEY,
 				 host_description, maxlen);
@@ -406,7 +406,7 @@ int MPIDU_Sock_get_conninfo_from_bc( const char *bc,
 #endif
     
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_CONNINFO_FROM_BC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_CONNINFO_FROM_BC);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -436,9 +436,9 @@ int MPIDI_CH3U_Get_business_card_sock(int myRank,
     MPIDU_Sock_ifaddr_t ifaddr;
     char ifnamestr[MAX_HOST_DESCRIPTION_LEN];
     char *bc_orig = *bc_val_p;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_GET_BUSINESS_CARD_SOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_GET_BUSINESS_CARD_SOCK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_GET_BUSINESS_CARD_SOCK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_GET_BUSINESS_CARD_SOCK);
 
     MPIDU_CH3U_GetSockInterfaceAddr( myRank, ifnamestr, sizeof(ifnamestr), &ifaddr );
 
@@ -517,7 +517,7 @@ int MPIDI_CH3U_Get_business_card_sock(int myRank,
     MPL_DBG_MSG_S(MPIDI_CH3_DBG_CONNECT,TYPICAL,"business card is %s", bc_orig );
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_GET_BUSINESS_CARD_SOCK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_GET_BUSINESS_CARD_SOCK);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -544,9 +544,9 @@ int MPIDI_CH3_Sockconn_handle_accept_event( void )
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3I_Connection_t * conn;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_ACCEPT_EVENT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_ACCEPT_EVENT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_ACCEPT_EVENT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_ACCEPT_EVENT);
     
     mpi_errno = MPIDI_CH3I_Connection_alloc(&conn);
     if (mpi_errno != MPI_SUCCESS) {
@@ -570,7 +570,7 @@ int MPIDI_CH3_Sockconn_handle_accept_event( void )
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_ACCEPT_EVENT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_ACCEPT_EVENT);
 
     return mpi_errno;
  fn_fail:
@@ -585,9 +585,9 @@ int MPIDI_CH3_Sockconn_handle_connect_event( MPIDI_CH3I_Connection_t *conn,
 					     int event_error )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNECT_EVENT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNECT_EVENT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNECT_EVENT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNECT_EVENT);
     
     /* --BEGIN ERROR HANDLING-- */
     if (event_error != MPI_SUCCESS) {
@@ -618,7 +618,7 @@ int MPIDI_CH3_Sockconn_handle_connect_event( MPIDI_CH3I_Connection_t *conn,
 	MPIDI_CH3I_Pkt_sc_conn_accept_t *acceptpkt = 
 	    (MPIDI_CH3I_Pkt_sc_conn_accept_t *)&conn->pkt.type;
 
-	MPIU_Assert(conn->state == CONN_STATE_CONNECT_ACCEPT);
+	MPIR_Assert(conn->state == CONN_STATE_CONNECT_ACCEPT);
 	MPL_DBG_CONNSTATECHANGE(conn->vc,conn,CONN_STATE_OPEN_CSEND);
 	conn->state = CONN_STATE_OPEN_CSEND;
 	
@@ -637,7 +637,7 @@ int MPIDI_CH3_Sockconn_handle_connect_event( MPIDI_CH3I_Connection_t *conn,
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNECT_EVENT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNECT_EVENT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -650,16 +650,16 @@ int MPIDI_CH3_Sockconn_handle_connect_event( MPIDI_CH3I_Connection_t *conn,
 int MPIDI_CH3_Sockconn_handle_close_event( MPIDI_CH3I_Connection_t * conn )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CLOSE_EVENT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CLOSE_EVENT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CLOSE_EVENT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CLOSE_EVENT);
 
     /* If the conn pointer is NULL then the close was intentional */
     /* FIXME: What does the above comment mean? */
     if (conn != NULL) {
 	if (conn->state == CONN_STATE_CLOSING) {
-	    MPIU_Assert(conn->send_active == NULL);
-	    MPIU_Assert(conn->recv_active == NULL);
+	    MPIR_Assert(conn->send_active == NULL);
+	    MPIR_Assert(conn->recv_active == NULL);
 	    if (conn->vc != NULL) {
 		MPIDI_CH3I_VC *vcch = &conn->vc->ch;
 
@@ -701,7 +701,7 @@ int MPIDI_CH3_Sockconn_handle_close_event( MPIDI_CH3I_Connection_t * conn )
             goto fn_exit;
         }
 	else {
-	    MPIU_Assert(conn->state == CONN_STATE_LISTENING);
+	    MPIR_Assert(conn->state == CONN_STATE_LISTENING);
 	    MPIDI_CH3I_listener_conn = NULL;
 	    MPIDI_CH3I_listener_port = 0;
 	    
@@ -711,7 +711,7 @@ int MPIDI_CH3_Sockconn_handle_close_event( MPIDI_CH3I_Connection_t * conn )
 	connection_destroy(conn); 
     }
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CLOSE_EVENT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CLOSE_EVENT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -727,9 +727,9 @@ int MPIDI_CH3_Sockconn_handle_close_event( MPIDI_CH3I_Connection_t * conn )
 int MPIDI_CH3_Sockconn_handle_conn_event( MPIDI_CH3I_Connection_t * conn )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONN_EVENT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONN_EVENT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONN_EVENT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONN_EVENT);
 
     /* FIXME: Is there an assumption about conn->state? */
 
@@ -738,7 +738,7 @@ int MPIDI_CH3_Sockconn_handle_conn_event( MPIDI_CH3I_Connection_t * conn )
 	    (MPIDI_CH3I_Pkt_sc_open_req_t *)&conn->pkt.type;
 	/* Answer to fixme: it appears from the control flow that this is
 	   the required state) */
-	MPIU_Assert( conn->state == CONN_STATE_OPEN_LRECV_PKT);
+	MPIR_Assert( conn->state == CONN_STATE_OPEN_LRECV_PKT);
 	MPL_DBG_CONNSTATECHANGE(conn->vc,conn,CONN_STATE_OPEN_LRECV_DATA);
 	conn->state = CONN_STATE_OPEN_LRECV_DATA;
 	mpi_errno = MPIDU_Sock_post_read(conn->sock, conn->pg_id, 
@@ -799,13 +799,13 @@ int MPIDI_CH3_Sockconn_handle_conn_event( MPIDI_CH3I_Connection_t * conn )
 	/* FIXME: is this the correct assert? */
 
 	if (openpkt->ack && conn->state != CONN_STATE_DISCARD) {
-	    MPIU_Assert( conn->state == CONN_STATE_OPEN_CRECV );
+	    MPIR_Assert( conn->state == CONN_STATE_OPEN_CRECV );
 	    MPIDI_CH3I_VC *vcch = &conn->vc->ch;
 	    MPL_DBG_CONNSTATECHANGE(conn->vc,conn,CONN_STATE_CONNECTED);
 	    conn->state = CONN_STATE_CONNECTED;
 	    vcch->state = MPIDI_CH3I_VC_STATE_CONNECTED;
-	    MPIU_Assert(vcch->conn == conn);
-	    MPIU_Assert(vcch->sock == conn->sock);
+	    MPIR_Assert(vcch->conn == conn);
+	    MPIR_Assert(vcch->sock == conn->sock);
 	    
 	    mpi_errno = connection_post_recv_pkt(conn);
 	    if (mpi_errno != MPI_SUCCESS) {
@@ -830,7 +830,7 @@ int MPIDI_CH3_Sockconn_handle_conn_event( MPIDI_CH3I_Connection_t * conn )
                a head-to-head connection. The VC is still in use, but by
                another sochekt connection. The refcount is not incremented
                By chaning the assosiated connection. */
-	    /* MPIU_Assert( conn->vc->ch.conn != conn ); */
+	    /* MPIR_Assert( conn->vc->ch.conn != conn ); */
 	    /* Set the candidate vc for this connection to NULL (we
 	       are discarding this connection because (I think) we
 	       are performing a head-to-head connection, and this
@@ -865,7 +865,7 @@ int MPIDI_CH3_Sockconn_handle_conn_event( MPIDI_CH3I_Connection_t * conn )
 
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONN_EVENT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONN_EVENT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -887,9 +887,9 @@ int MPIDI_CH3_Sockconn_handle_connopen_event( MPIDI_CH3I_Connection_t * conn )
 	(MPIDI_CH3I_Pkt_sc_open_req_t *)&conn->pkt.type;
     MPIDI_CH3I_Pkt_sc_open_resp_t *openresp = 
 	(MPIDI_CH3I_Pkt_sc_open_resp_t *)&conn->pkt.type;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNOPEN_EVENT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNOPEN_EVENT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNOPEN_EVENT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNOPEN_EVENT);
 
     /* Look up pg based on conn->pg_id */
     mpi_errno = MPIDI_PG_Find(conn->pg_id, &pg);
@@ -902,7 +902,7 @@ int MPIDI_CH3_Sockconn_handle_connopen_event( MPIDI_CH3I_Connection_t * conn )
     /* We require that the packet be the open_req type */
     pg_rank = openpkt->pg_rank;
     MPIDI_PG_Get_vc_set_active(pg, pg_rank, &vc);
-    MPIU_Assert(vc->pg_rank == pg_rank);
+    MPIR_Assert(vc->pg_rank == pg_rank);
     
     if(pg->finalize == 1) {
         MPIDI_Pkt_init(openresp, MPIDI_CH3I_PKT_SC_OPEN_RESP);
@@ -999,7 +999,7 @@ int MPIDI_CH3_Sockconn_handle_connopen_event( MPIDI_CH3I_Connection_t * conn )
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNOPEN_EVENT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNOPEN_EVENT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1013,9 +1013,9 @@ int MPIDI_CH3_Sockconn_handle_connopen_event( MPIDI_CH3I_Connection_t * conn )
 int MPIDI_CH3_Sockconn_handle_connwrite( MPIDI_CH3I_Connection_t * conn )
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNWRITE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNWRITE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNWRITE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNWRITE);
 
     if (conn->state == CONN_STATE_OPEN_CSEND || conn->state == CONN_STATE_DISCARD) {
 	/* finished sending open request packet */
@@ -1073,7 +1073,7 @@ int MPIDI_CH3_Sockconn_handle_connwrite( MPIDI_CH3I_Connection_t * conn )
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNWRITE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_SOCKCONN_HANDLE_CONNWRITE);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1090,9 +1090,9 @@ int MPIDI_CH3I_VC_post_sockconnect(MPIDI_VC_t * vc)
     int mpi_errno = MPI_SUCCESS;
     char val[MPIDI_MAX_KVS_VALUE_LEN];
     MPIDI_CH3I_VC *vcch = &vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_VC_POST_SOCKCONNECT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_VC_POST_SOCKCONNECT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_VC_POST_SOCKCONNECT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_VC_POST_SOCKCONNECT);
 
     /* MPIDI_PG_GetConnString() can block & release the lock for 
      * the current thread. Prevent other threads from trying to
@@ -1137,7 +1137,7 @@ int MPIDI_CH3I_VC_post_sockconnect(MPIDI_VC_t * vc)
     mpi_errno = MPIDI_CH3I_Sock_connect( vc, val, sizeof(val) );
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_VC_POST_SOCKCONNECT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_VC_POST_SOCKCONNECT);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -1163,9 +1163,9 @@ int MPIDI_CH3I_Sock_connect( MPIDI_VC_t *vc, const char val[], int vallen )
     MPIDI_CH3I_Connection_t * conn = 0;
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3I_VC *vcch = &vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SOCK_CONNECT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SOCK_CONNECT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SOCK_CONNECT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SOCK_CONNECT);
     
     if(vcch->state == MPIDI_CH3I_VC_STATE_CONNECTING){ 
 	MPL_DBG_MSG_P(MPIDI_CH3_DBG_CONNECT,TYPICAL,"Posting a connect for vc=(%p)", vc);
@@ -1242,7 +1242,7 @@ int MPIDI_CH3I_Sock_connect( MPIDI_VC_t *vc, const char val[], int vallen )
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SOCK_CONNECT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SOCK_CONNECT);
     return mpi_errno;
  fn_fail:
     /* --BEGIN ERROR HANDLING-- */
@@ -1265,9 +1265,9 @@ int MPIDI_CH3I_Sock_connect( MPIDI_VC_t *vc, const char val[], int vallen )
 static int connection_post_recv_pkt(MPIDI_CH3I_Connection_t * conn)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_CONNECTION_POST_RECV_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POST_RECV_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CONNECTION_POST_RECV_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POST_RECV_PKT);
 
     mpi_errno = MPIDU_Sock_post_read(conn->sock, &conn->pkt, sizeof(conn->pkt),
 				     sizeof(conn->pkt), NULL);
@@ -1276,7 +1276,7 @@ static int connection_post_recv_pkt(MPIDI_CH3I_Connection_t * conn)
     }
 
  fn_fail:    
-    MPIDI_FUNC_EXIT(MPID_STATE_CONNECTION_POST_RECV_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POST_RECV_PKT);
     return mpi_errno;
 }
 
@@ -1288,9 +1288,9 @@ static int connection_post_recv_pkt(MPIDI_CH3I_Connection_t * conn)
 static int connection_post_send_pkt(MPIDI_CH3I_Connection_t * conn)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_CONNECTION_POST_SEND_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POST_SEND_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CONNECTION_POST_SEND_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POST_SEND_PKT);
  
     MPL_DBG_PKT(conn,&conn->pkt,"connect");
     mpi_errno = MPIDU_Sock_post_write(conn->sock, &conn->pkt, sizeof(conn->pkt),
@@ -1300,7 +1300,7 @@ static int connection_post_send_pkt(MPIDI_CH3I_Connection_t * conn)
     }
     
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_CONNECTION_POST_SEND_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POST_SEND_PKT);
     return mpi_errno;
 }
 
@@ -1311,9 +1311,9 @@ static int connection_post_send_pkt(MPIDI_CH3I_Connection_t * conn)
 static int connection_post_send_pkt_and_pgid(MPIDI_CH3I_Connection_t * conn)
 {
     int mpi_errno;
-    MPIDI_STATE_DECL(MPID_STATE_CONNECTION_POST_SEND_PKT_AND_PGID);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POST_SEND_PKT_AND_PGID);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CONNECTION_POST_SEND_PKT_AND_PGID);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POST_SEND_PKT_AND_PGID);
     
     conn->iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) &conn->pkt;
     conn->iov[0].MPL_IOV_LEN = (int) sizeof(conn->pkt);
@@ -1328,7 +1328,7 @@ static int connection_post_send_pkt_and_pgid(MPIDI_CH3I_Connection_t * conn)
     }
     
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_CONNECTION_POST_SEND_PKT_AND_PGID);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POST_SEND_PKT_AND_PGID);
     return mpi_errno;
 }
 
@@ -1341,9 +1341,9 @@ static int connection_post_sendq_req(MPIDI_CH3I_Connection_t * conn)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_CH3I_VC *vcch = &conn->vc->ch;
-    MPIDI_STATE_DECL(MPID_STATE_CONNECTION_POST_SENDQ_REQ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_POST_SENDQ_REQ);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CONNECTION_POST_SENDQ_REQ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_POST_SENDQ_REQ);
 
     /* post send of next request on the send queue */
     conn->send_active = MPIDI_CH3I_SendQ_head(vcch); /* MT */
@@ -1360,7 +1360,7 @@ static int connection_post_sendq_req(MPIDI_CH3I_Connection_t * conn)
     }
     
  fn_fail:
-    MPIDI_FUNC_EXIT(MPID_STATE_CONNECTION_POST_SENDQ_REQ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_POST_SENDQ_REQ);
     return mpi_errno;
 }
 
@@ -1375,14 +1375,14 @@ static int connection_post_sendq_req(MPIDI_CH3I_Connection_t * conn)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static void connection_destroy(MPIDI_CH3I_Connection_t * conn)
 {
-    MPIDI_STATE_DECL(MPID_STATE_CONNECTION_DESTROY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONNECTION_DESTROY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_CONNECTION_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONNECTION_DESTROY);
 
     MPL_free(conn->pg_id);
     MPL_free(conn);
     
-    MPIDI_FUNC_EXIT(MPID_STATE_CONNECTION_DESTROY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONNECTION_DESTROY);
 }
 
 
diff --git a/src/mpid/ch3/util/sock/ch3u_getinterfaces.c b/src/mpid/ch3/util/sock/ch3u_getinterfaces.c
index b2e5c4b..0eb833f 100644
--- a/src/mpid/ch3/util/sock/ch3u_getinterfaces.c
+++ b/src/mpid/ch3/util/sock/ch3u_getinterfaces.c
@@ -139,7 +139,7 @@ int MPIDU_CH3U_GetSockInterfaceAddr( int myRank, char *ifname, int maxIfname,
 		ifaddr->type = -1;
 	    }
 	    else {
-		MPIU_Memcpy( ifaddr->ifaddr, info->h_addr_list[0], ifaddr->len );
+		MPIR_Memcpy( ifaddr->ifaddr, info->h_addr_list[0], ifaddr->len );
 	    }
 	}
     }
@@ -295,14 +295,14 @@ static int MPIDI_CH3U_GetIPInterface( MPIDU_Sock_ifaddr_t *ifaddr, int *found )
 		if (nfound == 0) {
 		    myifaddr.type = AF_INET;
 		    myifaddr.len  = 4;
-		    MPIU_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
+		    MPIR_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
 		}
 	    }
 	    else {
 		nfound++;
 		myifaddr.type = AF_INET;
 		myifaddr.len  = 4;
-		MPIU_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
+		MPIR_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
 	    }
 	}
 	else {
diff --git a/src/mpid/ch3/util/unordered/unordered.c b/src/mpid/ch3/util/unordered/unordered.c
index bb971fb..f656708 100644
--- a/src/mpid/ch3/util/unordered/unordered.c
+++ b/src/mpid/ch3/util/unordered/unordered.c
@@ -29,9 +29,9 @@ int MPIDI_CH3U_Handle_unordered_recv_pkt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t * pkt,
 					 MPIR_Request ** rreqp)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_UNORDERED_RECV_PKT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_UNORDERED_RECV_PKT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_UNORDERED_RECV_PKT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_UNORDERED_RECV_PKT);
 
     /* FIXME: This should probably be *rreqp = NULL? */
     rreqp = NULL;
@@ -157,7 +157,7 @@ int MPIDI_CH3U_Handle_unordered_recv_pkt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t * pkt,
     }
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_UNORDERED_RECV_PKT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_UNORDERED_RECV_PKT);
     return mpi_errno;
 }
 #endif /* defined(MPIDI_CH3_MSGS_UNORDERED) */
diff --git a/src/mpid/common/datatype/dataloop/darray_support.c b/src/mpid/common/datatype/dataloop/darray_support.c
index d422bec..615bc24 100644
--- a/src/mpid/common/datatype/dataloop/darray_support.c
+++ b/src/mpid/common/datatype/dataloop/darray_support.c
@@ -39,7 +39,7 @@ int PREPEND_PREFIX(Type_convert_darray)(int size,
     int procs, tmp_rank, i, tmp_size, blklens[3], *coords;
     MPI_Aint *st_offsets, orig_extent, disps[3];
 
-    MPIR_Type_extent_impl(oldtype, &orig_extent);
+    MPIDU_Datatype_get_extent_macro(oldtype, orig_extent);
 
 /* calculate position in Cartesian grid as MPI would (row-major
    ordering) */
diff --git a/src/mpid/common/datatype/dataloop/subarray_support.c b/src/mpid/common/datatype/dataloop/subarray_support.c
index 1bdb80a..1953926 100644
--- a/src/mpid/common/datatype/dataloop/subarray_support.c
+++ b/src/mpid/common/datatype/dataloop/subarray_support.c
@@ -25,7 +25,7 @@ int PREPEND_PREFIX(Type_convert_subarray)(int ndims,
     int i, blklens[3];
     MPI_Datatype tmp1, tmp2, types[3];
 
-    MPIR_Type_extent_impl(oldtype, &extent);
+    MPIDU_Datatype_get_extent_macro(oldtype, extent);
 
     if (order == MPI_ORDER_FORTRAN) {
 	/* dimension 0 changes fastest */
diff --git a/src/mpid/common/datatype/dataloop/typesize_support.c b/src/mpid/common/datatype/dataloop/typesize_support.c
index 074c6d4..a73260d 100644
--- a/src/mpid/common/datatype/dataloop/typesize_support.c
+++ b/src/mpid/common/datatype/dataloop/typesize_support.c
@@ -168,8 +168,8 @@ void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type,
 	int mpisize;
 	MPI_Aint mpiextent;
 
-	MPIR_Type_size_impl(type, &mpisize);
-	MPIR_Type_extent_impl(type, &mpiextent);
+	MPIDU_Datatype_get_size_macro(type, mpisize);
+	MPIDU_Datatype_get_extent_macro(type, mpiextent);
 	tfp->size    = (DLOOP_Offset) mpisize;
 	tfp->lb      = 0;
 	tfp->ub      = (DLOOP_Offset) mpiextent;
@@ -592,7 +592,7 @@ static int DLOOP_Named_type_alignsize(MPI_Datatype type, MPI_Aint disp)
     if (type == MPI_LB || type == MPI_UB)
 	return 0;
 
-    MPIR_Type_size_impl(type, &alignsize);
+    MPIDU_Datatype_get_size_macro(type, alignsize);
 
     switch(type)
     {
diff --git a/src/mpid/common/datatype/mpidu_dataloop.h b/src/mpid/common/datatype/mpidu_dataloop.h
index 1d68e49..1fa82d2 100644
--- a/src/mpid/common/datatype/mpidu_dataloop.h
+++ b/src/mpid/common/datatype/mpidu_dataloop.h
@@ -74,22 +74,22 @@
     ((HANDLE_GET_KIND(handle_) == HANDLE_KIND_BUILTIN) ? 0 : 1)
 
 #define DLOOP_Ensure_Offset_fits_in_pointer(value_) \
-    MPIU_Ensure_Aint_fits_in_pointer(value_)
+    MPIR_Ensure_Aint_fits_in_pointer(value_)
 
 /* allocate and free functions must also be defined. */
 #define DLOOP_Malloc MPL_malloc
 #define DLOOP_Free   MPL_free
 
 /* assert function */
-#define DLOOP_Assert MPIU_Assert
+#define DLOOP_Assert MPIR_Assert
 
 /* memory copy function */
-#define DLOOP_Memcpy MPIU_Memcpy
+#define DLOOP_Memcpy MPIR_Memcpy
 
 /* casting macros */
-#define DLOOP_OFFSET_CAST_TO_VOID_PTR MPIU_AINT_CAST_TO_VOID_PTR
-#define DLOOP_VOID_PTR_CAST_TO_OFFSET MPIU_VOID_PTR_CAST_TO_MPI_AINT
-#define DLOOP_PTR_DISP_CAST_TO_OFFSET MPIU_PTR_DISP_CAST_TO_MPI_AINT
+#define DLOOP_OFFSET_CAST_TO_VOID_PTR MPIR_AINT_CAST_TO_VOID_PTR
+#define DLOOP_VOID_PTR_CAST_TO_OFFSET MPIR_VOID_PTR_CAST_TO_MPI_AINT
+#define DLOOP_PTR_DISP_CAST_TO_OFFSET MPIR_PTR_DISP_CAST_TO_MPI_AINT
 
 /* printing macros */
 #define DLOOP_OFFSET_FMT_DEC_SPEC MPI_AINT_FMT_DEC_SPEC
diff --git a/src/mpid/common/datatype/mpidu_datatype.h b/src/mpid/common/datatype/mpidu_datatype.h
index 37f524d..73cd35d 100644
--- a/src/mpid/common/datatype/mpidu_datatype.h
+++ b/src/mpid/common/datatype/mpidu_datatype.h
@@ -13,7 +13,7 @@
 /* NOTE: 
  * - struct MPIDU_Dataloop and MPIDU_Segment are defined in 
  *   src/mpid/common/datatype/mpidu_dataloop.h (and gen_dataloop.h).
- * - MPIU_Object_alloc_t is defined in src/include/mpihandle.h
+ * - MPIR_Object_alloc_t is defined in src/include/mpihandle.h
  */
 
 #define MPIDU_Datatype_get_ptr(a,ptr)   MPIR_Getb_ptr(Datatype,a,0x000000ff,ptr)
@@ -22,7 +22,7 @@
 #define MPIDU_Datatype_get_basic_id(a) ((a)&0x000000ff)
 #define MPIDU_Datatype_get_basic_size(a) (((a)&0x0000ff00)>>8)
 
-#define MPIDU_Datatype_add_ref(datatype_ptr) MPIU_Object_add_ref((datatype_ptr))
+#define MPIDU_Datatype_add_ref(datatype_ptr) MPIR_Object_add_ref((datatype_ptr))
 
 #define MPIDU_Datatype_get_basic_type(a,basic_type_) do {                    \
     void *ptr;								\
@@ -33,7 +33,7 @@
             break;							\
         case HANDLE_KIND_INDIRECT:					\
             ptr = ((MPIDU_Datatype *)					\
-		   MPIU_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
+		   MPIR_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
             basic_type_ = ((MPIDU_Datatype *) ptr)->basic_type;			\
             break;							\
         case HANDLE_KIND_BUILTIN:					\
@@ -60,7 +60,7 @@
 #define MPIDU_Datatype_release(datatype_ptr) do {                            \
     int inuse_;								    \
 									    \
-    MPIU_Object_release_ref((datatype_ptr),&inuse_);			    \
+    MPIR_Object_release_ref((datatype_ptr),&inuse_);			    \
     if (!inuse_) {							    \
         int lmpi_errno = MPI_SUCCESS;					    \
 	if (MPIR_Process.attr_free && datatype_ptr->attributes) {	    \
@@ -71,7 +71,7 @@
 	if (datatype_ptr->free_fn) {					    \
 	    mpi_errno = (datatype_ptr->free_fn)( datatype_ptr );	    \
 	     if (mpi_errno) {						    \
-		 MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_FREE);		    \
+		 MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_FREE);		    \
 		 return MPIR_Err_return_comm( 0, FCNAME, mpi_errno );	    \
 	     }								    \
 	} */								    \
@@ -92,7 +92,7 @@
             break;							\
         case HANDLE_KIND_INDIRECT:					\
             ptr = ((MPIDU_Datatype *)					\
-		   MPIU_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
+		   MPIR_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
             size_ = ((MPIDU_Datatype *) ptr)->size;			\
             break;							\
         case HANDLE_KIND_BUILTIN:					\
@@ -145,7 +145,7 @@
             break;							\
         case HANDLE_KIND_INDIRECT:					\
             ptr = ((MPIDU_Datatype *)					\
-		   MPIU_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
+		   MPIR_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
             MPIDU_GET_FIELD(hetero_,depth_,_depth);                      \
             break;							\
         case HANDLE_KIND_INVALID:					\
@@ -165,7 +165,7 @@
             break;							\
         case HANDLE_KIND_INDIRECT:					\
             ptr = ((MPIDU_Datatype *)					\
-		   MPIU_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
+		   MPIR_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
             MPIDU_GET_FIELD(hetero_,depth_,_size);                       \
             break;							\
         case HANDLE_KIND_INVALID:					\
@@ -185,7 +185,7 @@
             break;							\
         case HANDLE_KIND_INDIRECT:					\
             ptr = ((MPIDU_Datatype *)					\
-		   MPIU_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
+		   MPIR_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
             MPIDU_GET_FIELD(hetero_,lptr_,);                             \
             break;							\
         case HANDLE_KIND_INVALID:					\
@@ -204,7 +204,7 @@
             break;							\
         case HANDLE_KIND_INDIRECT:					\
             ptr = ((MPIDU_Datatype *)					\
-		   MPIU_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
+		   MPIR_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
             MPIDU_SET_FIELD(hetero_,depth_,_depth);                      \
             break;							\
         case HANDLE_KIND_INVALID:					\
@@ -224,7 +224,7 @@
             break;							\
         case HANDLE_KIND_INDIRECT:					\
             ptr = ((MPIDU_Datatype *)					\
-		   MPIU_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
+		   MPIR_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
             MPIDU_SET_FIELD(hetero_,depth_,_size);                       \
             break;							\
         case HANDLE_KIND_INVALID:					\
@@ -244,7 +244,7 @@
             break;							\
         case HANDLE_KIND_INDIRECT:					\
             ptr = ((MPIDU_Datatype *)					\
-		   MPIU_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
+		   MPIR_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	\
             MPIDU_SET_FIELD(hetero_,lptr_,);                             \
             break;							\
         case HANDLE_KIND_INVALID:					\
@@ -264,7 +264,7 @@
             break;							    \
         case HANDLE_KIND_INDIRECT:					    \
             ptr = ((MPIDU_Datatype *)					    \
-		   MPIU_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	    \
+		   MPIR_Handle_get_ptr_indirect(a,&MPIDU_Datatype_mem));	    \
             extent_ = ((MPIDU_Datatype *) ptr)->extent;			    \
             break;							    \
         case HANDLE_KIND_INVALID:					    \
@@ -355,13 +355,13 @@ typedef struct MPIDU_Datatype_contents {
   with each MPI object type having a separate routine.  Since reference
   count changes are always up or down one, and since all MPI objects 
   are defined to have the 'ref_count' field in the same place, the current
-  ADI3 API uses two routines, 'MPIU_Object_add_ref' and 
-  'MPIU_Object_release_ref', to increment and decrement the reference count.
+  ADI3 API uses two routines, 'MPIR_Object_add_ref' and
+  'MPIR_Object_release_ref', to increment and decrement the reference count.
 
   S*/
 typedef struct MPIDU_Datatype { 
-    /* handle and ref_count are filled in by MPIU_Handle_obj_alloc() */
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    /* handle and ref_count are filled in by MPIR_Handle_obj_alloc() */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
 
     /* basic parameters for datatype, accessible via MPI calls */
     MPI_Aint size;   /* MPI_Count could be 128 bits, so use MPI_Aint */
@@ -431,7 +431,7 @@ typedef struct MPIDU_Datatype {
 #endif
 } MPIDU_Datatype;
 
-extern MPIU_Object_alloc_t MPIDU_Datatype_mem;
+extern MPIR_Object_alloc_t MPIDU_Datatype_mem;
 
 /* Preallocated datatype objects */
 /* This value should be set to greatest value used as the type index suffix in
diff --git a/src/mpid/common/datatype/mpidu_datatype_contents.c b/src/mpid/common/datatype/mpidu_datatype_contents.c
index 8aacd83..636e1be 100644
--- a/src/mpid/common/datatype/mpidu_datatype_contents.c
+++ b/src/mpid/common/datatype/mpidu_datatype_contents.c
@@ -84,17 +84,17 @@ int MPIDU_Datatype_set_contents(MPIDU_Datatype *new_dtp,
     ptr = ((char *) cp) + struct_sz;
     /* Fortran90 combiner types do not have a "base" type */
     if (nr_types > 0) {
-	MPIU_Memcpy(ptr, array_of_types, nr_types * sizeof(MPI_Datatype));
+	MPIR_Memcpy(ptr, array_of_types, nr_types * sizeof(MPI_Datatype));
     }
     
     ptr = ((char *) cp) + struct_sz + types_sz;
     if (nr_ints > 0) {
-	MPIU_Memcpy(ptr, array_of_ints, nr_ints * sizeof(int));
+	MPIR_Memcpy(ptr, array_of_ints, nr_ints * sizeof(int));
     }
 
     ptr = ((char *) cp) + struct_sz + types_sz + ints_sz;
     if (nr_aints > 0) {
-	MPIU_Memcpy(ptr, array_of_aints, nr_aints * sizeof(MPI_Aint));
+	MPIR_Memcpy(ptr, array_of_aints, nr_aints * sizeof(MPI_Aint));
     }
     new_dtp->contents = cp;
 
@@ -163,7 +163,7 @@ void MPIDI_Datatype_get_contents_ints(MPIDU_Datatype_contents *cp,
     }
 
     ptr = ((char *) cp) + struct_sz + types_sz;
-    MPIU_Memcpy(user_ints, ptr, cp->nr_ints * sizeof(int));
+    MPIR_Memcpy(user_ints, ptr, cp->nr_ints * sizeof(int));
 
     return;
 }
@@ -201,7 +201,7 @@ void MPIDI_Datatype_get_contents_aints(MPIDU_Datatype_contents *cp,
     }
 
     ptr = ((char *) cp) + struct_sz + types_sz + ints_sz;
-    MPIU_Memcpy(user_aints, ptr, cp->nr_aints * sizeof(MPI_Aint));
+    MPIR_Memcpy(user_aints, ptr, cp->nr_aints * sizeof(MPI_Aint));
 
     return;
 }
@@ -231,7 +231,7 @@ void MPIDI_Datatype_get_contents_types(MPIDU_Datatype_contents *cp,
     }
 
     ptr = ((char *) cp) + struct_sz;
-    MPIU_Memcpy(user_types, ptr, cp->nr_types * sizeof(MPI_Datatype));
+    MPIR_Memcpy(user_types, ptr, cp->nr_types * sizeof(MPI_Datatype));
 
     return;
 }
diff --git a/src/mpid/common/datatype/mpidu_datatype_free.c b/src/mpid/common/datatype/mpidu_datatype_free.c
index 2aa72c0..4a25dea 100644
--- a/src/mpid/common/datatype/mpidu_datatype_free.c
+++ b/src/mpid/common/datatype/mpidu_datatype_free.c
@@ -55,5 +55,5 @@ void MPIDU_Datatype_free(MPIDU_Datatype *ptr)
 	MPIDU_Dataloop_free(&(ptr->hetero_dloop));
     }
 #endif /* MPID_HAS_HETERO */
-    MPIU_Handle_obj_free(&MPIDU_Datatype_mem, ptr);
+    MPIR_Handle_obj_free(&MPIDU_Datatype_mem, ptr);
 }
diff --git a/src/mpid/common/datatype/mpidu_ext32_segment.c b/src/mpid/common/datatype/mpidu_ext32_segment.c
index be87991..cea3fb1 100644
--- a/src/mpid/common/datatype/mpidu_ext32_segment.c
+++ b/src/mpid/common/datatype/mpidu_ext32_segment.c
@@ -71,7 +71,7 @@ static int external32_basic_convert(char *dest_buf,
     char *src_ptr = src_buf, *dest_ptr = dest_buf;
     char *src_end = (char *)(src_buf + ((int)count * src_el_size));
 
-    MPIU_Assert(dest_buf && src_buf);
+    MPIR_Assert(dest_buf && src_buf);
 
     if (src_el_size == dest_el_size)
     {
@@ -129,7 +129,7 @@ static int external32_float_convert(char *dest_buf,
     char *src_ptr = src_buf, *dest_ptr = dest_buf;
     char *src_end = (char *)(src_buf + ((int)count * src_el_size));
 
-    MPIU_Assert(dest_buf && src_buf);
+    MPIR_Assert(dest_buf && src_buf);
 
     if (src_el_size == dest_el_size)
     {
@@ -180,13 +180,13 @@ static int MPIDU_Segment_contig_pack_external32_to_buf(DLOOP_Offset *blocks_p,
 {
     int src_el_size, dest_el_size;
     struct MPIDU_Segment_piece_params *paramp = v_paramp;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_EXTERNAL32_TO_BUF);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_EXTERNAL32_TO_BUF);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_EXTERNAL32_TO_BUF);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_EXTERNAL32_TO_BUF);
 
     src_el_size = MPIDU_Datatype_get_basic_size(el_type);
     dest_el_size = MPIDI_Datatype_get_basic_size_external32(el_type);
-    MPIU_Assert(dest_el_size);
+    MPIR_Assert(dest_el_size);
 
     /*
      * h  = handle value
@@ -210,7 +210,7 @@ static int MPIDU_Segment_contig_pack_external32_to_buf(DLOOP_Offset *blocks_p,
     /* TODO: DEAL WITH CASE WHERE ALL DATA DOESN'T FIT! */
     if ((src_el_size == dest_el_size) && (src_el_size == 1))
     {
-        MPIU_Memcpy(paramp->u.pack.pack_buffer,
+        MPIR_Memcpy(paramp->u.pack.pack_buffer,
 	       ((char *) bufp) + rel_off, *blocks_p);
     }
     else if (is_float_type(el_type))
@@ -227,7 +227,7 @@ static int MPIDU_Segment_contig_pack_external32_to_buf(DLOOP_Offset *blocks_p,
     }
     paramp->u.pack.pack_buffer += (dest_el_size * (*blocks_p));
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_EXTERNAL32_TO_BUF);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_EXTERNAL32_TO_BUF);
     return 0;
 }
 
@@ -243,13 +243,13 @@ static int MPIDU_Segment_contig_unpack_external32_to_buf(DLOOP_Offset *blocks_p,
 {
     int src_el_size, dest_el_size;
     struct MPIDU_Segment_piece_params *paramp = v_paramp;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_CONTIG_UNPACK_EXTERNAL32_TO_BUF);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_CONTIG_UNPACK_EXTERNAL32_TO_BUF);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_CONTIG_UNPACK_EXTERNAL32_TO_BUF);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_CONTIG_UNPACK_EXTERNAL32_TO_BUF);
 
     src_el_size = MPIDU_Datatype_get_basic_size(el_type);
     dest_el_size = MPIDI_Datatype_get_basic_size_external32(el_type);
-    MPIU_Assert(dest_el_size);
+    MPIR_Assert(dest_el_size);
 
     /*
      * h  = handle value
@@ -273,7 +273,7 @@ static int MPIDU_Segment_contig_unpack_external32_to_buf(DLOOP_Offset *blocks_p,
     /* TODO: DEAL WITH CASE WHERE ALL DATA DOESN'T FIT! */
     if ((src_el_size == dest_el_size) && (src_el_size == 1))
     {
-        MPIU_Memcpy(((char *)bufp) + rel_off,
+        MPIR_Memcpy(((char *)bufp) + rel_off,
 	       paramp->u.unpack.unpack_buffer, *blocks_p);
     }
     else if (is_float_type(el_type))
@@ -290,7 +290,7 @@ static int MPIDU_Segment_contig_unpack_external32_to_buf(DLOOP_Offset *blocks_p,
     }
     paramp->u.unpack.unpack_buffer += (dest_el_size * (*blocks_p));
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_UNPACK_EXTERNAL32_TO_BUF);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_UNPACK_EXTERNAL32_TO_BUF);
     return 0;
 }
 
@@ -304,9 +304,9 @@ void MPIDU_Segment_pack_external32(struct DLOOP_Segment *segp,
 				  void *pack_buffer)
 {
     struct MPIDU_Segment_piece_params pack_params;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_PACK_EXTERNAL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_PACK_EXTERNAL);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_PACK_EXTERNAL);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_PACK_EXTERNAL);
 
     pack_params.u.pack.pack_buffer = (DLOOP_Buffer)pack_buffer;
     MPIDU_Segment_manipulate(segp,
@@ -319,7 +319,7 @@ void MPIDU_Segment_pack_external32(struct DLOOP_Segment *segp,
                             MPIDI_Datatype_get_basic_size_external32,
 			    &pack_params);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_PACK_EXTERNAL);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_PACK_EXTERNAL);
     return;
 }
 
@@ -333,9 +333,9 @@ void MPIDU_Segment_unpack_external32(struct DLOOP_Segment *segp,
 				    DLOOP_Buffer unpack_buffer)
 {
     struct MPIDU_Segment_piece_params pack_params;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_UNPACK_EXTERNAL32);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_UNPACK_EXTERNAL32);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_UNPACK_EXTERNAL32);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_UNPACK_EXTERNAL32);
 
     pack_params.u.unpack.unpack_buffer = unpack_buffer;
     MPIDU_Segment_manipulate(segp,
@@ -348,6 +348,6 @@ void MPIDU_Segment_unpack_external32(struct DLOOP_Segment *segp,
                             MPIDI_Datatype_get_basic_size_external32,
 			    &pack_params);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_UNPACK_EXTERNAL32);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_UNPACK_EXTERNAL32);
     return;
 }
diff --git a/src/mpid/common/datatype/mpidu_segment.c b/src/mpid/common/datatype/mpidu_segment.c
index 783db7e..ffeb521 100644
--- a/src/mpid/common/datatype/mpidu_segment.c
+++ b/src/mpid/common/datatype/mpidu_segment.c
@@ -108,15 +108,15 @@ void MPIDU_Segment_pack_vector(struct DLOOP_Segment *segp,
 			  int *lengthp)
 {
 struct MPIDU_Segment_piece_params packvec_params;
-MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_PACK_VECTOR);
+MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_PACK_VECTOR);
 
-MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_PACK_VECTOR);
+MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_PACK_VECTOR);
 
 packvec_params.u.pack_vector.vectorp = vectorp;
 packvec_params.u.pack_vector.index   = 0;
 packvec_params.u.pack_vector.length  = *lengthp;
 
-MPIU_Assert(*lengthp > 0);
+MPIR_Assert(*lengthp > 0);
 
 MPIDU_Segment_manipulate(segp,
 			first,
@@ -130,7 +130,7 @@ MPIDU_Segment_manipulate(segp,
 
 /* last value already handled by MPIDU_Segment_manipulate */
 *lengthp = packvec_params.u.pack_vector.index;
-MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_PACK_VECTOR);
+MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_PACK_VECTOR);
 return;
 }
 
@@ -148,10 +148,10 @@ void MPIDU_Segment_unpack_vector(struct DLOOP_Segment *segp,
 			    DLOOP_VECTOR *vectorp,
 			    int *lengthp)
 {
-MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_UNPACK_VECTOR);
-MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_UNPACK_VECTOR);
+MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_UNPACK_VECTOR);
+MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_UNPACK_VECTOR);
 MPIDU_Segment_pack_vector(segp, first, lastp, vectorp, lengthp);
-MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_UNPACK_VECTOR);
+MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_UNPACK_VECTOR);
 return;
 }
 
@@ -177,16 +177,16 @@ void MPIDU_Segment_flatten(struct DLOOP_Segment *segp,
 		      DLOOP_Offset *lengthp)
 {
 struct MPIDU_Segment_piece_params packvec_params;
-MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_FLATTEN);
+MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_FLATTEN);
 
-MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_FLATTEN);
+MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_FLATTEN);
 
 packvec_params.u.flatten.offp = (int64_t *) offp;
 packvec_params.u.flatten.sizep = sizep;
 packvec_params.u.flatten.index   = 0;
 packvec_params.u.flatten.length  = *lengthp;
 
-MPIU_Assert(*lengthp > 0);
+MPIR_Assert(*lengthp > 0);
 
 MPIDU_Segment_manipulate(segp,
 			first,
@@ -200,7 +200,7 @@ MPIDU_Segment_manipulate(segp,
 
 /* last value already handled by MPIDU_Segment_manipulate */
 *lengthp = packvec_params.u.flatten.index;
-MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_FLATTEN);
+MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_FLATTEN);
 return;
 }
 
@@ -227,9 +227,9 @@ static int MPIDU_Segment_contig_pack_to_iov(DLOOP_Offset *blocks_p,
     DLOOP_Offset size;
     char *last_end = NULL;
     struct MPIDU_Segment_piece_params *paramp = v_paramp;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_TO_IOV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_TO_IOV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_TO_IOV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_TO_IOV);
 
     el_size = MPIDU_Datatype_get_basic_size(el_type);
     size = *blocks_p * (DLOOP_Offset) el_size;
@@ -248,7 +248,7 @@ static int MPIDU_Segment_contig_pack_to_iov(DLOOP_Offset *blocks_p,
 	    paramp->u.pack_vector.vectorp[last_idx].DLOOP_VECTOR_LEN;
     }
 
-    MPIU_Ensure_Aint_fits_in_pointer((MPIU_VOID_PTR_CAST_TO_MPI_AINT (bufp)) + rel_off);
+    MPIR_Ensure_Aint_fits_in_pointer((MPIR_VOID_PTR_CAST_TO_MPI_AINT (bufp)) + rel_off);
     if ((last_idx == paramp->u.pack_vector.length-1) &&
 	(last_end != ((char *) bufp + rel_off)))
     {
@@ -257,7 +257,7 @@ static int MPIDU_Segment_contig_pack_to_iov(DLOOP_Offset *blocks_p,
 	 * function that we are done (and that we didn't process any blocks).
 	 */
 	*blocks_p = 0;
-	MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_TO_IOV);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_TO_IOV);
 	return 1;
     }
     else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off)))
@@ -270,7 +270,7 @@ static int MPIDU_Segment_contig_pack_to_iov(DLOOP_Offset *blocks_p,
 	paramp->u.pack_vector.vectorp[last_idx+1].DLOOP_VECTOR_LEN = size;
 	paramp->u.pack_vector.index++;
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_TO_IOV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_PACK_TO_IOV);
     return 0;
 }
 
@@ -303,9 +303,9 @@ static int MPIDU_Segment_vector_pack_to_iov(DLOOP_Offset *blocks_p,
     int i;
     DLOOP_Offset size, blocks_left, basic_size;
     struct MPIDU_Segment_piece_params *paramp = v_paramp;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_VECTOR_PACK_TO_IOV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_VECTOR_PACK_TO_IOV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_VECTOR_PACK_TO_IOV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_VECTOR_PACK_TO_IOV);
 
     basic_size = (DLOOP_Offset) MPIDU_Datatype_get_basic_size(el_type);
     blocks_left = *blocks_p;
@@ -348,7 +348,7 @@ static int MPIDU_Segment_vector_pack_to_iov(DLOOP_Offset *blocks_p,
 		paramp->u.pack_vector.vectorp[last_idx].DLOOP_VECTOR_LEN;
 	}
 
-	MPIU_Ensure_Aint_fits_in_pointer((MPIU_VOID_PTR_CAST_TO_MPI_AINT (bufp)) + rel_off);
+	MPIR_Ensure_Aint_fits_in_pointer((MPIR_VOID_PTR_CAST_TO_MPI_AINT (bufp)) + rel_off);
 	if ((last_idx == paramp->u.pack_vector.length-1) &&
 	    (last_end != ((char *) bufp + rel_off)))
 	{
@@ -361,7 +361,7 @@ static int MPIDU_Segment_vector_pack_to_iov(DLOOP_Offset *blocks_p,
 			    paramp->u.pack_vector.index,
                             (MPI_Aint) *blocks_p));
 #endif
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_VECTOR_PACK_TO_IOV);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_VECTOR_PACK_TO_IOV);
 	    return 1;
 	}
 	else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off)))
@@ -389,8 +389,8 @@ static int MPIDU_Segment_vector_pack_to_iov(DLOOP_Offset *blocks_p,
     /* if we get here then we processed ALL the blocks; don't need to update
      * blocks_p
      */
-    MPIU_Assert(blocks_left == 0);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_VECTOR_PACK_TO_IOV);
+    MPIR_Assert(blocks_left == 0);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_VECTOR_PACK_TO_IOV);
     return 0;
 }
 
@@ -411,9 +411,9 @@ static int MPIDU_Segment_contig_flatten(DLOOP_Offset *blocks_p,
     int idx, el_size;
     DLOOP_Offset size;
     struct MPIDU_Segment_piece_params *paramp = v_paramp;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_CONTIG_FLATTEN);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_CONTIG_FLATTEN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_CONTIG_FLATTEN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_CONTIG_FLATTEN);
 
     el_size = MPIDU_Datatype_get_basic_size(el_type);
     size = *blocks_p * (DLOOP_Offset) el_size;
@@ -422,13 +422,13 @@ static int MPIDU_Segment_contig_flatten(DLOOP_Offset *blocks_p,
 #ifdef MPID_SP_VERBOSE
     MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE,VERBOSE,(MPL_DBG_FDEST,"\t[contig flatten: idx = %d, loc = (" MPI_AINT_FMT_HEX_SPEC " + " MPI_AINT_FMT_HEX_SPEC ") = " MPI_AINT_FMT_HEX_SPEC ", size = " MPI_AINT_FMT_DEC_SPEC "]\n",
 		    idx,
-		    MPIU_VOID_PTR_CAST_TO_MPI_AINT bufp,
+		    MPIR_VOID_PTR_CAST_TO_MPI_AINT bufp,
 		    (MPI_Aint) rel_off,
-		    MPIU_VOID_PTR_CAST_TO_MPI_AINT bufp + rel_off,
+		    MPIR_VOID_PTR_CAST_TO_MPI_AINT bufp + rel_off,
                     (MPI_Aint) size));
 #endif
 
-    if (idx > 0 && ((DLOOP_Offset) MPIU_VOID_PTR_CAST_TO_MPI_AINT bufp + rel_off) ==
+    if (idx > 0 && ((DLOOP_Offset) MPIR_VOID_PTR_CAST_TO_MPI_AINT bufp + rel_off) ==
 	((paramp->u.flatten.offp[idx - 1]) +
 	 (DLOOP_Offset) paramp->u.flatten.sizep[idx - 1]))
     {
@@ -436,7 +436,7 @@ static int MPIDU_Segment_contig_flatten(DLOOP_Offset *blocks_p,
 	paramp->u.flatten.sizep[idx - 1] += size;
     }
     else {
-	paramp->u.flatten.offp[idx] =  ((int64_t) MPIU_VOID_PTR_CAST_TO_MPI_AINT bufp) + (int64_t) rel_off;
+	paramp->u.flatten.offp[idx] =  ((int64_t) MPIR_VOID_PTR_CAST_TO_MPI_AINT bufp) + (int64_t) rel_off;
 	paramp->u.flatten.sizep[idx] = size;
 
 	paramp->u.flatten.index++;
@@ -445,11 +445,11 @@ static int MPIDU_Segment_contig_flatten(DLOOP_Offset *blocks_p,
 	 */
 	if (paramp->u.flatten.index == paramp->u.flatten.length)
 	{
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_FLATTEN);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_FLATTEN);
 	    return 1;
 	}
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_FLATTEN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_CONTIG_FLATTEN);
     return 0;
 }
 
@@ -479,9 +479,9 @@ static int MPIDU_Segment_vector_flatten(DLOOP_Offset *blocks_p,
     int i;
     DLOOP_Offset size, blocks_left, basic_size;
     struct MPIDU_Segment_piece_params *paramp = v_paramp;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_SEGMENT_VECTOR_FLATTEN);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEGMENT_VECTOR_FLATTEN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEGMENT_VECTOR_FLATTEN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEGMENT_VECTOR_FLATTEN);
 
     basic_size = (DLOOP_Offset) MPIDU_Datatype_get_basic_size(el_type);
     blocks_left = *blocks_p;
@@ -499,7 +499,7 @@ static int MPIDU_Segment_vector_flatten(DLOOP_Offset *blocks_p,
 	    blocks_left = 0;
 	}
 
-	if (idx > 0 && ((DLOOP_Offset) MPIU_VOID_PTR_CAST_TO_MPI_AINT bufp + rel_off) ==
+	if (idx > 0 && ((DLOOP_Offset) MPIR_VOID_PTR_CAST_TO_MPI_AINT bufp + rel_off) ==
 	    ((paramp->u.flatten.offp[idx - 1]) + (DLOOP_Offset) paramp->u.flatten.sizep[idx - 1]))
 	{
 	    /* add this size to the last region rather than using up another one */
@@ -507,22 +507,22 @@ static int MPIDU_Segment_vector_flatten(DLOOP_Offset *blocks_p,
 	}
 	else if (idx < paramp->u.flatten.length) {
 	    /* take up another region */
-	    paramp->u.flatten.offp[idx]  = (DLOOP_Offset) MPIU_VOID_PTR_CAST_TO_MPI_AINT bufp + rel_off;
+	    paramp->u.flatten.offp[idx]  = (DLOOP_Offset) MPIR_VOID_PTR_CAST_TO_MPI_AINT bufp + rel_off;
 	    paramp->u.flatten.sizep[idx] = size;
 	    paramp->u.flatten.index++;
 	}
 	else {
 	    /* we tried to add to the end of the last region and failed; add blocks back in */
 	    *blocks_p = *blocks_p - blocks_left + (size / basic_size);
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_VECTOR_FLATTEN);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_VECTOR_FLATTEN);
 	    return 1;
 	}
 	rel_off += stride;
 
     }
     /* --BEGIN ERROR HANDLING-- */
-    MPIU_Assert(blocks_left == 0);
+    MPIR_Assert(blocks_left == 0);
     /* --END ERROR HANDLING-- */
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_SEGMENT_VECTOR_FLATTEN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_SEGMENT_VECTOR_FLATTEN);
     return 0;
 }
diff --git a/src/mpid/common/datatype/mpidu_type_blockindexed.c b/src/mpid/common/datatype/mpidu_type_blockindexed.c
index 89f8d58..f1f42c2 100644
--- a/src/mpid/common/datatype/mpidu_type_blockindexed.c
+++ b/src/mpid/common/datatype/mpidu_type_blockindexed.c
@@ -50,7 +50,7 @@ int MPIDU_Type_blockindexed(int count,
     if (count == 0) return MPIDU_Type_zerolen(newtype);
 
     /* allocate new datatype object and handle */
-    new_dtp = (MPIDU_Datatype *) MPIU_Handle_obj_alloc(&MPIDU_Datatype_mem);
+    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
     /* --BEGIN ERROR HANDLING-- */
     if (!new_dtp)
     {
@@ -61,8 +61,8 @@ int MPIDU_Type_blockindexed(int count,
     }
     /* --END ERROR HANDLING-- */
 
-    /* handle is filled in by MPIU_Handle_obj_alloc() */
-    MPIU_Object_set_ref(new_dtp, 1);
+    /* handle is filled in by MPIR_Handle_obj_alloc() */
+    MPIR_Object_set_ref(new_dtp, 1);
     new_dtp->is_permanent = 0;
     new_dtp->is_committed = 0;
     new_dtp->attributes   = NULL;
diff --git a/src/mpid/common/datatype/mpidu_type_commit.c b/src/mpid/common/datatype/mpidu_type_commit.c
index f78bd08..dd02fce 100644
--- a/src/mpid/common/datatype/mpidu_type_commit.c
+++ b/src/mpid/common/datatype/mpidu_type_commit.c
@@ -25,7 +25,7 @@ int MPIDU_Type_commit(MPI_Datatype *datatype_p)
     int           mpi_errno=MPI_SUCCESS;
     MPIDU_Datatype *datatype_ptr;
 
-    MPIU_Assert(HANDLE_GET_KIND(*datatype_p) != HANDLE_KIND_BUILTIN);
+    MPIR_Assert(HANDLE_GET_KIND(*datatype_p) != HANDLE_KIND_BUILTIN);
 
     MPIDU_Datatype_get_ptr(*datatype_p, datatype_ptr);
 
diff --git a/src/mpid/common/datatype/mpidu_type_contiguous.c b/src/mpid/common/datatype/mpidu_type_contiguous.c
index 64f663d..d711159 100644
--- a/src/mpid/common/datatype/mpidu_type_contiguous.c
+++ b/src/mpid/common/datatype/mpidu_type_contiguous.c
@@ -37,7 +37,7 @@ int MPIDU_Type_contiguous(int count,
     if (count == 0) return MPIDU_Type_zerolen(newtype);
 
     /* allocate new datatype object and handle */
-    new_dtp = (MPIDU_Datatype *) MPIU_Handle_obj_alloc(&MPIDU_Datatype_mem);
+    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
     /* --BEGIN ERROR HANDLING-- */
     if (!new_dtp)
     {
@@ -49,8 +49,8 @@ int MPIDU_Type_contiguous(int count,
     }
     /* --END ERROR HANDLING-- */
 
-    /* handle is filled in by MPIU_Handle_obj_alloc() */
-    MPIU_Object_set_ref(new_dtp, 1);
+    /* handle is filled in by MPIR_Handle_obj_alloc() */
+    MPIR_Object_set_ref(new_dtp, 1);
     new_dtp->is_permanent = 0;
     new_dtp->is_committed = 0;
     new_dtp->attributes   = NULL;
diff --git a/src/mpid/common/datatype/mpidu_type_create_pairtype.c b/src/mpid/common/datatype/mpidu_type_create_pairtype.c
index 7b57e6c..57dfdc2 100644
--- a/src/mpid/common/datatype/mpidu_type_create_pairtype.c
+++ b/src/mpid/common/datatype/mpidu_type_create_pairtype.c
@@ -20,7 +20,7 @@
 	type_size_   = sizeof(foo.a) + sizeof(foo.b);			\
 	type_extent_ = (MPI_Aint) sizeof(foo);				\
 	el_size_ = (sizeof(foo.a) == sizeof(foo.b)) ? (int) sizeof(foo.a) : -1; \
-	true_ub_ = (MPIU_VOID_PTR_CAST_TO_MPI_AINT ((char *) &foo.b -     \
+	true_ub_ = (MPIR_VOID_PTR_CAST_TO_MPI_AINT ((char *) &foo.b -     \
                                                   (char *) &foo.a)) +   \
                   (MPI_Aint) sizeof(foo.b);                             \
 	alignsize_ = MPL_MAX(MPIDU_Datatype_get_basic_size(mt1_),	\
@@ -70,8 +70,8 @@ int MPIDU_Type_create_pairtype(MPI_Datatype type,
     int type_size, alignsize;
     MPI_Aint type_extent, true_ub, el_size;
 
-    /* handle is filled in by MPIU_Handle_obj_alloc() */
-    MPIU_Object_set_ref(new_dtp, 1);
+    /* handle is filled in by MPIR_Handle_obj_alloc() */
+    MPIR_Object_set_ref(new_dtp, 1);
     new_dtp->is_permanent = 1;
     new_dtp->is_committed = 1; /* predefined types are pre-committed */
     new_dtp->attributes   = NULL;
diff --git a/src/mpid/common/datatype/mpidu_type_create_resized.c b/src/mpid/common/datatype/mpidu_type_create_resized.c
index 6ca3c06..dc520d8 100644
--- a/src/mpid/common/datatype/mpidu_type_create_resized.c
+++ b/src/mpid/common/datatype/mpidu_type_create_resized.c
@@ -20,13 +20,13 @@ int MPIDU_Type_create_resized(MPI_Datatype oldtype,
 {
     MPIDU_Datatype *new_dtp;
 
-    new_dtp = (MPIDU_Datatype *) MPIU_Handle_obj_alloc(&MPIDU_Datatype_mem);
+    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
     /* --BEGIN ERROR HANDLING-- */
     if (!new_dtp) return MPIDI_Type_create_resized_memory_error();
     /* --END ERROR HANDLING-- */
 
-    /* handle is filled in by MPIU_Handle_obj_alloc() */
-    MPIU_Object_set_ref(new_dtp, 1);
+    /* handle is filled in by MPIR_Handle_obj_alloc() */
+    MPIR_Object_set_ref(new_dtp, 1);
     new_dtp->is_permanent = 0;
     new_dtp->is_committed = 0;
     new_dtp->attributes   = 0;
diff --git a/src/mpid/common/datatype/mpidu_type_debug.c b/src/mpid/common/datatype/mpidu_type_debug.c
index 21cd000..14ea56a 100644
--- a/src/mpid/common/datatype/mpidu_type_debug.c
+++ b/src/mpid/common/datatype/mpidu_type_debug.c
@@ -191,7 +191,7 @@ void MPIDI_Dataloop_dot_printf(MPIDU_Dataloop *loop_p,
 	    }
 	    break;
 	default:
-	    MPIU_Assert(0);
+	    MPIR_Assert(0);
     }
 
     if (!(loop_p->kind & DLOOP_FINAL_MASK)) {
@@ -256,7 +256,7 @@ void MPIDI_Datatype_printf(MPI_Datatype type,
 	sticky_ub = type_ptr->has_sticky_ub;
     }
 
-    MPIR_Type_size_impl(type, &size);
+    MPIDU_Datatype_get_size_macro(type, size);
     MPIR_Type_get_true_extent_impl(type, &true_lb, &extent);
     true_ub = extent + true_lb;
     MPIR_Type_get_extent_impl(type, &lb, &extent);
@@ -463,7 +463,7 @@ void MPIDU_Datatype_debug(MPI_Datatype type,
     if (is_builtin) return;
 
     MPIDU_Datatype_get_ptr(type, dtp);
-    MPIU_Assert(dtp != NULL);
+    MPIR_Assert(dtp != NULL);
 
     MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
       "# Size = " MPI_AINT_FMT_DEC_SPEC ", Extent = " MPI_AINT_FMT_DEC_SPEC ", LB = " MPI_AINT_FMT_DEC_SPEC "%s, UB = " MPI_AINT_FMT_DEC_SPEC "%s, Extent = " MPI_AINT_FMT_DEC_SPEC ", Element Size = " MPI_AINT_FMT_DEC_SPEC " (%s), %s",
diff --git a/src/mpid/common/datatype/mpidu_type_dup.c b/src/mpid/common/datatype/mpidu_type_dup.c
index 2a6f3cb..ed683f6 100644
--- a/src/mpid/common/datatype/mpidu_type_dup.c
+++ b/src/mpid/common/datatype/mpidu_type_dup.c
@@ -39,7 +39,7 @@ int MPIDU_Type_dup(MPI_Datatype oldtype,
     }
     else {
       	/* allocate new datatype object and handle */
-	new_dtp = (MPIDU_Datatype *) MPIU_Handle_obj_alloc(&MPIDU_Datatype_mem);
+	new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
 	if (!new_dtp) {
 	    /* --BEGIN ERROR HANDLING-- */
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
@@ -52,8 +52,8 @@ int MPIDU_Type_dup(MPI_Datatype oldtype,
 	MPIDU_Datatype_get_ptr(oldtype, old_dtp);
 
 	/* fill in datatype */
-	MPIU_Object_set_ref(new_dtp, 1);
-	/* new_dtp->handle is filled in by MPIU_Handle_obj_alloc() */
+	MPIR_Object_set_ref(new_dtp, 1);
+	/* new_dtp->handle is filled in by MPIR_Handle_obj_alloc() */
 	new_dtp->is_contig     = old_dtp->is_contig;
 	new_dtp->size          = old_dtp->size;
 	new_dtp->extent        = old_dtp->extent;
@@ -85,7 +85,7 @@ int MPIDU_Type_dup(MPI_Datatype oldtype,
 	*newtype = new_dtp->handle;
 
 	if (old_dtp->is_committed) {
-	    MPIU_Assert(old_dtp->dataloop != NULL);
+	    MPIR_Assert(old_dtp->dataloop != NULL);
 	    MPIDU_Dataloop_dup(old_dtp->dataloop,
 			      old_dtp->dataloop_size,
 			      &new_dtp->dataloop);
diff --git a/src/mpid/common/datatype/mpidu_type_get_contents.c b/src/mpid/common/datatype/mpidu_type_get_contents.c
index 84ded31..d16d7ea 100644
--- a/src/mpid/common/datatype/mpidu_type_get_contents.c
+++ b/src/mpid/common/datatype/mpidu_type_get_contents.c
@@ -41,8 +41,8 @@ int MPIDU_Type_get_contents(MPI_Datatype datatype,
     /* these are checked at the MPI layer, so I feel that asserts
      * are appropriate.
      */
-    MPIU_Assert(HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN);
-    MPIU_Assert(datatype != MPI_FLOAT_INT &&
+    MPIR_Assert(HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN);
+    MPIR_Assert(datatype != MPI_FLOAT_INT &&
 		datatype != MPI_DOUBLE_INT &&
 		datatype != MPI_LONG_INT &&
 		datatype != MPI_SHORT_INT &&
@@ -51,7 +51,7 @@ int MPIDU_Type_get_contents(MPI_Datatype datatype,
 
     MPIDU_Datatype_get_ptr(datatype, dtp);
     cp = dtp->contents;
-    MPIU_Assert(cp != NULL);
+    MPIR_Assert(cp != NULL);
 
     /* --BEGIN ERROR HANDLING-- */
     if (max_integers < cp->nr_ints ||
diff --git a/src/mpid/common/datatype/mpidu_type_indexed.c b/src/mpid/common/datatype/mpidu_type_indexed.c
index ed46c7e..3a6649b 100644
--- a/src/mpid/common/datatype/mpidu_type_indexed.c
+++ b/src/mpid/common/datatype/mpidu_type_indexed.c
@@ -57,7 +57,7 @@ int MPIDU_Type_indexed(int count,
     }
 
     /* allocate new datatype object and handle */
-    new_dtp = (MPIDU_Datatype *) MPIU_Handle_obj_alloc(&MPIDU_Datatype_mem);
+    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
     /* --BEGIN ERROR HANDLING-- */
     if (!new_dtp)
     {
@@ -72,8 +72,8 @@ int MPIDU_Type_indexed(int count,
     }
     /* --END ERROR HANDLING-- */
 
-    /* handle is filled in by MPIU_Handle_obj_alloc() */
-    MPIU_Object_set_ref(new_dtp, 1);
+    /* handle is filled in by MPIR_Handle_obj_alloc() */
+    MPIR_Object_set_ref(new_dtp, 1);
     new_dtp->is_permanent = 0;
     new_dtp->is_committed = 0;
     new_dtp->attributes   = NULL;
@@ -110,7 +110,7 @@ int MPIDU_Type_indexed(int count,
 	new_dtp->has_sticky_ub = 0;
 	new_dtp->has_sticky_lb = 0;
 
-        MPIU_Assign_trunc(new_dtp->alignsize, el_sz, MPI_Aint);
+        MPIR_Assign_trunc(new_dtp->alignsize, el_sz, MPI_Aint);
 	new_dtp->builtin_element_size = el_sz;
 	new_dtp->basic_type       = el_type;
 
@@ -124,7 +124,7 @@ int MPIDU_Type_indexed(int count,
 	MPIDU_Datatype_get_ptr(oldtype, old_dtp);
 
 	/* Ensure that "builtin_element_size" fits into an int datatype. */
-	MPIU_Ensure_Aint_fits_in_int(old_dtp->builtin_element_size);
+	MPIR_Ensure_Aint_fits_in_int(old_dtp->builtin_element_size);
 
 	el_sz   = old_dtp->builtin_element_size;
 	old_sz  = old_dtp->size;
@@ -155,7 +155,7 @@ int MPIDU_Type_indexed(int count,
     while (i < count && blocklength_array[i] == 0) i++;
 
     if (i == count) {
-	MPIU_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp);
+	MPIR_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp);
 	return MPIDU_Type_zerolen(newtype);
     }
 
diff --git a/src/mpid/common/datatype/mpidu_type_struct.c b/src/mpid/common/datatype/mpidu_type_struct.c
index 51f480a..7cb560e 100644
--- a/src/mpid/common/datatype/mpidu_type_struct.c
+++ b/src/mpid/common/datatype/mpidu_type_struct.c
@@ -173,7 +173,7 @@ int MPIDU_Type_struct(int count,
 #endif
 
     /* allocate new datatype object and handle */
-    new_dtp = (MPIDU_Datatype *) MPIU_Handle_obj_alloc(&MPIDU_Datatype_mem);
+    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
     /* --BEGIN ERROR HANDLING-- */
     if (!new_dtp)
     {
@@ -185,8 +185,8 @@ int MPIDU_Type_struct(int count,
     }
     /* --END ERROR HANDLING-- */
 
-    /* handle is filled in by MPIU_Handle_obj_alloc() */
-    MPIU_Object_set_ref(new_dtp, 1);
+    /* handle is filled in by MPIR_Handle_obj_alloc() */
+    MPIR_Object_set_ref(new_dtp, 1);
     new_dtp->is_permanent = 0;
     new_dtp->is_committed = 0;
     new_dtp->attributes   = NULL;
@@ -206,7 +206,7 @@ int MPIDU_Type_struct(int count,
 
     if (i == count)
     {
-	MPIU_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp);
+	MPIR_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp);
 	return MPIDU_Type_zerolen(newtype);
     }
 
@@ -249,7 +249,7 @@ int MPIDU_Type_struct(int count,
 	    MPIDU_Datatype_get_ptr(oldtype_array[i], old_dtp);
 
 	    /* Ensure that "builtin_element_size" fits into an int datatype. */
-	    MPIU_Ensure_Aint_fits_in_int(old_dtp->builtin_element_size);
+	    MPIR_Ensure_Aint_fits_in_int(old_dtp->builtin_element_size);
 
 	    tmp_el_sz   = old_dtp->builtin_element_size;
 	    tmp_el_type = old_dtp->basic_type;
diff --git a/src/mpid/common/datatype/mpidu_type_vector.c b/src/mpid/common/datatype/mpidu_type_vector.c
index b926978..0eb4162 100644
--- a/src/mpid/common/datatype/mpidu_type_vector.c
+++ b/src/mpid/common/datatype/mpidu_type_vector.c
@@ -47,7 +47,7 @@ int MPIDU_Type_vector(int count,
     if (count == 0) return MPIDU_Type_zerolen(newtype);
 
     /* allocate new datatype object and handle */
-    new_dtp = (MPIDU_Datatype *) MPIU_Handle_obj_alloc(&MPIDU_Datatype_mem);
+    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
     if (!new_dtp) {
 	/* --BEGIN ERROR HANDLING-- */
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
@@ -57,8 +57,8 @@ int MPIDU_Type_vector(int count,
 	/* --END ERROR HANDLING-- */
     }
 
-    /* handle is filled in by MPIU_Handle_obj_alloc() */
-    MPIU_Object_set_ref(new_dtp, 1);
+    /* handle is filled in by MPIR_Handle_obj_alloc() */
+    MPIR_Object_set_ref(new_dtp, 1);
     new_dtp->is_permanent = 0;
     new_dtp->is_committed = 0;
     new_dtp->attributes   = NULL;
diff --git a/src/mpid/common/datatype/mpidu_type_zerolen.c b/src/mpid/common/datatype/mpidu_type_zerolen.c
index 5059efd..5c46058 100644
--- a/src/mpid/common/datatype/mpidu_type_zerolen.c
+++ b/src/mpid/common/datatype/mpidu_type_zerolen.c
@@ -30,7 +30,7 @@ int MPIDU_Type_zerolen(MPI_Datatype *newtype)
     MPIDU_Datatype *new_dtp;
 
     /* allocate new datatype object and handle */
-    new_dtp = (MPIDU_Datatype *) MPIU_Handle_obj_alloc(&MPIDU_Datatype_mem);
+    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
     /* --BEGIN ERROR HANDLING-- */
     if (!new_dtp)
     {
@@ -42,8 +42,8 @@ int MPIDU_Type_zerolen(MPI_Datatype *newtype)
     }
     /* --END ERROR HANDLING-- */
 
-    /* handle is filled in by MPIU_Handle_obj_alloc() */
-    MPIU_Object_set_ref(new_dtp, 1);
+    /* handle is filled in by MPIR_Handle_obj_alloc() */
+    MPIR_Object_set_ref(new_dtp, 1);
     new_dtp->is_permanent = 0;
     new_dtp->is_committed = 0;
     new_dtp->attributes   = NULL;
diff --git a/src/mpid/common/datatype/mpir_type_flatten.c b/src/mpid/common/datatype/mpir_type_flatten.c
index e12d880..4c3f514 100644
--- a/src/mpid/common/datatype/mpir_type_flatten.c
+++ b/src/mpid/common/datatype/mpir_type_flatten.c
@@ -41,8 +41,8 @@ int MPIR_Type_flatten(MPI_Datatype type,
     }
 
     MPIDU_Datatype_get_ptr(type, datatype_ptr);
-    MPIU_Assert(datatype_ptr->is_committed);
-    MPIU_Assert(*array_len_p >= datatype_ptr->max_contig_blocks);
+    MPIR_Assert(datatype_ptr->is_committed);
+    MPIR_Assert(*array_len_p >= datatype_ptr->max_contig_blocks);
 
     segp = MPIDU_Segment_alloc();
     err = MPIDU_Segment_init(0, 1, type, segp, 0); /* first 0 is bufptr,
diff --git a/src/mpid/common/hcoll/hcoll_init.c b/src/mpid/common/hcoll/hcoll_init.c
index 1935e40..f5ae9c3 100644
--- a/src/mpid/common/hcoll/hcoll_init.c
+++ b/src/mpid/common/hcoll/hcoll_init.c
@@ -176,7 +176,7 @@ int hcoll_comm_create(MPIR_Comm * comm_ptr, void *param)
     if (mpi_errno) {
         hcoll_destroy_context(comm_ptr->hcoll_priv.hcoll_context,
                               (rte_grp_handle_t) comm_ptr, &context_destroyed);
-        MPIU_Assert(context_destroyed);
+        MPIR_Assert(context_destroyed);
         comm_ptr->hcoll_priv.is_hcoll_init = 0;
         MPIR_ERR_POP(mpi_errno);
     }
diff --git a/src/mpid/common/sched/mpidu_sched.c b/src/mpid/common/sched/mpidu_sched.c
index 41872d8..3640f0c 100644
--- a/src/mpid/common/sched/mpidu_sched.c
+++ b/src/mpid/common/sched/mpidu_sched.c
@@ -130,7 +130,7 @@ static int MPIDU_Sched_start_entry(struct MPIDU_Sched *s, size_t idx, struct MPI
     MPIR_Request *r = s->req;
     MPIR_Comm *comm;
 
-    MPIU_Assert(e->status == MPIDU_SCHED_ENTRY_STATUS_NOT_STARTED);
+    MPIR_Assert(e->status == MPIDU_SCHED_ENTRY_STATUS_NOT_STARTED);
 
     switch (e->type) {
     case MPIDU_SCHED_ENTRY_SEND:
@@ -332,16 +332,16 @@ static int MPIDU_Sched_continue(struct MPIDU_Sched *s)
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 /* creates a new opaque schedule object and returns a handle to it in (*sp) */
-int MPIDU_Sched_create(MPID_Sched_t * sp)
+int MPIDU_Sched_create(MPIR_Sched_t * sp)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched *s;
-    MPIU_CHKPMEM_DECL(2);
+    MPIR_CHKPMEM_DECL(2);
 
     *sp = NULL;
 
     /* this mem will be freed by the progress engine when the request is completed */
-    MPIU_CHKPMEM_MALLOC(s, struct MPIDU_Sched *, sizeof(struct MPIDU_Sched), mpi_errno,
+    MPIR_CHKPMEM_MALLOC(s, struct MPIDU_Sched *, sizeof(struct MPIDU_Sched), mpi_errno,
                         "schedule object");
 
     s->size = MPIDU_SCHED_INITIAL_ENTRIES;
@@ -354,18 +354,18 @@ int MPIDU_Sched_create(MPID_Sched_t * sp)
     s->prev = NULL;     /* only needed for sanity checks */
 
     /* this mem will be freed by the progress engine when the request is completed */
-    MPIU_CHKPMEM_MALLOC(s->entries, struct MPIDU_Sched_entry *,
+    MPIR_CHKPMEM_MALLOC(s->entries, struct MPIDU_Sched_entry *,
                         MPIDU_SCHED_INITIAL_ENTRIES * sizeof(struct MPIDU_Sched_entry), mpi_errno,
                         "schedule entries vector");
 
     /* TODO in a debug build, defensively mark all entries as status=INVALID */
 
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
     *sp = s;
   fn_exit:
     return mpi_errno;
   fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
@@ -374,12 +374,12 @@ int MPIDU_Sched_create(MPID_Sched_t * sp)
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 /* clones orig and returns a handle to the new schedule in (*cloned) */
-int MPIDU_Sched_clone(MPID_Sched_t orig, MPID_Sched_t * cloned)
+int MPIDU_Sched_clone(MPIR_Sched_t orig, MPIR_Sched_t * cloned)
 {
     int mpi_errno = MPI_SUCCESS;
     /* TODO implement this function for real */
-    MPIU_Assert_fmt_msg(FALSE, ("clone not yet implemented"));
-    MPIU_Assertp(FALSE);
+    MPIR_Assert_fmt_msg(FALSE, ("clone not yet implemented"));
+    MPIR_Assertp(FALSE);
     return mpi_errno;
 }
 
@@ -387,24 +387,24 @@ int MPIDU_Sched_clone(MPID_Sched_t orig, MPID_Sched_t * cloned)
 #define FUNCNAME MPIDU_Sched_start
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-/* sets (*sp) to MPID_SCHED_NULL and gives you back a request pointer in (*req).
+/* sets (*sp) to MPIR_SCHED_NULL and gives you back a request pointer in (*req).
  * The caller is giving up ownership of the opaque schedule object. */
-int MPIDU_Sched_start(MPID_Sched_t * sp, MPIR_Comm * comm, int tag, MPIR_Request ** req)
+int MPIDU_Sched_start(MPIR_Sched_t * sp, MPIR_Comm * comm, int tag, MPIR_Request ** req)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Request *r;
     struct MPIDU_Sched *s = *sp;
 
     *req = NULL;
-    *sp = MPID_SCHED_NULL;
+    *sp = MPIR_SCHED_NULL;
 
     /* sanity check the schedule */
-    MPIU_Assert(s->num_entries <= s->size);
-    MPIU_Assert(s->num_entries == 0 || s->idx < s->num_entries);
-    MPIU_Assert(s->req == NULL);
-    MPIU_Assert(s->next == NULL);
-    MPIU_Assert(s->prev == NULL);
-    MPIU_Assert(s->entries != NULL);
+    MPIR_Assert(s->num_entries <= s->size);
+    MPIR_Assert(s->num_entries == 0 || s->idx < s->num_entries);
+    MPIR_Assert(s->req == NULL);
+    MPIR_Assert(s->next == NULL);
+    MPIR_Assert(s->prev == NULL);
+    MPIR_Assert(s->entries != NULL);
 
     /* now create and populate the request */
     r = MPIR_Request_create(MPIR_REQUEST_KIND__COLL);
@@ -468,8 +468,8 @@ static int MPIDU_Sched_add_entry(struct MPIDU_Sched *s, int *idx, struct MPIDU_S
     int i;
     struct MPIDU_Sched_entry *ei;
 
-    MPIU_Assert(s->entries != NULL);
-    MPIU_Assert(s->size > 0);
+    MPIR_Assert(s->entries != NULL);
+    MPIR_Assert(s->size > 0);
 
     if (s->num_entries == s->size) {
         /* need to grow the entries array */
@@ -498,7 +498,7 @@ static int MPIDU_Sched_add_entry(struct MPIDU_Sched *s, int *idx, struct MPIDU_S
 #define FCNAME MPL_QUOTE(FUNCNAME)
 /* do these ops need an entry handle returned? */
 int MPIDU_Sched_send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
-                     MPIR_Comm * comm, MPID_Sched_t s)
+                     MPIR_Comm * comm, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched_entry *e = NULL;
@@ -538,7 +538,7 @@ int MPIDU_Sched_send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDU_Sched_ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
-                      MPIR_Comm * comm, MPID_Sched_t s)
+                      MPIR_Comm * comm, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched_entry *e = NULL;
@@ -578,7 +578,7 @@ int MPIDU_Sched_ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, in
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDU_Sched_send_defer(const void *buf, const MPI_Aint * count, MPI_Datatype datatype, int dest,
-                           MPIR_Comm * comm, MPID_Sched_t s)
+                           MPIR_Comm * comm, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched_entry *e = NULL;
@@ -617,7 +617,7 @@ int MPIDU_Sched_send_defer(const void *buf, const MPI_Aint * count, MPI_Datatype
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDU_Sched_recv_status(void *buf, MPI_Aint count, MPI_Datatype datatype, int src,
-                            MPIR_Comm * comm, MPI_Status * status, MPID_Sched_t s)
+                            MPIR_Comm * comm, MPI_Status * status, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched_entry *e = NULL;
@@ -652,7 +652,7 @@ int MPIDU_Sched_recv_status(void *buf, MPI_Aint count, MPI_Datatype datatype, in
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDU_Sched_recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int src, MPIR_Comm * comm,
-                     MPID_Sched_t s)
+                     MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched_entry *e = NULL;
@@ -687,7 +687,7 @@ int MPIDU_Sched_recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int src,
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDU_Sched_reduce(const void *inbuf, void *inoutbuf, MPI_Aint count, MPI_Datatype datatype,
-                       MPI_Op op, MPID_Sched_t s)
+                       MPI_Op op, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched_entry *e = NULL;
@@ -733,7 +733,7 @@ int MPIDU_Sched_reduce(const void *inbuf, void *inoutbuf, MPI_Aint count, MPI_Da
  * Packing/unpacking can be accomplished by passing MPI_PACKED as either intype
  * or outtype. */
 int MPIDU_Sched_copy(const void *inbuf, MPI_Aint incount, MPI_Datatype intype,
-                     void *outbuf, MPI_Aint outcount, MPI_Datatype outtype, MPID_Sched_t s)
+                     void *outbuf, MPI_Aint outcount, MPI_Datatype outtype, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched_entry *e = NULL;
@@ -785,7 +785,7 @@ int MPIDU_Sched_copy(const void *inbuf, MPI_Aint incount, MPI_Datatype intype,
 #define FCNAME MPL_QUOTE(FUNCNAME)
 /* require that all previously added ops are complete before subsequent ops
  * may begin to execute */
-int MPIDU_Sched_barrier(MPID_Sched_t s)
+int MPIDU_Sched_barrier(MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
 
@@ -806,7 +806,7 @@ int MPIDU_Sched_barrier(MPID_Sched_t s)
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 /* buffer management, fancy reductions, etc */
-int MPIDU_Sched_cb(MPID_Sched_cb_t * cb_p, void *cb_state, MPID_Sched_t s)
+int MPIDU_Sched_cb(MPIR_Sched_cb_t * cb_p, void *cb_state, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched_entry *e = NULL;
@@ -837,7 +837,7 @@ int MPIDU_Sched_cb(MPID_Sched_cb_t * cb_p, void *cb_state, MPID_Sched_t s)
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 /* buffer management, fancy reductions, etc */
-int MPIDU_Sched_cb2(MPID_Sched_cb2_t * cb_p, void *cb_state, void *cb_state2, MPID_Sched_t s)
+int MPIDU_Sched_cb2(MPIR_Sched_cb2_t * cb_p, void *cb_state, void *cb_state2, MPIR_Sched_t s)
 {
     int mpi_errno = MPI_SUCCESS;
     struct MPIDU_Sched_entry *e = NULL;
diff --git a/src/mpid/common/sched/mpidu_sched.h b/src/mpid/common/sched/mpidu_sched.h
index 85003ed..4673cc8 100644
--- a/src/mpid/common/sched/mpidu_sched.h
+++ b/src/mpid/common/sched/mpidu_sched.h
@@ -68,15 +68,15 @@ struct MPIDU_Sched_copy {
 /* nop entries have no args, so no structure is needed */
 
 enum MPIDU_Sched_cb_type {
-    MPIDU_SCHED_CB_TYPE_1 = 0,  /* single state arg type --> MPID_Sched_cb_t */
-    MPIDU_SCHED_CB_TYPE_2       /* double state arg type --> MPID_Sched_cb2_t */
+    MPIDU_SCHED_CB_TYPE_1 = 0,  /* single state arg type --> MPIR_Sched_cb_t */
+    MPIDU_SCHED_CB_TYPE_2       /* double state arg type --> MPIR_Sched_cb2_t */
 };
 
 struct MPIDU_Sched_cb {
     enum MPIDU_Sched_cb_type cb_type;
     union {
-        MPID_Sched_cb_t *cb_p;
-        MPID_Sched_cb2_t *cb2_p;
+        MPIR_Sched_cb_t *cb_p;
+        MPIR_Sched_cb2_t *cb2_p;
     } u;
     void *cb_state;
     void *cb_state2;            /* unused for single-param callbacks */
@@ -122,20 +122,20 @@ struct MPIDU_Sched {
 int MPIDU_Sched_progress(int *made_progress);
 int MPIDU_Sched_are_pending(void);
 int MPIDU_Sched_next_tag(struct MPIR_Comm *comm_ptr, int *tag);
-int MPIDU_Sched_create(MPID_Sched_t * sp);
-int MPIDU_Sched_clone(MPID_Sched_t orig, MPID_Sched_t * cloned);
-int MPIDU_Sched_start(MPID_Sched_t * sp, struct MPIR_Comm *comm, int tag,
+int MPIDU_Sched_create(MPIR_Sched_t * sp);
+int MPIDU_Sched_clone(MPIR_Sched_t orig, MPIR_Sched_t * cloned);
+int MPIDU_Sched_start(MPIR_Sched_t * sp, struct MPIR_Comm *comm, int tag,
                       struct MPIR_Request **req);
 int MPIDU_Sched_send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
-                     struct MPIR_Comm *comm, MPID_Sched_t s);
+                     struct MPIR_Comm *comm, MPIR_Sched_t s);
 int MPIDU_Sched_recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int src,
-                     struct MPIR_Comm *comm, MPID_Sched_t s);
-int MPID_Sched_ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
-                     struct MPIR_Comm *comm, MPID_Sched_t s);
-int MPID_Sched_reduce(const void *inbuf, void *inoutbuf, MPI_Aint count, MPI_Datatype datatype,
-                      MPI_Op op, MPID_Sched_t s);
+                     struct MPIR_Comm *comm, MPIR_Sched_t s);
+int MPIR_Sched_ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
+                     struct MPIR_Comm *comm, MPIR_Sched_t s);
+int MPIR_Sched_reduce(const void *inbuf, void *inoutbuf, MPI_Aint count, MPI_Datatype datatype,
+                      MPI_Op op, MPIR_Sched_t s);
 int MPIDU_Sched_copy(const void *inbuf, MPI_Aint incount, MPI_Datatype intype, void *outbuf,
-                     MPI_Aint outcount, MPI_Datatype outtype, MPID_Sched_t s);
-int MPIDU_Sched_barrier(MPID_Sched_t s);
+                     MPI_Aint outcount, MPI_Datatype outtype, MPIR_Sched_t s);
+int MPIDU_Sched_barrier(MPIR_Sched_t s);
 
 #endif /* !defined(MPIDU_SCHED_H_INCLUDED) */
diff --git a/src/mpid/common/sock/iocp/sock.c b/src/mpid/common/sock/iocp/sock.c
index fc59da1..9ff9311 100644
--- a/src/mpid/common/sock/iocp/sock.c
+++ b/src/mpid/common/sock/iocp/sock.c
@@ -415,9 +415,9 @@ int MPIDU_Sock_init()
     char *szNum, *szRange;
     WSADATA wsaData;
     int err;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_INIT);
 
     if (g_init_called)
     {
@@ -425,7 +425,7 @@ int MPIDU_Sock_init()
 	/*printf("sock init %d\n", g_init_called);fflush(stdout);*/
 	/*
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_INIT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_INIT);
 	return mpi_errno;
 	*/
 	return MPI_SUCCESS;
@@ -435,7 +435,7 @@ int MPIDU_Sock_init()
     if ((err = WSAStartup(MAKEWORD(2, 0), &wsaData)) != 0)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**wsasock", "**wsasock %s %d", get_error_string(err), err);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_INIT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_INIT);
 	return mpi_errno;
     }
 
@@ -482,7 +482,7 @@ int MPIDU_Sock_init()
     g_init_called = 1;
     /*printf("sock init %d\n", g_init_called);fflush(stdout);*/
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_INIT);
     return MPI_SUCCESS;
 }
 
@@ -494,13 +494,13 @@ int MPIDU_Sock_finalize()
 {
     int mpi_errno;
     sock_state_t *iter;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_FINALIZE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_FINALIZE);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_FINALIZE);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_FINALIZE);
 	return mpi_errno;
     }
     g_init_called--;
@@ -525,7 +525,7 @@ int MPIDU_Sock_finalize()
 	}
 	WSACleanup();
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_FINALIZE);
     return MPI_SUCCESS;
 }
 
@@ -672,13 +672,13 @@ int MPIDU_Sock_hostname_to_host_description(char *hostname, char *host_descripti
     int mpi_errno = MPI_SUCCESS;
     int str_errno = MPL_STR_SUCCESS;
     socki_host_name_t *iter, *list = NULL;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_HOSTNAME_TO_HOST_DESCRIPTION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_HOSTNAME_TO_HOST_DESCRIPTION);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_HOSTNAME_TO_HOST_DESCRIPTION);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_HOSTNAME_TO_HOST_DESCRIPTION);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_HOSTNAME_TO_HOST_DESCRIPTION);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_HOSTNAME_TO_HOST_DESCRIPTION);
 	return mpi_errno;
     }
 
@@ -702,7 +702,7 @@ int MPIDU_Sock_hostname_to_host_description(char *hostname, char *host_descripti
  fn_exit:
     socki_free_host_list(list);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_HOSTNAME_TO_HOST_DESCRIPTION);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_HOSTNAME_TO_HOST_DESCRIPTION);
     return mpi_errno;
  fn_fail:
     goto fn_exit;
@@ -719,16 +719,16 @@ int MPIDU_Sock_get_host_description(int myRank,
     char hostname[100];
     DWORD length = 100;
     char *env;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
 
     MPL_UNREFERENCED_ARG(myRank);
 
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
 	return mpi_errno;
     }
 
@@ -745,7 +745,7 @@ int MPIDU_Sock_get_host_description(int myRank,
 	{
 	    mpi_errno = WSAGetLastError();
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**sock_gethost", "**sock_gethost %s %d", get_error_string(mpi_errno), mpi_errno);
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
 	    return mpi_errno;
 	}
     }
@@ -756,7 +756,7 @@ int MPIDU_Sock_get_host_description(int myRank,
 	mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
     return mpi_errno;
 }
 
@@ -768,25 +768,25 @@ int MPIDU_Sock_create_set(MPIDU_Sock_set_t * set)
 {
     int mpi_errno;
     HANDLE port;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_CREATE_SET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_CREATE_SET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_CREATE_SET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_CREATE_SET);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_CREATE_SET);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_CREATE_SET);
 	return mpi_errno;
     }
     port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, g_num_cp_threads);
     if (port != NULL)
     {
 	*set = port;
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_CREATE_SET);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_CREATE_SET);
 	return MPI_SUCCESS;
     }
     mpi_errno = GetLastError();
     mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**iocp", "**iocp %s %d", get_error_string(mpi_errno), mpi_errno);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_CREATE_SET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_CREATE_SET);
     return mpi_errno;
 }
 
@@ -797,13 +797,13 @@ int MPIDU_Sock_create_set(MPIDU_Sock_set_t * set)
 int MPIDU_Sock_destroy_set(MPIDU_Sock_set_t set)
 {
     int mpi_errno;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
 	return mpi_errno;
     }
     if (!CloseHandle(set))
@@ -812,14 +812,14 @@ int MPIDU_Sock_destroy_set(MPIDU_Sock_set_t set)
 	if (mpi_errno == ERROR_INVALID_HANDLE)
 	{
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SET, "**bad_set", 0);
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
 	    return mpi_errno;
 	}
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s %d", get_error_string(mpi_errno), mpi_errno);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
 	return mpi_errno;
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
     return MPI_SUCCESS;
 }
 
@@ -833,13 +833,13 @@ int MPIDU_Sock_native_to_sock(MPIDU_Sock_set_t set, MPIDU_SOCK_NATIVE_FD fd, voi
     /*int ret_val;*/
     sock_state_t *sock_state;
     /*u_long optval;*/
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
 	return mpi_errno;
     }
 
@@ -848,7 +848,7 @@ int MPIDU_Sock_native_to_sock(MPIDU_Sock_set_t set, MPIDU_SOCK_NATIVE_FD fd, voi
     if (sock_state == NULL)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_NOMEM, "**nomem", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
 	return mpi_errno;
     }
     init_state_struct(sock_state);
@@ -870,14 +870,14 @@ int MPIDU_Sock_native_to_sock(MPIDU_Sock_set_t set, MPIDU_SOCK_NATIVE_FD fd, voi
     if (CreateIoCompletionPort((HANDLE)sock_state->sock, set, (ULONG_PTR)sock_state, g_num_cp_threads) == NULL)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_NOMEM, "**nomem", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
 	return mpi_errno;
     }
 
     *sock_ptr = sock_state;
 
     /*printf("native socket %d\n", sock_state->sock);fflush(stdout);*/
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK);
     return MPI_SUCCESS;
 }
 
@@ -891,14 +891,14 @@ int MPIDU_Sock_listen(MPIDU_Sock_set_t set, void * user_ptr, int * port, MPIDU_S
     char host[100];
     sock_state_t * listen_state, **listener_copies;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_LISTEN);
-    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_LISTEN);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MEMCPY);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_LISTEN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_LISTEN);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
 	return mpi_errno;
     }
 
@@ -908,21 +908,21 @@ int MPIDU_Sock_listen(MPIDU_Sock_set_t set, void * user_ptr, int * port, MPIDU_S
     if (mpi_errno != MPI_SUCCESS)
     {
 	mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**sock_create", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
 	return mpi_errno;
     }
     if (listen(listen_state->listen_sock, SOMAXCONN) == SOCKET_ERROR)
     {
 	mpi_errno = WSAGetLastError();
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**listen", "**listen %s %d", get_error_string(mpi_errno), mpi_errno);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
 	return mpi_errno;
     }
     if (CreateIoCompletionPort((HANDLE)listen_state->listen_sock, set, (ULONG_PTR)listen_state, g_num_cp_threads) == NULL)
     {
 	mpi_errno = GetLastError();
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**iocp", "**iocp %s %d", get_error_string(mpi_errno), mpi_errno);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
 	return mpi_errno;
     }
     easy_get_sock_info(listen_state->listen_sock, host, port);
@@ -937,9 +937,9 @@ int MPIDU_Sock_listen(MPIDU_Sock_set_t set, void * user_ptr, int * port, MPIDU_S
     for (i=0; i<g_num_posted_accepts; i++)
     {
 	listener_copies[i] = (sock_state_t*)MPL_malloc(sizeof(sock_state_t));
-	MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
+	MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MEMCPY);
 	memcpy(listener_copies[i], listen_state, sizeof(*listen_state));
-	MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MEMCPY);
 	if (i > 0)
 	{
 	    listener_copies[i]->next = listener_copies[i-1];
@@ -949,7 +949,7 @@ int MPIDU_Sock_listen(MPIDU_Sock_set_t set, void * user_ptr, int * port, MPIDU_S
 	{
 	    MPL_free(listener_copies);
 	    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**post_accept", 0);
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
 	    return mpi_errno;
 	}
     }
@@ -958,7 +958,7 @@ int MPIDU_Sock_listen(MPIDU_Sock_set_t set, void * user_ptr, int * port, MPIDU_S
 
     *sock = listen_state;
     /*printf("listening socket %d\n", listen_state->listen_sock);fflush(stdout);*/
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
     return MPI_SUCCESS;
 }
 
@@ -974,13 +974,13 @@ int MPIDU_Sock_accept(MPIDU_Sock_t listener_sock, MPIDU_Sock_set_t set, void * u
     u_long optval;
     int len;
     sock_state_t *accept_state, *iter;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_ACCEPT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_ACCEPT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_ACCEPT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_ACCEPT);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
 	return mpi_errno;
     }
 
@@ -989,7 +989,7 @@ int MPIDU_Sock_accept(MPIDU_Sock_t listener_sock, MPIDU_Sock_set_t set, void * u
     {
 	*sock = NULL;
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_NOMEM, "**nomem", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
 	return mpi_errno;
     }
     init_state_struct(accept_state);
@@ -1003,7 +1003,7 @@ int MPIDU_Sock_accept(MPIDU_Sock_t listener_sock, MPIDU_Sock_set_t set, void * u
     if (iter == NULL)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**sock_nop_accept", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
 	return mpi_errno;
     }
     accept_state->sock = iter->sock;
@@ -1012,7 +1012,7 @@ int MPIDU_Sock_accept(MPIDU_Sock_t listener_sock, MPIDU_Sock_set_t set, void * u
     {
 	*sock = NULL;
 	mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**post_accept", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
 	return mpi_errno;
     }
 
@@ -1059,7 +1059,7 @@ int MPIDU_Sock_accept(MPIDU_Sock_t listener_sock, MPIDU_Sock_set_t set, void * u
     {
 	mpi_errno = GetLastError();
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**iocp", "**iocp %s %d", get_error_string(mpi_errno), mpi_errno);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
 	return mpi_errno;
     }
 
@@ -1068,7 +1068,7 @@ int MPIDU_Sock_accept(MPIDU_Sock_t listener_sock, MPIDU_Sock_set_t set, void * u
     *sock = accept_state;
 
     /*printf("accepted socket %d\n", accept_state->sock);fflush(stdout);*/
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
     return MPI_SUCCESS;
 }
 
@@ -1139,19 +1139,19 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
     char *pEnv, *token;
     unsigned int nNicNet=0, nNicMask=0;
     int use_subnet = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 	return mpi_errno;
     }
     if (strlen(host_description) > SOCKI_DESCRIPTION_LENGTH)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_NOMEM, "**nomem", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 	return mpi_errno;
     }
 
@@ -1168,7 +1168,7 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
     if (mpi_errno != MPI_SUCCESS)
     {
 	mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_create", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 	return mpi_errno;
     }
 
@@ -1201,13 +1201,13 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
 		mpi_errno = MPIR_Err_create_code(connect_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_NOMEM, "**nomem", 0);
 	    else
 		mpi_errno = MPIR_Err_create_code(connect_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %d", mpi_errno);
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 	    return mpi_errno;
 	}
 	if (host[0] == '\0')
 	{
 	    mpi_errno = MPIR_Err_create_code(connect_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**sock_connect", "**sock_connect %s %d %s %d", connect_state->host_description, port, "exhausted all endpoints", -1);
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 	    return mpi_errno;
 	}
 
@@ -1224,7 +1224,7 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
 		mpi_errno = WSAGetLastError();
 		connect_errno = MPIR_Err_create_code(connect_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**gethostbyname", "**gethostbyname %s %d", get_error_string(mpi_errno), mpi_errno);
 		/*
-		MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+		MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 		return mpi_errno;
 		*/
 		continue;
@@ -1255,7 +1255,7 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
 		{
 		    connect_errno = MPIR_Err_create_code(connect_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_connect", "**sock_connect %s %d %s %d", host, port, get_error_string(error), error);
 		    /*
-		    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+		    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 		    return mpi_errno;
 		    */
 		    /* This code assumes that all errors other than WSAECONNREFUSED and WSAETIMEDOUT should not cause a connection retry */
@@ -1271,7 +1271,7 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
 		    error = WSAGetLastError();
 		    connect_errno = MPIR_Err_create_code(connect_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_connect", "**sock_connect %s %d %s %d", host, port, get_error_string(error), error);
 		    /*
-		    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+		    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 		    return mpi_errno;
 		    */
 		    break;
@@ -1283,7 +1283,7 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
 		    /* Warning: Loss of information.  We have two error stacks, one in connect_errno and the other in mpi_errno, that cannot be joined given the current error code interface. */
 		    /*connect_errno = MPIR_Err_create_code(connect_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_create", 0);*/
 		    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_create", 0);
-		    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+		    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 		    return mpi_errno;
 		}
 		random_time = (int)((double)rand() / (double)RAND_MAX * 250.0);
@@ -1312,7 +1312,7 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
     {
 	mpi_errno = GetLastError();
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**iocp", "**iocp %s %d", get_error_string(mpi_errno), mpi_errno);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
 	return mpi_errno;
     }
 
@@ -1324,7 +1324,7 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
     *sock = connect_state;
 
     /*printf("connected socket %d\n", connect_state->sock);fflush(stdout);*/
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT);
     return MPI_SUCCESS;
 }
 
@@ -1335,23 +1335,23 @@ int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_d
 int MPIDU_Sock_set_user_ptr(MPIDU_Sock_t sock, void * user_ptr)
 {
     int mpi_errno;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
 	return mpi_errno;
     }
     if (sock == NULL)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK, "**bad_sock", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
 	return mpi_errno;
     }
     sock->user_ptr = user_ptr;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
     return MPI_SUCCESS;
 }
 
@@ -1363,27 +1363,27 @@ int MPIDU_Sock_post_close(MPIDU_Sock_t sock)
 {
     int mpi_errno;
     SOCKET s, *sp;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	return mpi_errno;
     }
 
     if (sock == NULL)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK, "**nullptr", "**nullptr %s", "sock");
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	return mpi_errno;
     }
     if (sock->closing)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**pctwice", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	return mpi_errno;
     }
 
@@ -1403,13 +1403,13 @@ int MPIDU_Sock_post_close(MPIDU_Sock_t sock)
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK, "**bad_listenersock", 0);
 	else
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK, "**bad_sock", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	return mpi_errno;
     }
 
     if (sock->pending_operations != 0)
     {
-	/*MPIU_Assert(sock->state != 0);*/ /* The state can be 0 if the operation was aborted */
+	/*MPIR_Assert(sock->state != 0);*/ /* The state can be 0 if the operation was aborted */
 #ifdef MPICH_DBG_OUTPUT
 	if (sock->state & SOCKI_CONNECTING)
 	    MPL_DBG_MSG_D(MPIR_DBG_OTHER,TERSE,"sock_post_close(%d) called while sock is connecting.\n", MPIDU_Sock_get_sock_id(sock));
@@ -1432,7 +1432,7 @@ int MPIDU_Sock_post_close(MPIDU_Sock_t sock)
 	fflush(stdout);
 #endif
 	/*
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	return SOCK_ERR_OP_IN_PROGRESS;
 	*/
 	/* posting a close cancels all outstanding operations */
@@ -1454,17 +1454,17 @@ int MPIDU_Sock_post_close(MPIDU_Sock_t sock)
 	    {
 		mpi_errno = WSAGetLastError();
 		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-		MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+		MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 		return mpi_errno;
 	    }
 	    *sp = INVALID_SOCKET;
 	    if (!PostQueuedCompletionStatus(sock->set, 0, (ULONG_PTR)sock, NULL))
 	    {
 		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK, "**fail", "**fail %d", GetLastError());
-		MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+		MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 		return mpi_errno;
 	    }
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	    return MPI_SUCCESS;
 	}
     }
@@ -1484,7 +1484,7 @@ int MPIDU_Sock_post_close(MPIDU_Sock_t sock)
 	}
 	if (mpi_errno == MPI_SUCCESS)
 	{
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	    return MPI_SUCCESS;
 	}
     }
@@ -1496,7 +1496,7 @@ int MPIDU_Sock_post_close(MPIDU_Sock_t sock)
 	{
 	    mpi_errno = WSAGetLastError();
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	    return mpi_errno;
 	}
     }
@@ -1506,7 +1506,7 @@ int MPIDU_Sock_post_close(MPIDU_Sock_t sock)
     {
 	mpi_errno = WSAGetLastError();
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	return mpi_errno;
 
     }
@@ -1514,10 +1514,10 @@ int MPIDU_Sock_post_close(MPIDU_Sock_t sock)
     if (!PostQueuedCompletionStatus(sock->set, 0, (ULONG_PTR)sock, NULL))
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_BAD_SOCK, "**fail", "**fail %d", GetLastError());
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 	return mpi_errno;
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
     return MPI_SUCCESS;
 }
 
@@ -1529,14 +1529,14 @@ int MPIDU_Sock_post_read(MPIDU_Sock_t sock, void * buf, size_t minbr, size_t max
                          MPIDU_Sock_progress_update_func_t fn)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_READ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_READ);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_READ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_READ);
     MPL_UNREFERENCED_ARG(maxbr);
     sock->read.tiov.MPL_IOV_BUF = (MPL_IOV_BUF_CAST)buf;
     sock->read.tiov.MPL_IOV_LEN = minbr;
     mpi_errno = MPIDU_Sock_post_readv(sock, &sock->read.tiov, 1, fn);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_READ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_READ);
     return mpi_errno;
 }
 
@@ -1553,16 +1553,16 @@ int MPIDU_Sock_post_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIDU_Soc
     int i;
 #endif
     DWORD flags = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_READV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_READV);
 #ifdef USE_SOCK_IOV_COPY
-    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MEMCPY);
 #endif
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_READV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_READV);
     if (!g_init_called)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_INIT, "**sock_init", 0);
-	MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_READV);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_READV);
 	return mpi_errno;
     }
     /*sock->rt1 = PMPI_Wtime();*/
@@ -1571,9 +1571,9 @@ int MPIDU_Sock_post_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIDU_Soc
 	iov_n--;
     sock->read.total = 0;
 #ifdef USE_SOCK_IOV_COPY
-    MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MEMCPY);
     memcpy(sock->read.iov, iov, sizeof(MPL_IOV) * n);
-    MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MEMCPY);
 #else
     sock->read.iov = iov;
 #endif
@@ -1605,7 +1605,7 @@ int MPIDU_Sock_post_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIDU_Soc
 	    WSABUF tmp;
 	    tmp.buf = sock->read.iov[0].buf;
 	    tmp.len = sock->read.iov[0].len;
-	    MPIU_Assert(tmp.len > 0);
+	    MPIR_Assert(tmp.len > 0);
 	    while (mpi_errno == WSAENOBUFS)
 	    {
 		/*printf("[%d] receiving %d bytes\n", __LINE__, tmp.len);fflush(stdout);*/
@@ -1643,7 +1643,7 @@ int MPIDU_Sock_post_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIDU_Soc
 	sock->pending_operations++;
     else
 	sock->state &= ~SOCKI_READING;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_READV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_READV);
     return mpi_errno;
 }
 
@@ -1654,14 +1654,14 @@ int MPIDU_Sock_post_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIDU_Soc
 int MPIDU_Sock_post_write(MPIDU_Sock_t sock, void * buf, size_t min, size_t max, MPIDU_Sock_progress_update_func_t fn)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_WRITE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_WRITE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_WRITE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_WRITE);
     MPL_UNREFERENCED_ARG(max);
     sock->write.tiov.MPL_IOV_BUF = (MPL_IOV_BUF_CAST)buf;
     sock->write.tiov.MPL_IOV_LEN = min;
     mpi_errno = MPIDU_Sock_post_writev(sock, &sock->write.tiov, 1, fn);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_WRITE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_WRITE);
     return mpi_errno;
 }
 
@@ -1676,18 +1676,18 @@ int MPIDU_Sock_post_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIDU_So
 #ifdef MPICH_DBG_OUTPUT
     int i;
 #endif
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
 #ifdef USE_SOCK_IOV_COPY
-    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MEMCPY);
 #endif
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
     /*sock->wt1 = PMPI_Wtime();*/
     sock->write.total = 0;
 #ifdef USE_SOCK_IOV_COPY
-    MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MEMCPY);
     memcpy(sock->write.iov, iov, sizeof(MPL_IOV) * iov_n);
-    MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MEMCPY);
 #else
     sock->write.iov = iov;
 #endif
@@ -1755,7 +1755,7 @@ int MPIDU_Sock_post_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIDU_So
 	sock->pending_operations++;
     else
 	sock->state &= ~SOCKI_WRITING;
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
     return mpi_errno;
 }
 
@@ -1772,15 +1772,15 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
     OVERLAPPED *ovl;
     DWORD dwFlags = 0;
     char error_msg[1024];
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_WAIT);
-    MPIDI_STATE_DECL(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_WAIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_WAIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_WAIT);
 
     for (;;) 
     {
 #if defined(MPICH_IS_THREADED)
-#       if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#       if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
 	{
             int err;
 	    /* Release the lock so that other threads may make progress while this thread waits for something to do */
@@ -1793,7 +1793,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 #           error selected multi-threaded implementation is not supported
 #       endif
 #endif
-	MPIDI_FUNC_ENTER(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);
+	MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);
 	/* initialize to NULL so we can compare the output of GetQueuedCompletionStatus */
 	sock = NULL;
 	ovl = NULL;
@@ -1803,9 +1803,9 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 	{
 	    /*t2 = PMPI_Wtime();*/
 	    /*printf("[%d] GetQueuedCompletionStatus took %.3f seconds for sock: %d\n", getpid(), t2-t1, sock->sock);*/
-	    MPIDI_FUNC_EXIT(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);
 #if defined(MPICH_IS_THREADED)
-#           if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#           if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
 	    {
                 int err;
 		/* Reaquire the lock before processing any of the information returned from GetQueuedCompletionStatus */
@@ -1840,12 +1840,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			{
 			    mpi_errno = WSAGetLastError();
 			    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return mpi_errno;
 			}
 			sock->sock = INVALID_SOCKET;
 		    }
-		    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+		    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 		    return MPI_SUCCESS;
 		}
 		if (ovl == &sock->read.ovl)
@@ -1871,7 +1871,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    {
 				mpi_errno = WSAGetLastError();
 				mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return mpi_errno;
 			    }
 			    /*printf("closing socket %d\n", sock->sock);fflush(stdout);*/
@@ -1879,12 +1879,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    {
 				mpi_errno = WSAGetLastError();
 				mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return mpi_errno;
 			    }
 			    sock->sock = INVALID_SOCKET;
 			}
-			MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			return MPI_SUCCESS;
 		    }
 		    MPL_DBG_MSG_D(MPIR_DBG_OTHER,TERSE,"sock_wait readv update: %d bytes\n", num_bytes);
@@ -1924,7 +1924,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    {
 				mpi_errno = WSAGetLastError();
 				mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return mpi_errno;
 			    }
 			    /*printf("closing socket %d\n", sock->sock);fflush(stdout);*/
@@ -1932,12 +1932,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    {
 				mpi_errno = WSAGetLastError();
 				mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return mpi_errno;
 			    }
 			    sock->sock = INVALID_SOCKET;
 			}
-			MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			return MPI_SUCCESS;
 		    }
 		    /* make the user upcall */
@@ -1967,7 +1967,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				{
 				    mpi_errno = WSAGetLastError();
 				    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-				    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				    return mpi_errno;
 				}
 				/*printf("closing socket %d\n", sock->sock);fflush(stdout);*/
@@ -1975,12 +1975,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				{
 				    mpi_errno = WSAGetLastError();
 				    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-				    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				    return mpi_errno;
 				}
 				sock->sock = INVALID_SOCKET;
 			    }
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return MPI_SUCCESS;
 			}
 			if (mpi_errno == WSAENOBUFS)
@@ -2027,7 +2027,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				{
 				    mpi_errno = WSAGetLastError();
 				    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-				    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				    return mpi_errno;
 				}
 				/*printf("closing socket %d\n", sock->sock);fflush(stdout);*/
@@ -2035,12 +2035,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				{
 				    mpi_errno = WSAGetLastError();
 				    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-				    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				    return mpi_errno;
 				}
 				sock->sock = INVALID_SOCKET;
 			    }
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return MPI_SUCCESS;
 			}
 		    }
@@ -2065,7 +2065,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    {
 				mpi_errno = WSAGetLastError();
 				mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return mpi_errno;
 			    }
 			    /*printf("closing socket %d\n", sock->sock);fflush(stdout);*/
@@ -2073,12 +2073,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    {
 				mpi_errno = WSAGetLastError();
 				mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return mpi_errno;
 			    }
 			    sock->sock = INVALID_SOCKET;
 			}
-			MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			return MPI_SUCCESS;
 		    }
 		    else
@@ -2104,7 +2104,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				{
 				    mpi_errno = WSAGetLastError();
 				    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-				    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				    return mpi_errno;
 				}
 				/*printf("closing socket %d\n", sock->sock);fflush(stdout);*/
@@ -2112,12 +2112,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				{
 				    mpi_errno = WSAGetLastError();
 				    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-				    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				    return mpi_errno;
 				}
 				sock->sock = INVALID_SOCKET;
 			    }
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return MPI_SUCCESS;
 			}
 			MPL_DBG_MSG_FMT(MPIR_DBG_OTHER,TERSE,(MPL_DBG_FDEST,"sock_wait: write update, total = %d + %d = %d\n", sock->write.total, num_bytes, sock->write.total + num_bytes));
@@ -2164,7 +2164,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				{
 				    mpi_errno = WSAGetLastError();
 				    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-				    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				    return mpi_errno;
 				}
 				/*printf("closing socket %d\n", sock->sock);fflush(stdout);*/
@@ -2172,12 +2172,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				{
 				    mpi_errno = WSAGetLastError();
 				    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-				    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				    return mpi_errno;
 				}
 				sock->sock = INVALID_SOCKET;
 			    }
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return MPI_SUCCESS;
 			}
 			/* make the user upcall */
@@ -2207,7 +2207,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				    {
 					mpi_errno = WSAGetLastError();
 					mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-					MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+					MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 					return mpi_errno;
 				    }
 				    /*printf("closing socket %d\n", sock->sock);fflush(stdout);*/
@@ -2215,12 +2215,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				    {
 					mpi_errno = WSAGetLastError();
 					mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-					MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+					MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 					return mpi_errno;
 				    }
 				    sock->sock = INVALID_SOCKET;
 				}
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return MPI_SUCCESS;
 			    }
 			    if (mpi_errno == WSAENOBUFS)
@@ -2264,7 +2264,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				    {
 					mpi_errno = WSAGetLastError();
 					mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**shutdown", "**shutdown %s %d", get_error_string(mpi_errno), mpi_errno);
-					MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+					MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 					return mpi_errno;
 				    }
 				    /*printf("closing socket %d\n", sock->sock);fflush(stdout);*/
@@ -2272,12 +2272,12 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				    {
 					mpi_errno = WSAGetLastError();
 					mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-					MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+					MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 					return mpi_errno;
 				    }
 				    sock->sock = INVALID_SOCKET;
 				}
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return MPI_SUCCESS;
 			    }
 			}
@@ -2299,7 +2299,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    out->user_ptr = sock->user_ptr;
 			    sock->state &= ~SOCKI_READING;
 			    sock->pending_operations--;
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return MPI_SUCCESS;
 			}
 			if ((sock->state & SOCKI_WRITING))/* && sock->sock != INVALID_SOCKET) */
@@ -2314,7 +2314,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    out->user_ptr = sock->user_ptr;
 			    sock->state &= ~SOCKI_WRITING;
 			    sock->pending_operations--;
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return MPI_SUCCESS;
 			}
 			MPL_DBG_MSG_D(MPIR_DBG_OTHER,TERSE,"ignoring EOF notification on unknown sock %d.\n", MPIDU_Sock_get_sock_id(sock));
@@ -2325,7 +2325,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			MPL_DBG_MSG_FMT(MPIR_DBG_OTHER,TERSE,(MPL_DBG_FDEST,"unmatched ovl: pending: %d, state = %d\n", sock->pending_operations, sock->state));
 			/*MPL_error_printf("In sock_wait(), returned overlapped structure does not match the current read or write ovl: 0x%x\n", ovl);*/
 			MPL_snprintf(error_msg, 1024, "In sock_wait(), returned overlapped structure does not match the current read or write ovl: 0x%p\n", ovl);
-			MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			return MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s", error_msg);
 		    }
 		    else
@@ -2340,7 +2340,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 		out->num_bytes = 0;
 		out->error = MPI_SUCCESS;
 		out->user_ptr = NULL;
-		MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+		MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 		return MPI_SUCCESS;
 	    }
 	    else if (sock->type == SOCKI_LISTENER)
@@ -2362,7 +2362,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			CloseHandle(sock->write.ovl.hEvent);
 			sock->read.ovl.hEvent = NULL;
 			sock->write.ovl.hEvent = NULL;
-			MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			return MPI_SUCCESS;
 		    }
 		    else
@@ -2384,21 +2384,21 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 		out->num_bytes = num_bytes;
 		out->error = MPI_SUCCESS;
 		out->user_ptr = sock->user_ptr;
-		MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+		MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 		return MPI_SUCCESS;
 	    }
 	    else
 	    {
 		/*MPL_error_printf("sock type is not a SOCKET or a LISTENER, it's %d\n", sock->type);*/
 		MPL_snprintf(error_msg, 1024, "sock type is not a SOCKET or a LISTENER, it's %d\n", sock->type);
-		MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+		MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 		return MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s", error_msg);
 	    }
 	}
 	else
 	{
 #if defined(MPICH_IS_THREADED)
-#           if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#           if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
 	    {
                 int err;
 		/* Reaquire the lock before processing any of the information returned from GetQueuedCompletionStatus */
@@ -2411,15 +2411,15 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 #               error selected multi-threaded implementation is not supported
 #           endif
 #endif
-	    /*MPIDI_FUNC_EXIT(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);*/ /* Maybe the logging will reset the last error? */
+	    /*MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);*/ /* Maybe the logging will reset the last error? */
 	    mpi_errno = GetLastError();
 	    /*t2 = PMPI_Wtime();*/
 	    /*printf("[%d] GetQueuedCompletionStatus took %.3f seconds for sock: %d\n", getpid(), t2-t1, sock != NULL ? sock->sock : 0);*/
-	    MPIDI_FUNC_EXIT(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GETQUEUEDCOMPLETIONSTATUS);
 	    /* interpret error, return appropriate SOCK_ERR_... macro */
 	    if (mpi_errno == WAIT_TIMEOUT)
 	    {
-		MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+		MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 		return MPIDU_SOCK_ERR_TIMEOUT;
 	    }
 	    if (mpi_errno == ERROR_OPERATION_ABORTED)
@@ -2443,7 +2443,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				out->error = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s", "Unable to re-post an aborted readv operation");
 				out->user_ptr = sock->user_ptr;
 				sock->state &= ~SOCKI_READING; /* remove the SOCKI_READING bit */
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return MPI_SUCCESS;
 			    }
 			}
@@ -2460,7 +2460,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				out->error = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s", "Unable to re-post an aborted writev operation");
 				out->user_ptr = sock->user_ptr;
 				sock->state &= ~SOCKI_WRITING; /* remove the SOCKI_WRITING bit */
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return MPI_SUCCESS;
 			    }
 			}
@@ -2471,7 +2471,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    out->num_bytes = 0;
 			    out->error = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s", "Aborted socket operation");
 			    out->user_ptr = sock->user_ptr;
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return MPI_SUCCESS;
 			}
 		    }
@@ -2482,7 +2482,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			out->num_bytes = 0;
 			out->error = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s", "Aborted wakeup operation");
 			out->user_ptr = sock->user_ptr;
-			MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			return MPI_SUCCESS;
 		    }
 		    else if (sock->type == SOCKI_LISTENER)
@@ -2500,7 +2500,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 				out->num_bytes = 0;
 				out->error = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s", "Unable to re-post an aborted accept operation");
 				out->user_ptr = sock->user_ptr;
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return MPI_SUCCESS;
 			    }
 			}
@@ -2511,7 +2511,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    out->num_bytes = 0;
 			    out->error = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s", "Aborted unknown listener socket operation");
 			    out->user_ptr = sock->user_ptr;
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return MPI_SUCCESS;
 			}
 		    }
@@ -2522,7 +2522,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			out->num_bytes = 0;
 			out->error = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s", "Aborted unknown socket operation");
 			out->user_ptr = sock->user_ptr;
-			MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			return MPI_SUCCESS;
 		    }
 		    continue;
@@ -2547,7 +2547,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    {
 				mpi_errno = WSAGetLastError();
 				mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**closesocket", "**closesocket %s %d", get_error_string(mpi_errno), mpi_errno);
-				MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+				MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 				return mpi_errno;
 			    }
 			    sock->sock = INVALID_SOCKET;
@@ -2580,7 +2580,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 		    out->num_bytes = 0;
 		    out->error = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s %d", get_error_string(mpi_errno), mpi_errno);
 		    out->user_ptr = sock->user_ptr;
-		    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+		    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 		    return MPI_SUCCESS;
 		}
 		if (sock->type == SOCKI_LISTENER)
@@ -2609,7 +2609,7 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			    else
 				out->error = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s %d", get_error_string(mpi_errno), mpi_errno);
 			    out->user_ptr = sock->user_ptr;
-			    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			    return MPI_SUCCESS;
 			}
 		    }
@@ -2620,19 +2620,19 @@ int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int timeout, MPIDU_Sock_event_t * out)
 			out->num_bytes = 0;
 			out->error = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s %d", get_error_string(mpi_errno), mpi_errno);
 			out->user_ptr = sock->user_ptr;
-			MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+			MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 			return MPI_SUCCESS;
 		    }
 		}
 	    }
 
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
 	    return MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s %d", get_error_string(mpi_errno), mpi_errno);
 	}
     }
     /* Unreachable code section - all sections exit/return before reaching this segment */
     /*
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
     return MPI_SUCCESS;
     */
 }
@@ -2656,13 +2656,13 @@ int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, size_t len, size_t * num_read
 {
     int mpi_errno = MPI_SUCCESS;
     MPL_IOV iov;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_READ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_READ);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_READ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_READ);
     iov.buf = buf;
     iov.len = len;
     mpi_errno = MPIDU_Sock_readv(sock, &iov, 1, num_read);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_READ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_READ);
     return mpi_errno;
 }
 
@@ -2675,9 +2675,9 @@ int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_r
     int mpi_errno = MPI_SUCCESS;
     DWORD nFlags = 0;
     DWORD num_read_local;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_READV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_READV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_READV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_READV);
 
     if (WSARecv(sock->sock, iov, iov_n, &num_read_local, &nFlags, NULL/*overlapped*/, NULL/*completion routine*/) == SOCKET_ERROR)
     {
@@ -2731,7 +2731,7 @@ int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_r
 	*num_read = num_read_local;
     }
     MPL_DBG_MSG_D(MPIR_DBG_OTHER,TERSE,"sock_readv %d bytes\n", *num_read);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_READV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_READV);
     return mpi_errno;
 }
 
@@ -2743,13 +2743,13 @@ int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, size_t len, size_t * num_wri
 {
     int mpi_errno;
     MPL_IOV iov;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_WRITE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_WRITE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_WRITE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_WRITE);
     iov.len = len;
     iov.buf = buf;
     mpi_errno = MPIDU_Sock_writev(sock, &iov, 1, num_written);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WRITE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WRITE);
     return MPI_SUCCESS;
 }
 
@@ -2761,9 +2761,9 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_
 {
     int mpi_errno;
     DWORD num_written_local;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_WRITEV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_WRITEV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_WRITEV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_WRITEV);
     if (WSASend(sock->sock, iov, iov_n, &num_written_local, 0, NULL/*overlapped*/, NULL/*completion routine*/) == SOCKET_ERROR)
     {
 	mpi_errno = WSAGetLastError();
@@ -2796,7 +2796,7 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_
 	{
 	    MPL_DBG_MSG_D(MPIR_DBG_OTHER,TERSE,"WSASend failed: error %d\n", mpi_errno);
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**fail", "**fail %s %d", get_error_string(mpi_errno), mpi_errno);
-	    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WRITEV);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WRITEV);
 	    return mpi_errno;
 	}
     }
@@ -2806,7 +2806,7 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_
     }
     MPL_DBG_MSG_D(MPIR_DBG_OTHER,TERSE,"sock_writev %d bytes\n", *num_written);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WRITEV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WRITEV);
     return MPI_SUCCESS;
 }
 
@@ -2817,9 +2817,9 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_
 int MPIDU_Sock_get_sock_id(MPIDU_Sock_t sock)
 {
     int ret_val;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
     if (sock == MPIDU_SOCK_INVALID_SOCK)
 	ret_val = -1;
     else
@@ -2829,7 +2829,7 @@ int MPIDU_Sock_get_sock_id(MPIDU_Sock_t sock)
 	else
 	    ret_val = (int)sock->sock;
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
     return ret_val;
 }
 
@@ -2840,11 +2840,11 @@ int MPIDU_Sock_get_sock_id(MPIDU_Sock_t sock)
 int MPIDU_Sock_get_sock_set_id(MPIDU_Sock_set_t set)
 {
     int ret_val;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
     ret_val = PtrToInt(set);/*(int)set;*/
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
     return ret_val;
 }
 
@@ -2854,9 +2854,9 @@ int MPIDU_Sock_get_sock_set_id(MPIDU_Sock_set_t set)
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDU_Sock_get_error_class_string(int error, char *error_string, int length)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
     switch (MPIR_ERR_GET_CLASS(error))
     {
     case MPIDU_SOCK_ERR_FAIL:
@@ -2914,6 +2914,6 @@ int MPIDU_Sock_get_error_class_string(int error, char *error_string, int length)
 	MPL_snprintf(error_string, length, "unknown socket error %d", error);
 	break;
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
     return MPI_SUCCESS;
 }
diff --git a/src/mpid/common/sock/poll/sock_immed.i b/src/mpid/common/sock/poll/sock_immed.i
index ffc7f71..f5203d1 100644
--- a/src/mpid/common/sock/poll/sock_immed.i
+++ b/src/mpid/common/sock/poll/sock_immed.i
@@ -32,9 +32,9 @@ int MPIDU_Sock_accept(struct MPIDU_Sock * listener,
     int nodelay;
     int rc;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_ACCEPT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_ACCEPT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_ACCEPT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_ACCEPT);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(listener, mpi_errno, fn_exit);
@@ -111,7 +111,7 @@ int MPIDU_Sock_accept(struct MPIDU_Sock * listener,
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                            FCNAME, __LINE__, MPIDU_SOCK_ERR_NO_NEW_SOCK,
 			   "**sock|poll|accept", "**sock|poll|accept %d %s", 
-			   errno, MPIU_Strerror(errno));
+			   errno, MPIR_Strerror(errno));
 	}
 	
 	goto fn_fail;
@@ -126,7 +126,7 @@ int MPIDU_Sock_accept(struct MPIDU_Sock * listener,
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, 
 			 FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
 			 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s", 
-			 errno, MPIU_Strerror(errno));
+			 errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -137,7 +137,7 @@ int MPIDU_Sock_accept(struct MPIDU_Sock * listener,
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, 
 			 FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
 			 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s",
-			 errno, MPIU_Strerror(errno));
+			 errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -150,7 +150,7 @@ int MPIDU_Sock_accept(struct MPIDU_Sock * listener,
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, 
 			 FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
 			 "**sock|poll|nodelay", "**sock|poll|nodelay %d %s", 
-                         errno, MPIU_Strerror(errno));
+                         errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -238,7 +238,7 @@ int MPIDU_Sock_accept(struct MPIDU_Sock * listener,
     *sockp = sock;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_ACCEPT);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -265,10 +265,10 @@ int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, size_t len,
     struct pollinfo * pollinfo;
     size_t nb;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_READ);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_READ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_READ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_READ);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_READ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_READ);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(sock, mpi_errno, fn_exit);
@@ -294,9 +294,9 @@ int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, size_t len,
     
     do
     {
-	MPIDI_FUNC_ENTER(MPID_STATE_READ);
+	MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_READ);
 	nb = read(pollinfo->fd, buf, len);
-	MPIDI_FUNC_EXIT(MPID_STATE_READ);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_READ);
     }
     while (nb == -1 && errno == EINTR);
 
@@ -367,7 +367,7 @@ int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, size_t len,
     /* --END ERROR HANDLING-- */
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_READ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_READ);
     return mpi_errno;
 }
 /* end MPIDU_Sock_read() */
@@ -384,10 +384,10 @@ int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n,
     struct pollinfo * pollinfo;
     ssize_t nb;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_READV);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_READV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_READV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_READV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_READV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_READV);
     
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(sock, mpi_errno, fn_exit);
@@ -407,9 +407,9 @@ int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n,
      */
     do
     {
-	MPIDI_FUNC_ENTER(MPID_STATE_READV);
+	MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_READV);
 	nb = MPL_large_readv(pollinfo->fd, iov, iov_n);
-	MPIDI_FUNC_EXIT(MPID_STATE_READV);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_READV);
     }
     while (nb == -1 && errno == EINTR);
 
@@ -481,7 +481,7 @@ int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n,
     /* --END ERROR HANDLING-- */
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_READV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_READV);
     return mpi_errno;
 }
 /* end MPIDU_Sock_readv() */
@@ -498,10 +498,10 @@ int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, size_t len,
     struct pollinfo * pollinfo;
     ssize_t nb;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_WRITE);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_WRITE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WRITE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_WRITE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_WRITE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_WRITE);
     
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(sock, mpi_errno, fn_exit);
@@ -521,9 +521,9 @@ int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, size_t len,
     
     do
     {
-	MPIDI_FUNC_ENTER(MPID_STATE_WRITE);
+	MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WRITE);
 	nb = write(pollinfo->fd, buf, len);
-	MPIDI_FUNC_EXIT(MPID_STATE_WRITE);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WRITE);
     }
     while (nb == -1 && errno == EINTR);
 
@@ -567,7 +567,7 @@ int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, size_t len,
     /* --END ERROR HANDLING-- */
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WRITE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WRITE);
     return mpi_errno;
 }
 /* end MPIDU_Sock_write() */
@@ -583,10 +583,10 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_
     struct pollinfo * pollinfo;
     ssize_t nb;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_WRITEV);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_WRITEV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WRITEV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_WRITEV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_WRITEV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_WRITEV);
     
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(sock, mpi_errno, fn_exit);
@@ -606,9 +606,9 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_
      */
     do
     {
-	MPIDI_FUNC_ENTER(MPID_STATE_WRITEV);
+	MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WRITEV);
 	nb = MPL_large_writev(pollinfo->fd, iov, iov_n);
-	MPIDI_FUNC_EXIT(MPID_STATE_WRITEV);
+	MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WRITEV);
     }
     while (nb == -1 && errno == EINTR);
 
@@ -652,7 +652,7 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_
     /* --END ERROR HANDLING-- */
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WRITEV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WRITEV);
     return mpi_errno;
 }
 /* end MPIDU_Sock_writev() */
@@ -665,9 +665,9 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_
 int MPIDU_Sock_wakeup(struct MPIDU_Sock_set * sock_set)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_WAKEUP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_WAKEUP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_WAKEUP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_WAKEUP);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK_SET(sock_set, mpi_errno, fn_exit);
@@ -693,7 +693,7 @@ int MPIDU_Sock_wakeup(struct MPIDU_Sock_set * sock_set)
 #ifdef MPICH_IS_THREADED
     fn_exit:
 #endif
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAKEUP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAKEUP);
     return mpi_errno;
 }
 /* end MPIDU_Sock_wakeup() */
diff --git a/src/mpid/common/sock/poll/sock_init.i b/src/mpid/common/sock/poll/sock_init.i
index 018db73..70ec261 100644
--- a/src/mpid/common/sock/poll/sock_init.i
+++ b/src/mpid/common/sock/poll/sock_init.i
@@ -17,9 +17,9 @@ MPL_dbg_class MPIDU_DBG_SOCK_CONNECT;
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDU_Sock_init(void)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_INIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_INIT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_INIT);
 
 #if defined (MPL_USE_DBG_LOGGING)
     MPIDU_DBG_SOCK_CONNECT = MPL_dbg_class_alloc("SOCK_CONNECT", "sock_connect");
@@ -27,7 +27,7 @@ int MPIDU_Sock_init(void)
 
     MPIDU_Socki_initialized++;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_INIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_INIT);
     return MPI_SUCCESS;
 }
 
@@ -39,11 +39,11 @@ int MPIDU_Sock_init(void)
 int MPIDU_Sock_finalize(void)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_FINALIZE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_FINALIZE);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_FINALIZE);
 
     MPIDU_Socki_initialized--;
 
@@ -55,6 +55,6 @@ int MPIDU_Sock_finalize(void)
 #ifdef USE_SOCK_VERIFY
   fn_exit:
 #endif
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_FINALIZE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_FINALIZE);
     return mpi_errno;
 }
diff --git a/src/mpid/common/sock/poll/sock_misc.i b/src/mpid/common/sock/poll/sock_misc.i
index 983071b..bb8b1a1 100644
--- a/src/mpid/common/sock/poll/sock_misc.i
+++ b/src/mpid/common/sock/poll/sock_misc.i
@@ -20,9 +20,9 @@ int MPIDU_Sock_get_host_description(int myRank,
     char * env_hostname;
     int rc;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
     
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     /* --BEGIN ERROR HANDLING-- */
@@ -80,14 +80,14 @@ int MPIDU_Sock_get_host_description(int myRank,
 	    else
 	    {
 		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-						 "**sock|oserror", "**sock|poll|oserror %d %s", errno, MPIU_Strerror(errno));
+						 "**sock|oserror", "**sock|poll|oserror %d %s", errno, MPIR_Strerror(errno));
 	    }
 	}
 	/* --END ERROR HANDLING-- */
     }
 
  fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION);
     return mpi_errno;
 }
 
@@ -105,9 +105,9 @@ int MPIDU_Sock_native_to_sock(struct MPIDU_Sock_set * sock_set, MPIDU_SOCK_NATIV
     int rc;
     long flags;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_SOCK_NATIVE_TO_SOCK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SOCK_NATIVE_TO_SOCK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SOCK_NATIVE_TO_SOCK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SOCK_NATIVE_TO_SOCK);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
 
@@ -131,7 +131,7 @@ int MPIDU_Sock_native_to_sock(struct MPIDU_Sock_set * sock_set, MPIDU_SOCK_NATIV
     if (flags == -1)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s", errno, MPIU_Strerror(errno));
+					 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s", errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -140,7 +140,7 @@ int MPIDU_Sock_native_to_sock(struct MPIDU_Sock_set * sock_set, MPIDU_SOCK_NATIV
     if (rc == -1)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s", errno, MPIU_Strerror(errno));
+					 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s", errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -158,7 +158,7 @@ int MPIDU_Sock_native_to_sock(struct MPIDU_Sock_set * sock_set, MPIDU_SOCK_NATIV
     *sockp = sock;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SOCK_NATIVE_TO_SOCK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SOCK_NATIVE_TO_SOCK);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -180,9 +180,9 @@ int MPIDU_Sock_native_to_sock(struct MPIDU_Sock_set * sock_set, MPIDU_SOCK_NATIV
 int MPIDU_Sock_set_user_ptr(struct MPIDU_Sock * sock, void * user_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
     
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
 
@@ -202,7 +202,7 @@ int MPIDU_Sock_set_user_ptr(struct MPIDU_Sock * sock, void * user_ptr)
 #ifdef USE_SOCK_VERIFY
   fn_exit:
 #endif
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_SET_USER_PTR);
     return mpi_errno;
 }
 
@@ -214,9 +214,9 @@ int MPIDU_Sock_set_user_ptr(struct MPIDU_Sock * sock, void * user_ptr)
 int MPIDU_Sock_get_sock_id(struct MPIDU_Sock * sock)
 {
     int id;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
 
     if (sock != MPIDU_SOCK_INVALID_SOCK)
     {
@@ -234,7 +234,7 @@ int MPIDU_Sock_get_sock_id(struct MPIDU_Sock * sock)
 	id = -1;
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_SOCK_ID);
     return id;
 }
 
@@ -245,9 +245,9 @@ int MPIDU_Sock_get_sock_id(struct MPIDU_Sock * sock)
 int MPIDU_Sock_get_sock_set_id(struct MPIDU_Sock_set * sock_set)
 {
     int id;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
 
     if (sock_set != MPIDU_SOCK_INVALID_SET)
     {    
@@ -258,7 +258,7 @@ int MPIDU_Sock_get_sock_set_id(struct MPIDU_Sock_set * sock_set)
 	id = -1;
     }
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_SOCK_SET_ID);
     return id;
 }
 
@@ -278,9 +278,9 @@ int MPIDU_Sock_get_sock_set_id(struct MPIDU_Sock_set * sock_set)
 /* --BEGIN ERROR HANDLING-- */
 int MPIDU_Sock_get_error_class_string(int error, char *error_string, size_t length)
 {
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
     switch (MPIR_ERR_GET_CLASS(error))
     {
     case MPIDU_SOCK_ERR_FAIL:
@@ -338,7 +338,7 @@ int MPIDU_Sock_get_error_class_string(int error, char *error_string, size_t leng
 	MPL_snprintf(error_string, length, "unknown socket error %d", error);
 	break;
     }
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_GET_ERROR_CLASS_STRING);
     return MPI_SUCCESS;
 }
 /* --END ERROR HANDLING-- */
diff --git a/src/mpid/common/sock/poll/sock_post.i b/src/mpid/common/sock/poll/sock_post.i
index 9c1f6d7..61818d6 100644
--- a/src/mpid/common/sock/poll/sock_post.i
+++ b/src/mpid/common/sock/poll/sock_post.i
@@ -43,9 +43,9 @@ int MPIDU_Sock_post_connect_ifaddr( struct MPIDU_Sock_set * sock_set,
     int nodelay;
     int rc;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_CONNECT_IFADDR);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_CONNECT_IFADDR);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_CONNECT_IFADDR);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_CONNECT_IFADDR);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
 
@@ -60,21 +60,21 @@ int MPIDU_Sock_post_connect_ifaddr( struct MPIDU_Sock_set * sock_set,
 	   but should be interpreted as an System Error string) */
 	MPIR_ERR_SETANDJUMP2(mpi_errno,MPIDU_SOCK_ERR_FAIL,
 			     "**sock|poll|socket", 
-		    "**sock|poll|socket %d %s", errno, MPIU_Strerror(errno));
+		    "**sock|poll|socket %d %s", errno, MPIR_Strerror(errno));
     }
 
     flags = fcntl(fd, F_GETFL, 0);
     if (flags == -1) {
 	MPIR_ERR_SETANDJUMP2(mpi_errno,MPIDU_SOCK_ERR_FAIL,
 			     "**sock|poll|nonblock", 
-                    "**sock|poll|nonblock %d %s", errno, MPIU_Strerror(errno));
+                    "**sock|poll|nonblock %d %s", errno, MPIR_Strerror(errno));
     }
     rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
     if (rc == -1) {
 	MPIR_ERR_SETANDJUMP2( mpi_errno, MPIDU_SOCK_ERR_FAIL,
 			      "**sock|poll|nonblock", 
 			      "**sock|poll|nonblock %d %s",
-			      errno, MPIU_Strerror(errno));
+			      errno, MPIR_Strerror(errno));
     }
 
     nodelay = 1;
@@ -83,7 +83,7 @@ int MPIDU_Sock_post_connect_ifaddr( struct MPIDU_Sock_set * sock_set,
 	MPIR_ERR_SETANDJUMP2(mpi_errno,MPIDU_SOCK_ERR_FAIL,
 			     "**sock|poll|nodelay", 
 			     "**sock|poll|nodelay %d %s", 
-			     errno, MPIU_Strerror(errno));
+			     errno, MPIR_Strerror(errno));
     }
 
     /*
@@ -171,7 +171,7 @@ int MPIDU_Sock_post_connect_ifaddr( struct MPIDU_Sock_set * sock_set,
 	    MPIDU_SOCKI_EVENT_ENQUEUE(pollinfo, MPIDU_SOCK_OP_CONNECT, 0, user_ptr, MPIR_Err_create_code(
 		MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_CONN_FAILED,
 		"**sock|oserror", "**sock|poll|oserror %d %d %d %s", pollinfo->sock_set->id, pollinfo->sock_id, errno,
-		MPIU_Strerror(errno)), mpi_errno, fn_fail);
+		MPIR_Strerror(errno)), mpi_errno, fn_fail);
 	}
     }
     /* --END ERROR HANDLING-- */
@@ -179,7 +179,7 @@ int MPIDU_Sock_post_connect_ifaddr( struct MPIDU_Sock_set * sock_set,
     *sockp = sock;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT_IFADDR);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CONNECT_IFADDR);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -263,9 +263,9 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
     socklen_t addr_len;
     int rc;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_LISTEN);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_LISTEN);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_LISTEN);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_LISTEN);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     /* --BEGIN ERROR HANDLING-- */
@@ -285,7 +285,7 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
     if (fd == -1)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					 "**sock|poll|socket", "**sock|poll|socket %d %s", errno, MPIU_Strerror(errno));
+					 "**sock|poll|socket", "**sock|poll|socket %d %s", errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -299,7 +299,7 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
 	if (rc == -1)
 	{
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					     "**sock|poll|reuseaddr", "**sock|poll|reuseaddr %d %s", errno, MPIU_Strerror(errno));
+					     "**sock|poll|reuseaddr", "**sock|poll|reuseaddr %d %s", errno, MPIR_Strerror(errno));
 	    goto fn_fail;
 	}
 	/* --END ERROR HANDLING-- */
@@ -311,7 +311,7 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
     if (flags == -1)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s", errno, MPIU_Strerror(errno));
+					 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s", errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -320,7 +320,7 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
     if (rc == -1)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s", errno, MPIU_Strerror(errno));
+					 "**sock|poll|nonblock", "**sock|poll|nonblock %d %s", errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -364,7 +364,7 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
     if (rc == -1)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					 "**sock|poll|bind", "**sock|poll|bind %d %d %s", *port, errno, MPIU_Strerror(errno));
+					 "**sock|poll|bind", "**sock|poll|bind %d %d %s", *port, errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -383,7 +383,7 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
     if (rc == -1)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					 "**sock|poll|listen", "**sock|poll|listen %d %s", errno, MPIU_Strerror(errno));
+					 "**sock|poll|listen", "**sock|poll|listen %d %s", errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -398,7 +398,7 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
     if (rc == -1)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					 "**sock|getport", "**sock|poll|getport %d %s", errno, MPIU_Strerror(errno));
+					 "**sock|getport", "**sock|poll|getport %d %s", errno, MPIR_Strerror(errno));
 	goto fn_fail;
     }
     /* --END ERROR HANDLING-- */
@@ -432,7 +432,7 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
     *sockp = sock;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_LISTEN);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -459,9 +459,9 @@ int MPIDU_Sock_post_read(struct MPIDU_Sock * sock, void * buf, size_t minlen, si
     struct pollfd * pollfd;
     struct pollinfo * pollinfo;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_READ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_READ);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_READ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_READ);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(sock, mpi_errno, fn_exit);
@@ -493,7 +493,7 @@ int MPIDU_Sock_post_read(struct MPIDU_Sock * sock, void * buf, size_t minlen, si
     MPIDU_SOCKI_POLLFD_OP_SET(pollfd, pollinfo, POLLIN);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_READ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_READ);
     return mpi_errno;
 }
 /* end MPIDU_Sock_post_read() */
@@ -508,9 +508,9 @@ int MPIDU_Sock_post_readv(struct MPIDU_Sock * sock, MPL_IOV * iov, int iov_n, MP
     struct pollfd * pollfd;
     struct pollinfo * pollinfo;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_READV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_READV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_READV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_READV);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(sock, mpi_errno, fn_exit);
@@ -542,7 +542,7 @@ int MPIDU_Sock_post_readv(struct MPIDU_Sock * sock, MPL_IOV * iov, int iov_n, MP
     MPIDU_SOCKI_POLLFD_OP_SET(pollfd, pollinfo, POLLIN);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_READV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_READV);
     return mpi_errno;
 }
 /* end MPIDU_Sock_post_readv() */
@@ -558,9 +558,9 @@ int MPIDU_Sock_post_write(struct MPIDU_Sock * sock, void * buf, size_t minlen, s
     struct pollfd * pollfd;
     struct pollinfo * pollinfo;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_WRITE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_WRITE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_WRITE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_WRITE);
     
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(sock, mpi_errno, fn_exit);
@@ -592,7 +592,7 @@ int MPIDU_Sock_post_write(struct MPIDU_Sock * sock, void * buf, size_t minlen, s
     MPIDU_SOCKI_POLLFD_OP_SET(pollfd, pollinfo, POLLOUT);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_WRITE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_WRITE);
     return mpi_errno;
 }
 /* end MPIDU_Sock_post_write() */
@@ -607,9 +607,9 @@ int MPIDU_Sock_post_writev(struct MPIDU_Sock * sock, MPL_IOV * iov, int iov_n, M
     struct pollfd * pollfd;
     struct pollinfo * pollinfo;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
     
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(sock, mpi_errno, fn_exit);
@@ -641,7 +641,7 @@ int MPIDU_Sock_post_writev(struct MPIDU_Sock * sock, MPL_IOV * iov, int iov_n, M
     MPIDU_SOCKI_POLLFD_OP_SET(pollfd, pollinfo, POLLOUT);
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_WRITEV);
     return mpi_errno;
 }
 /* end MPIDU_Sock_post_writev() */
@@ -657,9 +657,9 @@ int MPIDU_Sock_post_close(struct MPIDU_Sock * sock)
     struct pollinfo * pollinfo;
     
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     MPIDU_SOCKI_VALIDATE_SOCK(sock, mpi_errno, fn_exit);
@@ -722,7 +722,7 @@ int MPIDU_Sock_post_close(struct MPIDU_Sock * sock)
     pollinfo->state = MPIDU_SOCKI_STATE_CLOSING;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_POST_CLOSE);
     return mpi_errno;
 }
 
diff --git a/src/mpid/common/sock/poll/sock_set.i b/src/mpid/common/sock/poll/sock_set.i
index f1dabce..fcdd78c 100644
--- a/src/mpid/common/sock/poll/sock_set.i
+++ b/src/mpid/common/sock/poll/sock_set.i
@@ -13,9 +13,9 @@ int MPIDU_Sock_create_set(struct MPIDU_Sock_set ** sock_setp)
 {
     struct MPIDU_Sock_set * sock_set = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_CREATE_SET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_CREATE_SET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_CREATE_SET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_CREATE_SET);
     
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
 
@@ -80,7 +80,7 @@ int MPIDU_Sock_create_set(struct MPIDU_Sock_set ** sock_setp)
 	if (rc != 0)
 	{
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-					     "**sock|poll|pipe", "**sock|poll|pipe %d %s", errno, MPIU_Strerror(errno));
+					     "**sock|poll|pipe", "**sock|poll|pipe %d %s", errno, MPIR_Strerror(errno));
 	    goto fn_fail;
 	}
 	/* --END ERROR HANDLING-- */
@@ -91,7 +91,7 @@ int MPIDU_Sock_create_set(struct MPIDU_Sock_set ** sock_setp)
 	{
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
 					     "**sock|poll|pipenonblock", "**sock|poll|pipenonblock %d %s",
-					     errno, MPIU_Strerror(errno));
+					     errno, MPIR_Strerror(errno));
 	    goto fn_fail;
 	}
 	/* --END ERROR HANDLING-- */
@@ -102,7 +102,7 @@ int MPIDU_Sock_create_set(struct MPIDU_Sock_set ** sock_setp)
 	{
 	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
 					     "**sock|poll|pipenonblock", "**sock|poll|pipenonblock %d %s",
-					     errno, MPIU_Strerror(errno));
+					     errno, MPIR_Strerror(errno));
 	    goto fn_fail;
 	}
 	/* --END ERROR HANDLING-- */
@@ -140,7 +140,7 @@ int MPIDU_Sock_create_set(struct MPIDU_Sock_set ** sock_setp)
     *sock_setp = sock_set;
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_CREATE_SET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_CREATE_SET);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -180,9 +180,9 @@ int MPIDU_Sock_close_open_sockets(struct MPIDU_Sock_set * sock_set, void** user_
     int mpi_errno = MPI_SUCCESS;
     struct pollinfo * pollinfos = NULL;
     pollinfos = sock_set->pollinfos;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_CLOSE_OPEN_SOCKETS);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_CLOSE_OPEN_SOCKETS);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_CLOSE_OPEN_SOCKETS);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_CLOSE_OPEN_SOCKETS);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
     /* wakeup waiting socket if mullti-threades */
@@ -198,7 +198,7 @@ int MPIDU_Sock_close_open_sockets(struct MPIDU_Sock_set * sock_set, void** user_
 #ifdef USE_SOCK_VERIFY
   fn_exit:
 #endif
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_CLOSE_OPEN_SOCKETS);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_CLOSE_OPEN_SOCKETS);
     return mpi_errno;
 }
 
@@ -212,9 +212,9 @@ int MPIDU_Sock_destroy_set(struct MPIDU_Sock_set * sock_set)
     int elem;
     struct MPIDU_Sock_event event;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
 
     MPIDU_SOCKI_VERIFY_INIT(mpi_errno, fn_exit);
 
@@ -278,6 +278,6 @@ int MPIDU_Sock_destroy_set(struct MPIDU_Sock_set * sock_set)
 #ifdef USE_SOCK_VERIFY
   fn_exit:
 #endif
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_DESTROY_SET);
     return mpi_errno;
 }
diff --git a/src/mpid/common/sock/poll/sock_wait.i b/src/mpid/common/sock/poll/sock_wait.i
index a9cb59d..658ac23 100644
--- a/src/mpid/common/sock/poll/sock_wait.i
+++ b/src/mpid/common/sock/poll/sock_wait.i
@@ -7,7 +7,7 @@
 
 /* Make sure that we can properly ensure atomic access to the poll routine */
 #ifdef MPICH_IS_THREADED
-#if !(MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#if !(MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
 #error selected multi-threaded implementation is not supported
 #endif
 #endif
@@ -47,10 +47,10 @@ int MPIDU_Sock_wait(struct MPIDU_Sock_set * sock_set, int millisecond_timeout,
 		    struct MPIDU_Sock_event * eventp)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_WAIT);
-    MPIDI_STATE_DECL(MPID_STATE_POLL);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCK_WAIT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_POLL);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCK_WAIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCK_WAIT);
 
     for (;;)
     { 
@@ -106,10 +106,10 @@ int MPIDU_Sock_wait(struct MPIDU_Sock_set * sock_set, int millisecond_timeout,
 	{
 #	    ifndef MPICH_IS_THREADED
 	    {
-		MPIDI_FUNC_ENTER(MPID_STATE_POLL);
+		MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_POLL);
 		n_fds = poll(sock_set->pollfds, sock_set->poll_array_elems, 
 			     millisecond_timeout);
-		MPIDI_FUNC_EXIT(MPID_STATE_POLL);
+		MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_POLL);
 	    }
 #	    else /* MPICH_IS_THREADED */
 	    {
@@ -119,10 +119,10 @@ int MPIDU_Sock_wait(struct MPIDU_Sock_set * sock_set, int millisecond_timeout,
 		 multithreaded code (and we don't then need the 
 		 MPIR_THREAD_CHECK_BEGIN/END macros) */
 		if (!MPIR_ThreadInfo.isThreaded) {
-		    MPIDI_FUNC_ENTER(MPID_STATE_POLL);
+		    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_POLL);
 		    n_fds = poll(sock_set->pollfds, sock_set->poll_array_elems, 
 				 millisecond_timeout);
-		    MPIDI_FUNC_EXIT(MPID_STATE_POLL);
+		    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_POLL);
 		}
 		else
 		{    
@@ -131,9 +131,9 @@ int MPIDU_Sock_wait(struct MPIDU_Sock_set * sock_set, int millisecond_timeout,
 		 * progress can be made.  This avoids the lock manipulation
 		 * overhead.
 		 */
-		MPIDI_FUNC_ENTER(MPID_STATE_POLL);
+		MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_POLL);
 		n_fds = poll(sock_set->pollfds, sock_set->poll_array_elems, 0);
-		MPIDI_FUNC_EXIT(MPID_STATE_POLL);
+		MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_POLL);
 		
 		if (n_fds == 0 && millisecond_timeout != 0)
 		{
@@ -160,10 +160,10 @@ int MPIDU_Sock_wait(struct MPIDU_Sock_set * sock_set, int millisecond_timeout,
 				    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); */
 		    MPID_Thread_mutex_unlock(&MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX, &err);
 			    
-		    MPIDI_FUNC_ENTER(MPID_STATE_POLL);
+		    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_POLL);
 		    n_fds = poll(sock_set->pollfds_active, 
 				 pollfds_active_elems, millisecond_timeout);
-		    MPIDI_FUNC_EXIT(MPID_STATE_POLL);
+		    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_POLL);
 		    
 		    /* Reaquire the lock before processing any of the 
 		       information returned from poll */
@@ -217,7 +217,7 @@ int MPIDU_Sock_wait(struct MPIDU_Sock_set * sock_set, int millisecond_timeout,
 	    else
 	    {
 		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL,
-						 "**sock|oserror", "**sock|poll|oserror %d %s", errno, MPIU_Strerror(errno));
+						 "**sock|oserror", "**sock|poll|oserror %d %s", errno, MPIR_Strerror(errno));
 		goto fn_exit;
 	    }
 	    /* --END ERROR HANDLING-- */
@@ -236,8 +236,8 @@ int MPIDU_Sock_wait(struct MPIDU_Sock_set * sock_set, int millisecond_timeout,
 	    struct pollfd * const pollfd = &sock_set->pollfds[elem];
 	    struct pollinfo * const pollinfo = &sock_set->pollinfos[elem];
 	
-	    MPIU_Assert((pollfd->events & (POLLIN | POLLOUT)) || pollfd->fd == -1);
-	    MPIU_Assert(pollfd->fd >= 0 || pollfd->fd == -1);
+	    MPIR_Assert((pollfd->events & (POLLIN | POLLOUT)) || pollfd->fd == -1);
+	    MPIR_Assert(pollfd->fd >= 0 || pollfd->fd == -1);
 			
 	    if (pollfd->fd < 0 || pollfd->revents == 0)
 	    {
@@ -389,7 +389,7 @@ int MPIDU_Sock_wait(struct MPIDU_Sock_set * sock_set, int millisecond_timeout,
     }
     
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCK_WAIT);
     return mpi_errno;
 }
 
@@ -400,9 +400,9 @@ int MPIDU_Sock_wait(struct MPIDU_Sock_set * sock_set, int millisecond_timeout,
 static int MPIDU_Socki_handle_pollhup(struct pollfd * const pollfd, struct pollinfo * const pollinfo)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLHUP);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLHUP);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLHUP);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLHUP);
     
     if (pollinfo->state == MPIDU_SOCKI_STATE_CONNECTED_RW)
     {
@@ -435,21 +435,21 @@ static int MPIDU_Socki_handle_pollhup(struct pollfd * const pollfd, struct polli
 	 * data has been read, the POLLIN handler will change the connection state and remove the connection from the active poll
 	 * list.
 	 */
-	MPIU_Assert(pollinfo->state == MPIDU_SOCKI_STATE_CONNECTED_RO && (pollfd->events & POLLIN) && (pollfd->revents & POLLIN));
+	MPIR_Assert(pollinfo->state == MPIDU_SOCKI_STATE_CONNECTED_RO && (pollfd->events & POLLIN) && (pollfd->revents & POLLIN));
     }
     else if (pollinfo->state == MPIDU_SOCKI_STATE_DISCONNECTED)
     {
 	/*
 	 * We should never reach this state because pollfd->fd should be set to -1 if we are in the disconnected state.
 	 */
-	MPIU_Assert(pollinfo->state == MPIDU_SOCKI_STATE_DISCONNECTED && pollfd->fd == -1);
+	MPIR_Assert(pollinfo->state == MPIDU_SOCKI_STATE_DISCONNECTED && pollfd->fd == -1);
     }
     else if (pollinfo->state == MPIDU_SOCKI_STATE_CONNECTING)
     {
 	/*
 	 * The process we were connecting to died.  Let the POLLOUT handler deal with the error.
 	 */
-	MPIU_Assert(pollinfo->state == MPIDU_SOCKI_STATE_CONNECTING && (pollfd->events & POLLOUT));
+	MPIR_Assert(pollinfo->state == MPIDU_SOCKI_STATE_CONNECTING && (pollfd->events & POLLOUT));
 	pollfd->revents = POLLOUT;
     }
     /* --BEGIN ERROR HANDLING-- */
@@ -463,7 +463,7 @@ static int MPIDU_Socki_handle_pollhup(struct pollfd * const pollfd, struct polli
     /* --END ERROR HANDLING-- */
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLHUP);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLHUP);
     return mpi_errno;
 }
 /* end MPIDU_Socki_handle_pollhup() */
@@ -476,9 +476,9 @@ static int MPIDU_Socki_handle_pollhup(struct pollfd * const pollfd, struct polli
 static int MPIDU_Socki_handle_pollerr(struct pollfd * const pollfd, struct pollinfo * const pollinfo)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLERR);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLERR);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLERR);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLERR);
 
     /* --BEGIN ERROR HANDLING-- */
     if (pollinfo->type != MPIDU_SOCKI_TYPE_COMMUNICATION)
@@ -497,7 +497,7 @@ static int MPIDU_Socki_handle_pollerr(struct pollfd * const pollfd, struct polli
 	 * continuously triggered.  We remove the socket from the poll list (pollfd->fd = 1) in order to prevent this issue.
 	 * Here, we simple check that things are as we expect them to be.
 	 */
-	MPIU_Assert((pollfd->events & (POLLIN | POLLOUT)) || pollfd->fd == -1);
+	MPIR_Assert((pollfd->events & (POLLIN | POLLOUT)) || pollfd->fd == -1);
 
 	/* If a write was posted then cancel it and generate an write completion event */
 	if (pollfd->events & POLLOUT)
@@ -533,20 +533,20 @@ static int MPIDU_Socki_handle_pollerr(struct pollfd * const pollfd, struct polli
 	 * data has been read, the POLLIN handler will change the connection state and remove the connection from the active poll
 	 * list.
 	 */
-	MPIU_Assert(pollinfo->state == MPIDU_SOCKI_STATE_CONNECTED_RO && (pollfd->events & POLLIN) && (pollfd->revents & POLLIN));
+	MPIR_Assert(pollinfo->state == MPIDU_SOCKI_STATE_CONNECTED_RO && (pollfd->events & POLLIN) && (pollfd->revents & POLLIN));
     }
     else if (pollinfo->state == MPIDU_SOCKI_STATE_CONNECTING)
     {
 	/*
 	 * The process we were connecting to died.  Let the POLLOUT handler deal with the error.
 	 */
-	MPIU_Assert(pollinfo->state == MPIDU_SOCKI_STATE_CONNECTING && (pollfd->events & POLLOUT));
+	MPIR_Assert(pollinfo->state == MPIDU_SOCKI_STATE_CONNECTING && (pollfd->events & POLLOUT));
 	pollfd->revents = POLLOUT;
     }
     else if (pollinfo->state == MPIDU_SOCKI_STATE_DISCONNECTED)
     {
 	/* We are already disconnected!  Why are we handling an error? */
-	MPIU_Assert(pollfd->fd == -1);
+	MPIR_Assert(pollfd->fd == -1);
     }
     /* --BEGIN ERROR HANDLING-- */
     else
@@ -559,7 +559,7 @@ static int MPIDU_Socki_handle_pollerr(struct pollfd * const pollfd, struct polli
     /* --END ERROR HANDLING-- */
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLERR);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_POLLERR);
     return mpi_errno;
 }
 /* end MPIDU_Socki_handle_pollerr() */
@@ -573,27 +573,27 @@ static int MPIDU_Socki_handle_read(struct pollfd * const pollfd, struct pollinfo
 {
     ssize_t nb;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_READ);
-    MPIDI_STATE_DECL(MPID_STATE_READV);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_READ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_READ);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_READV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_READ);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_READ);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_READ);
 
     do
     {
 	if (pollinfo->read_iov_flag)
 	{ 
-	    MPIDI_FUNC_ENTER(MPID_STATE_READV);
+	    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_READV);
 	    nb = MPL_large_readv(pollinfo->fd, pollinfo->read.iov.ptr + pollinfo->read.iov.offset,
 		       pollinfo->read.iov.count - pollinfo->read.iov.offset);
-	    MPIDI_FUNC_EXIT(MPID_STATE_READV);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_READV);
 	}
 	else
 	{
-	    MPIDI_FUNC_ENTER(MPID_STATE_READ);
+	    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_READ);
 	    nb = read(pollinfo->fd, pollinfo->read.buf.ptr + pollinfo->read_nb,
 		      pollinfo->read.buf.max - pollinfo->read_nb);
-	    MPIDI_FUNC_EXIT(MPID_STATE_READ);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_READ);
 	}
     }
     while (nb < 0 && errno == EINTR);
@@ -678,7 +678,7 @@ static int MPIDU_Socki_handle_read(struct pollfd * const pollfd, struct pollinfo
     /* --END ERROR HANDLING-- */
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_READ);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_READ);
     return mpi_errno;
 }
 /* end MPIDU_Socki_handle_read() */
@@ -692,27 +692,27 @@ static int MPIDU_Socki_handle_write(struct pollfd * const pollfd, struct pollinf
 {
     ssize_t nb;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_WRITE);
-    MPIDI_STATE_DECL(MPID_STATE_WRITEV);
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_WRITE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WRITE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_WRITEV);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_WRITE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_WRITE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_WRITE);
 
     do
     {
 	if (pollinfo->write_iov_flag)
 	{ 
-	    MPIDI_FUNC_ENTER(MPID_STATE_WRITEV);
+	    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WRITEV);
 	    nb = MPL_large_writev(pollinfo->fd, pollinfo->write.iov.ptr + pollinfo->write.iov.offset,
 			pollinfo->write.iov.count - pollinfo->write.iov.offset);
-	    MPIDI_FUNC_EXIT(MPID_STATE_WRITEV);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WRITEV);
 	}
 	else
 	{
-	    MPIDI_FUNC_ENTER(MPID_STATE_WRITE);
+	    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_WRITE);
 	    nb = write(pollinfo->fd, pollinfo->write.buf.ptr + pollinfo->write_nb,
 		       pollinfo->write.buf.max - pollinfo->write_nb);
-	    MPIDI_FUNC_EXIT(MPID_STATE_WRITE);
+	    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_WRITE);
 	}
     }
     while (nb < 0 && errno == EINTR);
@@ -771,7 +771,7 @@ static int MPIDU_Socki_handle_write(struct pollfd * const pollfd, struct pollinf
     /* --END ERROR HANDLING-- */
 
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_WRITE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_WRITE);
     return mpi_errno;
 }
 /* end MPIDU_Socki_handle_write() */
@@ -787,9 +787,9 @@ static int MPIDU_Socki_handle_connect(struct pollfd * const pollfd, struct polli
     socklen_t addr_len;
     int rc;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_CONNECT);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCKI_HANDLE_CONNECT);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_CONNECT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCKI_HANDLE_CONNECT);
 
     addr_len = sizeof(struct sockaddr_in);
     rc = getpeername(pollfd->fd, (struct sockaddr *) &addr, &addr_len);
@@ -807,7 +807,7 @@ static int MPIDU_Socki_handle_connect(struct pollfd * const pollfd, struct polli
 	event_mpi_errno = MPIR_Err_create_code(
 	    MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_CONN_FAILED, "**sock|connfailed",
 	    "**sock|poll|connfailed %d %d %d %s", pollinfo->sock_set->id, pollinfo->sock_id, pollinfo->os_errno,
-	    MPIU_Strerror(pollinfo->os_errno));
+	    MPIR_Strerror(pollinfo->os_errno));
 	MPIDU_SOCKI_EVENT_ENQUEUE(pollinfo, MPIDU_SOCK_OP_CONNECT, 0, pollinfo->user_ptr, event_mpi_errno, mpi_errno, fn_exit);
 	pollinfo->state = MPIDU_SOCKI_STATE_DISCONNECTED;
     }
@@ -816,7 +816,7 @@ static int MPIDU_Socki_handle_connect(struct pollfd * const pollfd, struct polli
     MPIDU_SOCKI_POLLFD_OP_CLEAR(pollfd, pollinfo, POLLOUT);
     
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_CONNECT);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCKI_HANDLE_CONNECT);
     return mpi_errno;
 }
 /* end MPIDU_Socki_handle_connect() */
diff --git a/src/mpid/common/sock/poll/socki_util.i b/src/mpid/common/sock/poll/socki_util.i
index 2033801..f3d1f0a 100644
--- a/src/mpid/common/sock/poll/socki_util.i
+++ b/src/mpid/common/sock/poll/socki_util.i
@@ -142,7 +142,7 @@ static struct MPIDU_Socki_eventq_table *MPIDU_Socki_eventq_table_head=NULL;
 	    mpi_errno = MPIR_Err_create_code(			\
 		MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPIDU_SOCK_ERR_FAIL, "**sock|oserror",		\
 		"**sock|poll|oserror %s %d %d %d %s", "getsockopt", pollinfo->sock_set->id, pollinfo->sock_id,	\
-		 (os_errno_), MPIU_Strerror(os_errno_));	\
+		 (os_errno_), MPIR_Strerror(os_errno_));	\
 	}							\
 								\
         goto fail_label_;					\
@@ -218,7 +218,7 @@ static struct MPIDU_Socki_eventq_table *MPIDU_Socki_eventq_table_head=NULL;
 		(mpi_errno_) = MPIR_Err_create_code(			\
 		    (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_CONN_FAILED, "**sock|connfailed",	\
 		    "**sock|poll|connfailed %d %d %d %s", (pollinfo_)->sock_set->id, (pollinfo_)->sock_id,			\
-		    (pollinfo_)->os_errno, MPIU_Strerror((pollinfo_)->os_errno));						\
+		    (pollinfo_)->os_errno, MPIR_Strerror((pollinfo_)->os_errno));						\
 	    }								\
 	    goto fail_label_;						\
 	}								\
@@ -272,7 +272,7 @@ static struct MPIDU_Socki_eventq_table *MPIDU_Socki_eventq_table_head=NULL;
 		(mpi_errno_) = MPIR_Err_create_code(										 \
 		    (mpi_errno_), MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPIDU_SOCK_ERR_CONN_FAILED, "**sock|connfailed",	 \
 		    "**sock|poll|connfailed %d %d %d %s", (pollinfo_)->sock_set->id, (pollinfo_)->sock_id,			 \
-		    (pollinfo_)->os_errno, MPIU_Strerror((pollinfo_)->os_errno));						 \
+		    (pollinfo_)->os_errno, MPIR_Strerror((pollinfo_)->os_errno));						 \
 	    }								\
 	    goto fail_label_;						\
 	}								\
@@ -375,7 +375,7 @@ static int MPIDU_Socki_wakeup(struct MPIDU_Sock_set * sock_set)
 		break;
 	    }
 
-	    MPIU_Assertp(nb == 0 || errno == EINTR);
+	    MPIR_Assertp(nb == 0 || errno == EINTR);
 	}
 	
 	sock_set->wakeup_posted = TRUE;
@@ -394,9 +394,9 @@ int MPIDI_Sock_update_sock_set( struct MPIDU_Sock_set *sock_set,
 {
     int mpi_errno = MPI_SUCCESS;
     int elem;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_SOCK_UPDATE_SOCK_SET);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_SOCK_UPDATE_SOCK_SET);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_SOCK_UPDATE_SOCK_SET);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_SOCK_UPDATE_SOCK_SET);
     for (elem = 0; elem < sock_set->poll_array_elems; elem++) {
 	sock_set->pollfds[elem].events = sock_set->pollinfos[elem].pollfd_events;
 	if ((sock_set->pollfds[elem].events & (POLLIN | POLLOUT)) != 0) {
@@ -426,7 +426,7 @@ int MPIDI_Sock_update_sock_set( struct MPIDU_Sock_set *sock_set,
 
     sock_set->pollfds_updated = FALSE;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_SOCK_UPDATE_SOCK_SET);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_SOCK_UPDATE_SOCK_SET);
     return mpi_errno;
 
 }
@@ -453,28 +453,28 @@ static int MPIDU_Socki_os_to_mpi_errno(struct pollinfo * pollinfo, int os_errno,
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, fcname, line, MPIDU_SOCK_ERR_NOMEM,
 					 "**sock|osnomem", "**sock|poll|osnomem %d %d %d %s",
-					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
+					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIR_Strerror(os_errno));
 	*disconnected = FALSE;
     }
     else if (os_errno == EFAULT || os_errno == EINVAL)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, fcname, line, MPIDU_SOCK_ERR_BAD_BUF,
 					 "**sock|badbuf", "**sock|poll|badbuf %d %d %d %s",
-					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
+					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIR_Strerror(os_errno));
 	*disconnected = FALSE;
     }
     else if (os_errno == EPIPE)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, fcname, line, MPIDU_SOCK_ERR_CONN_CLOSED,
 					 "**sock|connclosed", "**sock|poll|connclosed %d %d %d %s",
-					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
+					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIR_Strerror(os_errno));
 	*disconnected = TRUE;
     }
     else if (os_errno == ECONNRESET || os_errno == ENOTCONN || os_errno == ETIMEDOUT)
     {
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, fcname, line, MPIDU_SOCK_ERR_CONN_FAILED,
 					 "**sock|connfailed", "**sock|poll|connfailed %d %d %d %s",
-					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
+					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIR_Strerror(os_errno));
 	pollinfo->os_errno = os_errno;
 	*disconnected = TRUE;
     }
@@ -506,7 +506,7 @@ static int MPIDU_Socki_os_to_mpi_errno(struct pollinfo * pollinfo, int os_errno,
 	 */
 	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, fcname, line, MPIDU_SOCK_ERR_CONN_FAILED,
 					 "**sock|oserror", "**sock|poll|oserror %d %d %d %s",
-					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIU_Strerror(os_errno));
+					 pollinfo->sock_set->id, pollinfo->sock_id, os_errno, MPIR_Strerror(os_errno));
 	pollinfo->os_errno = os_errno;
 	*disconnected = TRUE;
     }
@@ -571,9 +571,9 @@ static int MPIDU_Socki_sock_alloc(struct MPIDU_Sock_set * sock_set, struct MPIDU
     struct pollfd * pollfds = NULL;
     struct pollinfo * pollinfos = NULL;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCKI_SOCK_ALLOC);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCKI_SOCK_ALLOC);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCKI_SOCK_ALLOC);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCKI_SOCK_ALLOC);
     
     /* FIXME: Should this use the CHKPMEM macros (perm malloc)? */
     sock = MPL_malloc(sizeof(struct MPIDU_Sock));
@@ -695,16 +695,16 @@ static int MPIDU_Socki_sock_alloc(struct MPIDU_Sock_set * sock_set, struct MPIDU
     /*
      * Verify that memory hasn't been messed up.
      */
-    MPIU_Assert(sock_set->pollinfos[avail_elem].sock_set == sock_set);
-    MPIU_Assert(sock_set->pollinfos[avail_elem].elem == avail_elem);
-    MPIU_Assert(sock_set->pollinfos[avail_elem].fd == -1);
-    MPIU_Assert(sock_set->pollinfos[avail_elem].sock == NULL);
-    MPIU_Assert(sock_set->pollinfos[avail_elem].sock_id == -1);
-    MPIU_Assert(sock_set->pollinfos[avail_elem].type == MPIDU_SOCKI_TYPE_FIRST);
-    MPIU_Assert(sock_set->pollinfos[avail_elem].state == MPIDU_SOCKI_STATE_FIRST);
+    MPIR_Assert(sock_set->pollinfos[avail_elem].sock_set == sock_set);
+    MPIR_Assert(sock_set->pollinfos[avail_elem].elem == avail_elem);
+    MPIR_Assert(sock_set->pollinfos[avail_elem].fd == -1);
+    MPIR_Assert(sock_set->pollinfos[avail_elem].sock == NULL);
+    MPIR_Assert(sock_set->pollinfos[avail_elem].sock_id == -1);
+    MPIR_Assert(sock_set->pollinfos[avail_elem].type == MPIDU_SOCKI_TYPE_FIRST);
+    MPIR_Assert(sock_set->pollinfos[avail_elem].state == MPIDU_SOCKI_STATE_FIRST);
 #   ifdef MPICH_IS_THREADED
     {
-	MPIU_Assert(sock_set->pollinfos[avail_elem].pollfd_events == 0);
+	MPIR_Assert(sock_set->pollinfos[avail_elem].pollfd_events == 0);
     }
 #   endif
 
@@ -734,7 +734,7 @@ static int MPIDU_Socki_sock_alloc(struct MPIDU_Sock_set * sock_set, struct MPIDU
     *sockp = sock;
     
   fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCKI_SOCK_ALLOC);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCKI_SOCK_ALLOC);
     return mpi_errno;
 
     /* --BEGIN ERROR HANDLING-- */
@@ -769,9 +769,9 @@ static void MPIDU_Socki_sock_free(struct MPIDU_Sock * sock)
     struct pollfd * pollfd = MPIDU_Socki_sock_get_pollfd(sock);
     struct pollinfo * pollinfo = MPIDU_Socki_sock_get_pollinfo(sock);
     struct MPIDU_Sock_set * sock_set = sock->sock_set;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCKI_SOCK_FREE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDU_SOCKI_SOCK_FREE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDU_SOCKI_SOCK_FREE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDU_SOCKI_SOCK_FREE);
 
     /* FIXME: We need an abstraction for the thread sync operations */
 #   ifdef MPICH_IS_THREADED
@@ -779,7 +779,7 @@ static void MPIDU_Socki_sock_free(struct MPIDU_Sock * sock)
 	/*
 	 * Freeing a sock while Sock_wait() is blocked in poll() is not supported
 	 */
-	MPIU_Assert(sock_set->pollfds_active == NULL);
+	MPIR_Assert(sock_set->pollfds_active == NULL);
     }
 #   endif
 
@@ -823,7 +823,7 @@ static void MPIDU_Socki_sock_free(struct MPIDU_Sock * sock)
     
     MPL_free(sock);
     
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDU_SOCKI_SOCK_FREE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDU_SOCKI_SOCK_FREE);
 }
 /* end MPIDU_Socki_sock_free() */
 
@@ -838,9 +838,9 @@ static int MPIDU_Socki_event_enqueue(struct pollinfo * pollinfo, MPIDU_Sock_op_t
     struct MPIDU_Sock_set * sock_set = pollinfo->sock_set;
     struct MPIDU_Socki_eventq_elem * eventq_elem;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_SOCKI_EVENT_ENQUEUE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SOCKI_EVENT_ENQUEUE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SOCKI_EVENT_ENQUEUE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SOCKI_EVENT_ENQUEUE);
 
     if (MPIDU_Socki_eventq_pool != NULL)
     {
@@ -895,7 +895,7 @@ static int MPIDU_Socki_event_enqueue(struct pollinfo * pollinfo, MPIDU_Sock_op_t
     }
     sock_set->eventq_tail = eventq_elem;
 fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_SOCKI_EVENT_ENQUEUE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SOCKI_EVENT_ENQUEUE);
     return mpi_errno;
 }
 /* end MPIDU_Socki_event_enqueue() */
@@ -909,9 +909,9 @@ static inline int MPIDU_Socki_event_dequeue(struct MPIDU_Sock_set * sock_set, in
 {
     struct MPIDU_Socki_eventq_elem * eventq_elem;
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_SOCKI_EVENT_DEQUEUE);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SOCKI_EVENT_DEQUEUE);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SOCKI_EVENT_DEQUEUE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SOCKI_EVENT_DEQUEUE);
 
     if (sock_set->eventq_head != NULL)
     {
@@ -937,7 +937,7 @@ static inline int MPIDU_Socki_event_dequeue(struct MPIDU_Sock_set * sock_set, in
     }
     /* --END ERROR HANDLING-- */
 
-    MPIDI_FUNC_EXIT(MPID_STATE_SOCKI_EVENT_DEQUEUE);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SOCKI_EVENT_DEQUEUE);
     return mpi_errno;
 }
 /* end MPIDU_Socki_event_dequeue() */
@@ -952,9 +952,9 @@ static inline int MPIDU_Socki_event_dequeue(struct MPIDU_Sock_set * sock_set, in
 static void MPIDU_Socki_free_eventq_mem(void)
 {
     struct MPIDU_Socki_eventq_table *eventq_table, *eventq_table_next;
-    MPIDI_STATE_DECL(MPID_STATE_SOCKI_FREE_EVENTQ_MEM);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_SOCKI_FREE_EVENTQ_MEM);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_SOCKI_FREE_EVENTQ_MEM);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_SOCKI_FREE_EVENTQ_MEM);
 
     eventq_table = MPIDU_Socki_eventq_table_head;
     while (eventq_table) {
@@ -964,7 +964,7 @@ static void MPIDU_Socki_free_eventq_mem(void)
     }
     MPIDU_Socki_eventq_table_head = NULL;
 
-    MPIDI_FUNC_EXIT(MPID_STATE_SOCKI_FREE_EVENTQ_MEM);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_SOCKI_FREE_EVENTQ_MEM);
 }
 
 /* Provide a standard mechanism for setting the socket buffer size.
@@ -1002,7 +1002,7 @@ int MPIDU_Sock_SetSockBufferSize( int fd, int firm )
 	    MPIR_ERR_SETANDJUMP3(mpi_errno,MPIDU_SOCK_ERR_FAIL, 
 				 "**sock|poll|setsndbufsz",
 				 "**sock|poll|setsndbufsz %d %d %s", 
-				 bufsz, errno, MPIU_Strerror(errno));
+				 bufsz, errno, MPIR_Strerror(errno));
 	}
 	bufsz     = sockBufSize;
 	bufsz_len = sizeof(bufsz);
@@ -1011,7 +1011,7 @@ int MPIDU_Sock_SetSockBufferSize( int fd, int firm )
 	    MPIR_ERR_SETANDJUMP3(mpi_errno,MPIDU_SOCK_ERR_FAIL, 
 				 "**sock|poll|setrcvbufsz",
 				 "**sock|poll|setrcvbufsz %d %d %s", 
-				 bufsz, errno, MPIU_Strerror(errno));
+				 bufsz, errno, MPIR_Strerror(errno));
 	}
 	bufsz_len = sizeof(bufsz);
 
diff --git a/src/mpid/common/thread/mpidu_thread_fallback.h b/src/mpid/common/thread/mpidu_thread_fallback.h
index b441e7f..fec107f 100644
--- a/src/mpid/common/thread/mpidu_thread_fallback.h
+++ b/src/mpid/common/thread/mpidu_thread_fallback.h
@@ -79,7 +79,7 @@ M*/
 
 #if defined(MPICH_IS_THREADED)
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL
 
 #define MPIDUI_THREAD_CS_ENTER_GLOBAL(mutex)                            \
     do {                                                                \
@@ -90,12 +90,12 @@ M*/
             MPL_DBG_MSG(MPIR_DBG_THREAD, TYPICAL, "recursive locking GLOBAL mutex"); \
             MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, \
                                          MPIR_Per_thread, per_thread, &rec_err_); \
-            MPIU_Assert(rec_err_ == 0);                                 \
+            MPIR_Assert(rec_err_ == 0);                                 \
                                                                         \
             if (per_thread->lock_depth == 0) {                          \
                 int err_ = 0;                                           \
                 MPIDU_Thread_mutex_lock(&mutex, &err_);                 \
-                MPIU_Assert(err_ == 0);                                 \
+                MPIR_Assert(err_ == 0);                                 \
             }                                                           \
             per_thread->lock_depth++;                                   \
         }                                                               \
@@ -112,7 +112,7 @@ M*/
             MPL_DBG_MSG_P(MPIR_DBG_THREAD,VERBOSE,"enter MPIDU_Thread_mutex_lock %p", &mutex); \
             MPIDU_Thread_mutex_lock(&mutex, &err_);                     \
             MPL_DBG_MSG_P(MPIR_DBG_THREAD,VERBOSE,"exit MPIDU_Thread_mutex_lock %p", &mutex); \
-            MPIU_Assert(err_ == 0);                                     \
+            MPIR_Assert(err_ == 0);                                     \
         }                                                               \
     } while (0)
 #define MPIDUI_THREAD_CS_ENTER_GLOBAL(mutex) do {} while (0)
@@ -138,7 +138,7 @@ M*/
 
 #if defined(MPICH_IS_THREADED)
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL
 
 #define MPIDUI_THREAD_CS_EXIT_GLOBAL(mutex)                             \
     do {                                                                \
@@ -147,16 +147,16 @@ M*/
             MPIR_Per_thread_t *per_thread = NULL;                              \
                                                                         \
             MPL_DBG_MSG(MPIR_DBG_THREAD, TYPICAL, "recursive unlocking GLOBAL mutex"); \
-            MPIU_Assert(rec_err_ == 0);                                 \
+            MPIR_Assert(rec_err_ == 0);                                 \
             MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, \
                                          MPIR_Per_thread, per_thread, &rec_err_); \
-            MPIU_Assert(rec_err_ == 0);                                 \
+            MPIR_Assert(rec_err_ == 0);                                 \
                                                                         \
             if (per_thread->lock_depth == 1) {                          \
                 int err_ = 0;                                           \
                 MPL_DBG_MSG_P(MPIR_DBG_THREAD,VERBOSE,"MPIDU_Thread_mutex_unlock %p", &mutex); \
                 MPIDU_Thread_mutex_unlock(&mutex, &err_);               \
-                MPIU_Assert(err_ == 0);                                 \
+                MPIR_Assert(err_ == 0);                                 \
             }                                                           \
             per_thread->lock_depth--;                                   \
         }                                                               \
@@ -173,7 +173,7 @@ M*/
             MPL_DBG_MSG(MPIR_DBG_THREAD, TYPICAL, "non-recursive unlocking POBJ mutex"); \
             MPL_DBG_MSG_P(MPIR_DBG_THREAD,VERBOSE,"MPIDU_Thread_mutex_unlock %p", &mutex); \
             MPIDU_Thread_mutex_unlock(&mutex, &err_);                   \
-            MPIU_Assert(err_ == 0);                                     \
+            MPIR_Assert(err_ == 0);                                     \
         }                                                               \
     } while (0)
 #define MPIDUI_THREAD_CS_EXIT_GLOBAL(mutex) do {} while (0)
@@ -200,7 +200,7 @@ M*/
 
 #if defined(MPICH_IS_THREADED)
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL
 
 #define MPIDUI_THREAD_CS_YIELD_GLOBAL(mutex)                            \
     do {                                                                \
@@ -210,7 +210,7 @@ M*/
             MPL_DBG_MSG(MPIR_DBG_THREAD,VERBOSE,"enter MPIDU_Thread_yield"); \
             MPIDU_Thread_yield(&mutex, &err_);                          \
             MPL_DBG_MSG(MPIR_DBG_THREAD,VERBOSE,"exit MPIDU_Thread_yield"); \
-            MPIU_Assert(err_ == 0);                                     \
+            MPIR_Assert(err_ == 0);                                     \
         }                                                               \
     } while (0)
 #define MPIDUI_THREAD_CS_YIELD_POBJ(mutex) do {} while (0)
@@ -223,7 +223,7 @@ M*/
             int err_ = 0;                                               \
             MPL_DBG_MSG(MPIR_DBG_THREAD, TYPICAL, "non-recursive yielding POBJ mutex"); \
             MPIDU_Thread_yield(&mutex, &err_);                          \
-            MPIU_Assert(err_ == 0);                                     \
+            MPIR_Assert(err_ == 0);                                     \
         }                                                               \
     } while (0)
 #define MPIDUI_THREAD_CS_YIELD_GLOBAL(mutex) do {} while (0)
@@ -258,7 +258,7 @@ M*/
 #define MPIDU_Thread_create(func_, data_, id_, err_ptr_)        \
     do {                                                        \
         MPL_thread_create(func_, data_, id_, err_ptr_);         \
-        MPIU_Assert(*err_ptr_ == 0);                            \
+        MPIR_Assert(*err_ptr_ == 0);                            \
     } while (0)
 
 /*@
@@ -294,10 +294,10 @@ M*/
         if (OPA_load_int(&(mutex_ptr_)->num_queued_threads) == 0)       \
             break;                                                      \
         MPIDU_Thread_mutex_unlock(mutex_ptr_, err_ptr_);                \
-        MPIU_Assert(*err_ptr_ == 0);                                    \
+        MPIR_Assert(*err_ptr_ == 0);                                    \
         MPL_thread_yield();                                             \
         MPIDU_Thread_mutex_lock(mutex_ptr_, err_ptr_);                  \
-        MPIU_Assert(*err_ptr_ == 0);                                    \
+        MPIR_Assert(*err_ptr_ == 0);                                    \
     } while (0)
 
 /*
@@ -315,7 +315,7 @@ M*/
     do {                                                                \
         OPA_store_int(&(mutex_ptr_)->num_queued_threads, 0);            \
         MPL_thread_mutex_create(&(mutex_ptr_)->mutex, err_ptr_);        \
-        MPIU_Assert(*err_ptr_ == 0);                                    \
+        MPIR_Assert(*err_ptr_ == 0);                                    \
         MPL_DBG_MSG_P(MPIR_DBG_THREAD,TYPICAL,"Created MPL_thread_mutex %p", (mutex_ptr_)); \
     } while (0)
 
@@ -332,7 +332,7 @@ M*/
     do {                                                                \
         MPL_DBG_MSG_P(MPIR_DBG_THREAD,TYPICAL,"About to destroy MPL_thread_mutex %p", (mutex_ptr_)); \
         MPL_thread_mutex_destroy(&(mutex_ptr_)->mutex, err_ptr_);       \
-        MPIU_Assert(*err_ptr_ == 0);                                    \
+        MPIR_Assert(*err_ptr_ == 0);                                    \
     } while (0)
 
 /*@
@@ -346,7 +346,7 @@ M*/
         OPA_incr_int(&(mutex_ptr_)->num_queued_threads);                \
         MPL_DBG_MSG_P(MPIR_DBG_THREAD,VERBOSE,"enter MPL_thread_mutex_lock %p", &(mutex_ptr_)->mutex); \
         MPL_thread_mutex_lock(&(mutex_ptr_)->mutex, err_ptr_);          \
-        MPIU_Assert(*err_ptr_ == 0);                                    \
+        MPIR_Assert(*err_ptr_ == 0);                                    \
         MPL_DBG_MSG_P(MPIR_DBG_THREAD,VERBOSE,"exit MPL_thread_mutex_lock %p", &(mutex_ptr_)->mutex); \
         OPA_decr_int(&(mutex_ptr_)->num_queued_threads);                \
     } while (0)
@@ -360,7 +360,7 @@ M*/
 #define MPIDU_Thread_mutex_unlock(mutex_ptr_, err_ptr_)                 \
     do {                                                                \
         MPL_thread_mutex_unlock(&(mutex_ptr_)->mutex, err_ptr_);        \
-        MPIU_Assert(*err_ptr_ == 0);                                    \
+        MPIR_Assert(*err_ptr_ == 0);                                    \
     } while (0)
 
 /*
@@ -377,7 +377,7 @@ M*/
 #define MPIDU_Thread_cond_create(cond_ptr_, err_ptr_)                   \
     do {                                                                \
         MPL_thread_cond_create(cond_ptr_, err_ptr_);                    \
-        MPIU_Assert(*err_ptr_ == 0);                                    \
+        MPIR_Assert(*err_ptr_ == 0);                                    \
         MPL_DBG_MSG_P(MPIR_DBG_THREAD,TYPICAL,"Created MPL_thread_cond %p", (cond_ptr_)); \
     } while (0)
 
@@ -395,7 +395,7 @@ M*/
     do {                                                                \
         MPL_DBG_MSG_P(MPIR_DBG_THREAD,TYPICAL,"About to destroy MPL_thread_cond %p", (cond_ptr_)); \
         MPL_thread_cond_destroy(cond_ptr_, err_ptr_);                   \
-        MPIU_Assert(*err_ptr_ == 0);                                    \
+        MPIR_Assert(*err_ptr_ == 0);                                    \
     } while (0)
 
 /*@
@@ -417,7 +417,7 @@ M*/
         OPA_incr_int(&(mutex_ptr_)->num_queued_threads);                \
         MPL_DBG_MSG_FMT(MPIR_DBG_THREAD,TYPICAL,(MPL_DBG_FDEST,"Enter cond_wait on cond=%p mutex=%p",(cond_ptr_),&(mutex_ptr_)->mutex)); \
         MPL_thread_cond_wait(cond_ptr_, &(mutex_ptr_)->mutex, err_ptr_); \
-        MPIU_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
+        MPIR_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
                             ("cond_wait failed, err=%d (%s)", *((int *) err_ptr_), strerror(*((int *) err_ptr_)))); \
         MPL_DBG_MSG_FMT(MPIR_DBG_THREAD,TYPICAL,(MPL_DBG_FDEST,"Exit cond_wait on cond=%p mutex=%p",(cond_ptr_),&(mutex_ptr_)->mutex)); \
         OPA_decr_int(&(mutex_ptr_)->num_queued_threads);                \
@@ -433,7 +433,7 @@ M*/
     do {                                                                \
         MPL_DBG_MSG_P(MPIR_DBG_THREAD,TYPICAL,"About to cond_broadcast on MPL_thread_cond %p", (cond_ptr_)); \
         MPL_thread_cond_broadcast(cond_ptr_, err_ptr_);                 \
-        MPIU_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
+        MPIR_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
                             ("cond_broadcast failed, err=%d (%s)", *((int *) err_ptr_), strerror(*((int *) err_ptr_)))); \
     } while (0)
 
@@ -447,7 +447,7 @@ M*/
     do {                                                                \
         MPL_DBG_MSG_P(MPIR_DBG_THREAD,TYPICAL,"About to cond_signal on MPL_thread_cond %p", (cond_ptr_)); \
         MPL_thread_cond_signal(cond_ptr_, err_ptr_);                    \
-        MPIU_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
+        MPIR_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
                             ("cond_signal failed, err=%d (%s)", *((int *) err_ptr_), strerror(*((int *) err_ptr_)))); \
     } while (0)
 
@@ -469,7 +469,7 @@ M*/
 #define MPIDU_Thread_tls_create(exit_func_ptr_, tls_ptr_, err_ptr_)     \
     do {                                                                \
         MPL_thread_tls_create(exit_func_ptr_, tls_ptr_, err_ptr_);      \
-        MPIU_Assert(*(int *) err_ptr_ == 0);                            \
+        MPIR_Assert(*(int *) err_ptr_ == 0);                            \
     } while (0)
 
 /*@
@@ -489,7 +489,7 @@ M*/
 #define MPIDU_Thread_tls_destroy(tls_ptr_, err_ptr_)    \
     do {                                                \
         MPL_thread_tls_destroy(tls_ptr_, err_ptr_);     \
-        MPIU_Assert(*(int *) err_ptr_ == 0);            \
+        MPIR_Assert(*(int *) err_ptr_ == 0);            \
     } while (0)
 
 /*@
@@ -503,7 +503,7 @@ M*/
 #define MPIDU_Thread_tls_set(tls_ptr_, value_, err_ptr_)                \
     do {                                                                \
         MPL_thread_tls_set(tls_ptr_, value_, err_ptr_);                 \
-        MPIU_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
+        MPIR_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
                             ("tls_set failed, err=%d (%s)", *((int *) err_ptr_), strerror(*((int *) err_ptr_)))); \
     } while (0)
 
@@ -520,7 +520,7 @@ M*/
 #define MPIDU_Thread_tls_get(tls_ptr_, value_ptr_, err_ptr_)            \
     do {                                                                \
         MPL_thread_tls_get(tls_ptr_, value_ptr_, err_ptr_);             \
-        MPIU_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
+        MPIR_Assert_fmt_msg(*((int *) err_ptr_) == 0,                   \
                             ("tls_get failed, err=%d (%s)", *((int *) err_ptr_), strerror(*((int *) err_ptr_)))); \
     } while (0)
 
@@ -529,7 +529,7 @@ M*/
     do {                                                                \
         int err_ = 0;                                                   \
         MPL_THREADPRIV_KEY_CREATE(MPIR_Per_thread_key, MPIR_Per_thread, &err_); \
-        MPIU_Assert(err_ == 0);                                         \
+        MPIR_Assert(err_ == 0);                                         \
     } while (0)
 
 #define MPIDU_THREADPRIV_KEY_GET_ADDR  MPL_THREADPRIV_KEY_GET_ADDR
@@ -538,7 +538,7 @@ M*/
     do {                                                        \
         int err_ = 0;                                           \
         MPL_THREADPRIV_KEY_DESTROY(MPIR_Per_thread_key, &err_);  \
-        MPIU_Assert(err_ == 0);                                 \
+        MPIR_Assert(err_ == 0);                                 \
     } while (0)
 
 #endif /* !defined(MPIDU_THREAD_H_INCLUDED) */
diff --git a/src/mpid/include/mpidu_pre.h b/src/mpid/include/mpidu_pre.h
index 3ad4314..6903a0c 100644
--- a/src/mpid/include/mpidu_pre.h
+++ b/src/mpid/include/mpidu_pre.h
@@ -17,9 +17,9 @@ struct MPIR_Comm;
 /* Scheduling forward declarations */
 
 struct MPIDU_Sched;
-typedef struct MPIDU_Sched *MPID_Sched_t;
+typedef struct MPIDU_Sched *MPIR_Sched_t;
 
-typedef int (MPID_Sched_cb_t) (struct MPIR_Comm * comm, int tag, void *state);
-typedef int (MPID_Sched_cb2_t) (struct MPIR_Comm * comm, int tag, void *state, void *state2);
+typedef int (MPIR_Sched_cb_t) (struct MPIR_Comm * comm, int tag, void *state);
+typedef int (MPIR_Sched_cb2_t) (struct MPIR_Comm * comm, int tag, void *state, void *state2);
 
 #endif
diff --git a/src/mpid/pamid/include/mpidi_hooks.h b/src/mpid/pamid/include/mpidi_hooks.h
index 00d4844..f636731 100644
--- a/src/mpid/pamid/include/mpidi_hooks.h
+++ b/src/mpid/pamid/include/mpidi_hooks.h
@@ -59,7 +59,6 @@ typedef size_t              intptr_t;
 /** \brief This defines the portion of MPIR_Win that is specific to the Device */
 #define MPID_DEV_WIN_DECL        struct MPIDI_Win     mpid;
 
-#define HAVE_DEV_COMM_HOOK
 #define MPID_Dev_comm_create_hook(a)  ({ int MPIDI_Comm_create (MPIR_Comm *comm); MPIDI_Comm_create (a); })
 #define MPID_Dev_comm_destroy_hook(a) ({ int MPIDI_Comm_destroy(MPIR_Comm *comm); MPIDI_Comm_destroy(a); })
 
diff --git a/src/mpid/pamid/include/mpidi_macros.h b/src/mpid/pamid/include/mpidi_macros.h
index d1a62c9..9fd3cb4 100644
--- a/src/mpid/pamid/include/mpidi_macros.h
+++ b/src/mpid/pamid/include/mpidi_macros.h
@@ -157,7 +157,7 @@ MPIDI_Context_post(pami_context_t       context,
                    pami_work_function   fn,
                    void               * cookie)
 {
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
   /* It is possible that a work function posted to a context may attempt to
    * initiate a communication operation and, if context post were disabled, that
    * operation would be performed directly on the context BY TAKING A LOCK that
@@ -176,7 +176,7 @@ MPIDI_Context_post(pami_context_t       context,
   pami_result_t rc;
   rc = PAMI_Context_post(context, work, fn, cookie);
   MPID_assert(rc == PAMI_SUCCESS);
-#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY_PER_OBJECT) */
+#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY__POBJ) */
   /*
    * It is not necessary to lock the context before access in the "global"
    * mpich lock mode because all threads, application and async progress,
@@ -186,7 +186,7 @@ MPIDI_Context_post(pami_context_t       context,
 #endif
 }
 
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
 #define MPIDI_Send_post(__func, __req)                          \
 ({                                                              \
   pami_context_t context = MPIDI_Context_local(__req);          \
@@ -207,11 +207,11 @@ MPIDI_Context_post(pami_context_t       context,
       PAMI_Context_unlock(context);                             \
     }                                                           \
 })
-#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY_PER_OBJECT) */
+#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY__POBJ) */
 #define MPIDI_Send_post(__func, __req)                          \
 ({                                                              \
   __func(MPIDI_Context[0], __req);                              \
 })
-#endif /* #if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT) */
+#endif /* #if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ) */
 
 #endif
diff --git a/src/mpid/pamid/include/mpidi_mutex.h b/src/mpid/pamid/include/mpidi_mutex.h
index 197eecc..0fdb0f4 100644
--- a/src/mpid/pamid/include/mpidi_mutex.h
+++ b/src/mpid/pamid/include/mpidi_mutex.h
@@ -310,11 +310,11 @@ MPIDI_Mutex_initialize()
   extern int pthread_mutexattr_settype(pthread_mutexattr_t *__attr, int __kind);
 #endif
 #ifndef __PE__
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
    rc = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE_NP);
-#else /*(MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)*/
+#else /*(MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)*/
    rc = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK_NP);
-#endif /*(MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)*/
+#endif /*(MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)*/
 #else /* __PE__ */
 #if !defined(__AIX__)
    rc = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE_NP);
diff --git a/src/mpid/pamid/include/mpidi_thread.h b/src/mpid/pamid/include/mpidi_thread.h
index d279f27..f543565 100644
--- a/src/mpid/pamid/include/mpidi_thread.h
+++ b/src/mpid/pamid/include/mpidi_thread.h
@@ -67,7 +67,7 @@
 #define MPIU_THREAD_CS_SCHED_YIELD(name,_context) MPIU_THREAD_CS_##name##_SCHED_YIELD(_context)
 #define MPIU_THREAD_CS_TRY(name,_context)   MPIU_THREAD_CS_##name##_TRY(_context)
 
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL
 
 #define MPIDI_CS_ENTER(m) ({ if (MPIR_ThreadInfo.isThreaded) {                                             MPIDI_Mutex_acquire(m); } })
 #define MPIDI_CS_EXIT(m)  ({ if (MPIR_ThreadInfo.isThreaded) { MPIDI_Mutex_sync(); MPIDI_Mutex_release(m);                         } })
@@ -105,7 +105,7 @@
 #define MPIU_THREAD_CS_ASYNC_ENTER(_context)        MPIDI_CS_ENTER(8)
 #define MPIU_THREAD_CS_ASYNC_EXIT(_context)         MPIDI_CS_EXIT (8)
 
-#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
+#elif MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ
 
 #define MPIDI_CS_ENTER(m)                               \
     do {                                                \
@@ -178,7 +178,7 @@
 #define MPIU_THREAD_CS_ASYNC_ENTER(_context)        MPIDI_CS_ENTER(8)
 #define MPIU_THREAD_CS_ASYNC_EXIT(_context)         MPIDI_CS_EXIT (8)
 
-#endif /* MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL */
+#endif /* MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL */
 
 
 #endif /* !MPICH_MPIDTHREAD_H_INCLUDED */
diff --git a/src/mpid/pamid/include/mpidimpl.h b/src/mpid/pamid/include/mpidimpl.h
index 645b982..553b644 100644
--- a/src/mpid/pamid/include/mpidimpl.h
+++ b/src/mpid/pamid/include/mpidimpl.h
@@ -53,7 +53,7 @@ typedef struct MPIDI_PG
        MPIU_Object system, but we do use the associated reference counting
        routines.  Therefore, handle must be present, but is not used
        except by debugging routines */
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_OBJECT_HEADER; /* adds handle and ref_count fields */
 
     /* Next pointer used to maintain a list of all process groups known to
        this process */
@@ -143,19 +143,19 @@ typedef struct MPIDI_Port_Ops {
 
 
 #define MPIDI_VC_add_ref( _vc )                                 \
-    do { MPIU_Object_add_ref( _vc ); } while (0)
+    do { MPIR_Object_add_ref( _vc ); } while (0)
 
 #define MPIDI_PG_add_ref(pg_)                   \
 do {                                            \
-    MPIU_Object_add_ref(pg_);                   \
+    MPIR_Object_add_ref(pg_);                   \
 } while (0)
 #define MPIDI_PG_release_ref(pg_, inuse_)       \
 do {                                            \
-    MPIU_Object_release_ref(pg_, inuse_);       \
+    MPIR_Object_release_ref(pg_, inuse_);       \
 } while (0)
 
 #define MPIDI_VC_release_ref( _vc, _inuse ) \
-    do { MPIU_Object_release_ref( _vc, _inuse ); } while (0)
+    do { MPIR_Object_release_ref( _vc, _inuse ); } while (0)
 
 
 /* Initialize a new VC */
diff --git a/src/mpid/pamid/src/coll/allgather/mpido_allgather.c b/src/mpid/pamid/src/coll/allgather/mpido_allgather.c
index 0775cdb..38b46bd 100644
--- a/src/mpid/pamid/src/coll/allgather/mpido_allgather.c
+++ b/src/mpid/pamid/src/coll/allgather/mpido_allgather.c
@@ -174,7 +174,7 @@ int MPIDO_Allgather_bcast(const void *sendbuf,
   np = comm_ptr ->local_size;
   MPIDU_Datatype_get_extent_macro(recvtype, extent);
 
-  MPIU_Ensure_Aint_fits_in_pointer ((MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
+  MPIR_Ensure_Aint_fits_in_pointer ((MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
 				     np * recvcount * extent));
   if (sendbuf != MPI_IN_PLACE)
   {
diff --git a/src/mpid/pamid/src/comm/mpid_comm.c b/src/mpid/pamid/src/comm/mpid_comm.c
index ab21b69..34bb908 100644
--- a/src/mpid/pamid/src/comm/mpid_comm.c
+++ b/src/mpid/pamid/src/comm/mpid_comm.c
@@ -375,7 +375,7 @@ void MPIDI_Coll_comm_destroy(MPIR_Comm *comm)
   if(comm->comm_kind != MPIR_COMM_KIND__INTRACOMM)
     return;
 
-  /* It's possible (MPIR_Setup_intercomm_localcomm) to have an intracomm
+  /* It's possible (MPII_Setup_intercomm_localcomm) to have an intracomm
      without a geometry even when using optimized collectives */
   if(comm->mpid.geometry == PAMI_GEOMETRY_NULL)
     return;
diff --git a/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c b/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c
index 1135fd6..c573c82 100644
--- a/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c
+++ b/src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c
@@ -320,8 +320,8 @@ int MPID_Comm_disconnect(MPIR_Comm *comm_ptr)
 
 	  /* sanity: the INVALID context ID value could potentially conflict with the
 	   * dynamic proccess space */
-	  MPIU_Assert(lcomm->context_id     != MPIU_INVALID_CONTEXT_ID);
-	  MPIU_Assert(lcomm->recvcontext_id != MPIU_INVALID_CONTEXT_ID);
+          MPIR_Assert(lcomm->context_id     != MPIR_INVALID_CONTEXT_ID);
+          MPIR_Assert(lcomm->recvcontext_id != MPIR_INVALID_CONTEXT_ID);
 
 	  /* FIXME - we probably need a unique context_id. */
 
diff --git a/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c b/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
index 5de7b1a..6e3b21c 100644
--- a/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
+++ b/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
@@ -172,13 +172,13 @@ int MPIDI_Comm_spawn_multiple(int count, char **commands,
 
 	/* Spawn the processes */
 #ifdef USE_PMI2_API
-        MPIU_Assert(count > 0);
+        MPIR_Assert(count > 0);
         {
             int *argcs = MPL_malloc(count*sizeof(int));
             struct MPIR_Info preput;
             struct MPIR_Info *preput_p[2] = { &preput, &tmp_info_ptr };
 
-            MPIU_Assert(argcs);
+            MPIR_Assert(argcs);
 
             info_keyval_sizes = MPL_malloc(count * sizeof(int));
 
diff --git a/src/mpid/pamid/src/dyntask/mpidi_pg.c b/src/mpid/pamid/src/dyntask/mpidi_pg.c
index bbed28c..a344b6a 100644
--- a/src/mpid/pamid/src/dyntask/mpidi_pg.c
+++ b/src/mpid/pamid/src/dyntask/mpidi_pg.c
@@ -204,10 +204,10 @@ int MPIDI_PG_Finalize(void)
         fails to use MPI_Comm_disconnect on communicators that
         were created with the dynamic process routines.*/
 	/* XXX DJG FIXME-MT should we be checking this? */
-     if (MPIU_Object_get_ref(pg) == 0 ) {
+     if (MPIR_Object_get_ref(pg) == 0 ) {
        if (pg == MPIDI_Process.my_pg)
          MPIDI_Process.my_pg = NULL;
-       MPIU_Object_set_ref(pg, 0); /* satisfy assertions in PG_Destroy */
+       MPIR_Object_set_ref(pg, 0); /* satisfy assertions in PG_Destroy */
        MPIDI_PG_Destroy( pg );
      }
      pg     = pgNext;
@@ -247,7 +247,7 @@ int MPIDI_PG_Create(int vct_sz, void * pg_id, MPIDI_PG_t ** pg_ptr)
     /* The reference count indicates the number of vc's that are or
        have been in use and not disconnected. It starts at zero,
        except for MPI_COMM_WORLD. */
-    MPIU_Object_set_ref(pg, 0);
+    MPIR_Object_set_ref(pg, 0);
     pg->size = vct_sz;
     pg->id   = MPL_strdup(pg_id);
     TRACE_ERR("PG_Create - pg=%x pg->id=%s pg->vct=%x\n", pg, pg->id, pg->vct);
@@ -302,7 +302,7 @@ int MPIDI_PG_Destroy(MPIDI_PG_t * pg)
     int i;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_Object_get_ref(pg) == 0);
+    MPIR_Assert(MPIR_Object_get_ref(pg) == 0);
 
     pg_prev = NULL;
     pg_cur = MPIDI_PG_list;
@@ -683,7 +683,7 @@ int MPIDI_connToStringKVS( char **buf_p, int *slen, MPIDI_PG_t *pg )
     }
 #endif
 
-    MPIU_Assert(len <= curSlen);
+    MPIR_Assert(len <= curSlen);
 
     *buf_p = string;
     *slen  = len;
@@ -819,7 +819,7 @@ static int MPIDI_connToString( char **buf_p, int *slen, MPIDI_PG_t *pg )
     /* XXX DJG TODO figure out what this little bit is all about. */
     if (strstr( pg_id, "singinit_kvs" ) == pg_id) {
 #ifdef USE_PMI2_API
-        MPIU_Assertp(0); /* don't know what to do here for pmi2 yet.  DARIUS */
+        MPIR_Assertp(0); /* don't know what to do here for pmi2 yet.  DARIUS */
 #else
 	PMI_KVS_Get_my_name( pg->id, 256 );
 #endif
@@ -938,10 +938,10 @@ int MPIDI_PG_Dup_vcr( MPIDI_PG_t *pg, int rank, pami_task_t taskid, MPID_VCR *vc
        process group *and* the reference count of the vc (this
        allows us to distinquish between Comm_free and Comm_disconnect) */
     /* FIXME-MT: This should be a fetch and increment for thread-safety */
-    /*if (MPIU_Object_get_ref(vcr_p) == 0) { */
+    /*if (MPIR_Object_get_ref(vcr_p) == 0) { */
 	TRACE_ERR("MPIDI_PG_add_ref on pg=%s pg=%x\n", pg->id, pg);
 	MPIDI_PG_add_ref(pg);
-        inuse=MPIU_Object_get_ref(pg);
+        inuse=MPIR_Object_get_ref(pg);
 	TRACE_ERR("after MPIDI_PG_add_ref on pg=%s inuse=%d\n", pg->id, inuse);
 /*	MPIDI_VC_add_ref(vcr_p);
     }
@@ -967,10 +967,10 @@ int MPIU_PG_Printall( FILE *fp )
     while (pg) {
         /* XXX DJG FIXME-MT should we be checking this? */
 	fprintf( fp, "size = %d, refcount = %d, id = %s\n",
-		 pg->size, MPIU_Object_get_ref(pg), (char *)pg->id );
+                 pg->size, MPIR_Object_get_ref(pg), (char *)pg->id );
 	for (i=0; i<pg->size; i++) {
 	    fprintf( fp, "\tVCT rank = %d, refcount = %d, taskid = %d\n",
-		     pg->vct[i].pg_rank, MPIU_Object_get_ref(pg),
+                     pg->vct[i].pg_rank, MPIR_Object_get_ref(pg),
 		     pg->vct[i].taskid );
 	}
 	fflush(fp);
diff --git a/src/mpid/pamid/src/dyntask/mpidi_port.c b/src/mpid/pamid/src/dyntask/mpidi_port.c
index 10c6c86..d539323 100644
--- a/src/mpid/pamid/src/dyntask/mpidi_port.c
+++ b/src/mpid/pamid/src/dyntask/mpidi_port.c
@@ -91,7 +91,7 @@ void MPIDI_Recvfrom_remote_world(pami_context_t    context,
   q_item = MPL_malloc(sizeof(MPIDI_Acceptq_t));
   q_item->vcr = MPL_malloc(sizeof(struct MPID_VCR_t));
   q_item->vcr->pg = MPL_malloc(sizeof(MPIDI_PG_t));
-  MPIU_Object_set_ref(q_item->vcr->pg, 0);
+  MPIR_Object_set_ref(q_item->vcr->pg, 0);
   TRACE_ERR("ENTER MPIDI_Acceptq_enqueue-1 q_item=%llx _msginfo=%llx (AM_struct *)_msginfo=%llx ((AM_struct *)_msginfo)->vcr=%llx\n", q_item, _msginfo, (AM_struct *)_msginfo, ((AM_struct *)_msginfo)->vcr);
   q_item->port_name_tag = ((AM_struct *)_msginfo)->port_name_tag;
   q_item->vcr->taskid = PAMIX_Endpoint_query(sender);
@@ -226,7 +226,7 @@ int MPIDI_Connect_to_root(const char * port_name,
        connection routine) */
     vc = MPL_malloc(sizeof(struct MPID_VCR_t));
     vc->pg = MPL_malloc(sizeof(MPIDI_PG_t));
-    MPIU_Object_set_ref(vc->pg, 0);
+    MPIR_Object_set_ref(vc->pg, 0);
     TRACE_ERR("vc from MPIDI_Connect_to_root=%llx vc->pg=%llx\n", vc, vc->pg);
     /* FIXME - where does this vc get freed? */
 
@@ -400,8 +400,8 @@ static int MPIDI_Initialize_tmp_comm(struct MPIR_Comm **comm_pptr,
 
     /* sanity: the INVALID context ID value could potentially conflict with the
      * dynamic proccess space */
-    MPIU_Assert(tmp_comm->context_id     != MPIU_INVALID_CONTEXT_ID);
-    MPIU_Assert(tmp_comm->recvcontext_id != MPIU_INVALID_CONTEXT_ID);
+    MPIR_Assert(tmp_comm->context_id     != MPIR_INVALID_CONTEXT_ID);
+    MPIR_Assert(tmp_comm->recvcontext_id != MPIR_INVALID_CONTEXT_ID);
 
     /* FIXME - we probably need a unique context_id. */
     tmp_comm->remote_size = 1;
@@ -509,7 +509,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
     pg_translation *local_translation = NULL, *remote_translation = NULL;
     pg_node *pg_list = NULL;
     MPIDI_PG_t **remote_pg = NULL;
-    MPIU_Context_id_t recvcontext_id = MPIU_INVALID_CONTEXT_ID;
+    MPIR_Context_id_t recvcontext_id = MPIR_INVALID_CONTEXT_ID;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
     long long comm_cntr, lcomm_cntr;
 
@@ -540,7 +540,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
 	/* Make an array to translate local ranks to process group index
 	   and rank */
         local_translation = MPL_malloc(local_comm_size*sizeof(pg_translation));
-/*	MPIU_CHKLMEM_MALLOC(local_translation,pg_translation*,
+/*	MPIR_CHKLMEM_MALLOC(local_translation,pg_translation*,
 			    local_comm_size*sizeof(pg_translation),
 			    mpi_errno,"local_translation"); */
 
@@ -737,8 +737,8 @@ static int MPIDI_ExtractLocalPGInfo( struct MPIR_Comm *comm_p,
     pg_list->index = cur_index++;
     pg_list->next = NULL;
     /* XXX DJG FIXME-MT should we be checking this?  the add/release macros already check this */
-    TRACE_ERR("MPIU_Object_get_ref(comm_p->vcr[0]->pg) comm_p=%x vsr=%x pg=%x %d\n", comm_p, comm_p->vcr[0], comm_p->vcr[0]->pg, MPIU_Object_get_ref(comm_p->vcr[0]->pg));
-    MPIU_Assert( MPIU_Object_get_ref(comm_p->vcr[0]->pg));
+    TRACE_ERR("MPIR_Object_get_ref(comm_p->vcr[0]->pg) comm_p=%x vsr=%x pg=%x %d\n", comm_p, comm_p->vcr[0], comm_p->vcr[0]->pg, MPIR_Object_get_ref(comm_p->vcr[0]->pg));
+    MPIR_Assert( MPIR_Object_get_ref(comm_p->vcr[0]->pg));
     mpi_errno = MPIDI_PG_To_string(comm_p->vcr[0]->pg, &pg_list->str,
 				   &pg_list->lenStr );
     TRACE_ERR("pg_list->str=%s pg_list->lenStr=%d\n", pg_list->str, pg_list->lenStr);
@@ -757,7 +757,7 @@ static int MPIDI_ExtractLocalPGInfo( struct MPIR_Comm *comm_p,
 	while (pg_iter != NULL) {
 	    /* Check to ensure pg is (probably) valid */
             /* XXX DJG FIXME-MT should we be checking this?  the add/release macros already check this */
-	    MPIU_Assert(MPIU_Object_get_ref(comm_p->vcr[i]->pg) != 0);
+            MPIR_Assert(MPIR_Object_get_ref(comm_p->vcr[i]->pg) != 0);
 	    if (MPIDI_PG_Id_compare(comm_p->vcr[i]->pg->id, pg_iter->pg_id)) {
 		local_translation[i].pg_index = pg_iter->index;
 		local_translation[i].pg_rank  = comm_p->vcr[i]->pg_rank;
@@ -1187,7 +1187,7 @@ int MPIDI_Comm_accept(const char *port_name, MPIR_Info *info, int root,
 	/* Make an array to translate local ranks to process group index and
 	   rank */
         local_translation = MPL_malloc(local_comm_size*sizeof(pg_translation));
-/*	MPIU_CHKLMEM_MALLOC(local_translation,pg_translation*,
+/*	MPIR_CHKLMEM_MALLOC(local_translation,pg_translation*,
 			    local_comm_size*sizeof(pg_translation),
 			    mpi_errno,"local_translation"); */
 
@@ -1684,7 +1684,7 @@ int MPID_PG_BCast( MPIR_Comm *peercomm_p, MPIR_Comm *comm_p, int root )
     pg_node *pg_list, *pg_next, *pg_head = 0;
     int rank, i, peer_comm_size;
     MPIR_Errflag_t errflag = MPIR_ERR_NONE;
-    MPIU_CHKLMEM_DECL(1);
+    MPIR_CHKLMEM_DECL(1);
 
     peer_comm_size = comm_p->local_size;
     rank            = comm_p->rank;
diff --git a/src/mpid/pamid/src/mpid_aint.c b/src/mpid/pamid/src/mpid_aint.c
index 39c50b9..e41459d 100644
--- a/src/mpid/pamid/src/mpid_aint.c
+++ b/src/mpid/pamid/src/mpid_aint.c
@@ -24,12 +24,12 @@ MPI_Aint MPID_Aint_add(MPI_Aint base, MPI_Aint disp)
 {
     MPI_Aint result;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_AINT_ADD);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_AINT_ADD);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_AINT_ADD);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_AINT_ADD);
 
-    result =  MPIU_VOID_PTR_CAST_TO_MPI_AINT ((char*)MPIU_AINT_CAST_TO_VOID_PTR(base) + disp);
+    result =  MPIR_VOID_PTR_CAST_TO_MPI_AINT ((char*)MPIR_AINT_CAST_TO_VOID_PTR(base) + disp);
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_AINT_ADD);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_AINT_ADD);
     return result;
 }
 
@@ -51,11 +51,11 @@ MPI_Aint MPID_Aint_diff(MPI_Aint addr1, MPI_Aint addr2)
 {
     MPI_Aint result;
 
-    MPIDI_STATE_DECL(MPID_STATE_MPID_AINT_DIFF);
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_AINT_DIFF);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_AINT_DIFF);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_AINT_DIFF);
 
-    result =  MPIU_PTR_DISP_CAST_TO_MPI_AINT ((char*)MPIU_AINT_CAST_TO_VOID_PTR(addr1) - (char*)MPIU_AINT_CAST_TO_VOID_PTR(addr2));
+    result =  MPIR_PTR_DISP_CAST_TO_MPI_AINT ((char*)MPIR_AINT_CAST_TO_VOID_PTR(addr1) - (char*)MPIR_AINT_CAST_TO_VOID_PTR(addr2));
 
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_AINT_DIFF);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_AINT_DIFF);
     return result;
 }
diff --git a/src/mpid/pamid/src/mpid_imrecv.c b/src/mpid/pamid/src/mpid_imrecv.c
index 426707e..93ad50b 100644
--- a/src/mpid/pamid/src/mpid_imrecv.c
+++ b/src/mpid/pamid/src/mpid_imrecv.c
@@ -27,8 +27,8 @@ int MPID_Imrecv(void *buf, int count, MPI_Datatype datatype,
 
     }
 
-  MPIU_Assert(message != NULL);
-  MPIU_Assert(message->kind == MPIR_REQUEST_KIND__MPROBE);
+  MPIR_Assert(message != NULL);
+  MPIR_Assert(message->kind == MPIR_REQUEST_KIND__MPROBE);
 
   /* promote the request object to be a "real" recv request */
   message->kind = MPIR_REQUEST_KIND__RECV;
diff --git a/src/mpid/pamid/src/mpid_init.c b/src/mpid/pamid/src/mpid_init.c
index bfc2cf5..20f3236 100644
--- a/src/mpid/pamid/src/mpid_init.c
+++ b/src/mpid/pamid/src/mpid_init.c
@@ -56,7 +56,7 @@ MPIDI_Process_t  MPIDI_Process = {
   .verbose               = 0,
   .statistics            = 0,
 
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
   .avail_contexts        = MPIDI_MAX_CONTEXTS,
   .async_progress = {
     .active              = 0,
@@ -675,7 +675,7 @@ MPIDI_PAMI_context_init(int* threading, int *size)
 #endif
   int  numTasks;
 
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
   /*
    * ASYNC_PROGRESS_MODE_LOCKED requires context post because the async thread
    * will hold the context lock indefinitely; the only option for an application
@@ -694,7 +694,7 @@ MPIDI_PAMI_context_init(int* threading, int *size)
       MPIDI_Process.perobj.context_post.requested == 0)
     MPID_Abort (NULL, 0, 1, "'locking' async progress requires context post");
 
-#else /* MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY_PER_OBJECT */
+#else /* MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY__POBJ */
   /*
    * ASYNC_PROGRESS_MODE_LOCKED is not applicable in the "global lock" thread
    * mode. See discussion in src/mpid/pamid/src/mpid_progress.h for more
@@ -709,7 +709,7 @@ MPIDI_PAMI_context_init(int* threading, int *size)
   /* ----------------------------------
    *  Figure out the context situation
    * ---------------------------------- */
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
 
   /* Limit the number of requested contexts by the maximum number of contexts
    * allowed.  The default number of requested contexts depends on the mpich
@@ -744,7 +744,7 @@ MPIDI_PAMI_context_init(int* threading, int *size)
     --MPIDI_Process.avail_contexts;
   MPID_assert_always(MPIDI_Process.avail_contexts);
 
-#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY_PER_OBJECT) */
+#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY__POBJ) */
 
   /* Only a single context is supported in the 'global' mpich lock mode.
    *
@@ -1072,7 +1072,7 @@ MPIDI_PAMI_init(int* rank, int* size, int* threading)
             break;
         }
       printf("MPICH_THREAD_GRANULARITY : '%s'\n",
-             (MPICH_THREAD_GRANULARITY==MPICH_THREAD_GRANULARITY_PER_OBJECT)?"per object":"global");
+             (MPICH_THREAD_GRANULARITY==MPICH_THREAD_GRANULARITY__POBJ)?"per object":"global");
 #ifdef ASSERT_LEVEL
       printf("ASSERT_LEVEL            : %d\n", ASSERT_LEVEL);
 #else
@@ -1371,7 +1371,7 @@ int MPID_Init(int * argc,
 	}
 
 	MPIR_Process.comm_parent = comm;
-	MPIU_Assert(MPIR_Process.comm_parent != NULL);
+        MPIR_Assert(MPIR_Process.comm_parent != NULL);
 	MPL_strncpy(comm->name, "MPI_COMM_PARENT", MPI_MAX_OBJECT_NAME);
 
 	/* FIXME: Check that this intercommunicator gets freed in MPI_Finalize
diff --git a/src/mpid/pamid/src/mpid_progress.c b/src/mpid/pamid/src/mpid_progress.c
index 4a93f95..fd3d321 100644
--- a/src/mpid/pamid/src/mpid_progress.c
+++ b/src/mpid/pamid/src/mpid_progress.c
@@ -39,9 +39,9 @@ int MPIDI_Progress_register_hook(int (*progress_fn)(int*), int *id)
 {
     int mpi_errno = MPI_SUCCESS;
     int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PROGRESS_REGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PROGRESS_REGISTER_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PROGRESS_REGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PROGRESS_REGISTER_HOOK);
     MPIU_THREAD_CS_ENTER(ASYNC,);
 
     for (i = 0; i < MAX_PROGRESS_HOOKS; i++) {
@@ -62,7 +62,7 @@ int MPIDI_Progress_register_hook(int (*progress_fn)(int*), int *id)
 
   fn_exit:
     MPIU_THREAD_CS_EXIT(ASYNC,);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PROGRESS_REGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PROGRESS_REGISTER_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -76,19 +76,19 @@ int MPIDI_Progress_register_hook(int (*progress_fn)(int*), int *id)
 int MPIDI_Progress_deregister_hook(int id)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PROGRESS_DEREGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PROGRESS_DEREGISTER_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PROGRESS_DEREGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PROGRESS_DEREGISTER_HOOK);
     MPIU_THREAD_CS_ENTER(ASYNC,);
 
-    MPIU_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS && progress_hooks[id].func_ptr != NULL);
+    MPIR_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS && progress_hooks[id].func_ptr != NULL);
 
     progress_hooks[id].func_ptr = NULL;
     progress_hooks[id].active = FALSE;
 
   fn_exit:
     MPIU_THREAD_CS_EXIT(ASYNC,);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PROGRESS_DEREGISTER_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PROGRESS_DEREGISTER_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -102,18 +102,18 @@ int MPIDI_Progress_deregister_hook(int id)
 int MPIDI_CH3I_Progress_activate_hook(int id)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
     MPIU_THREAD_CS_ENTER(MPIDCOMM,);
 
-    MPIU_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
+    MPIR_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
                 progress_hooks[id].active == FALSE && progress_hooks[id].func_ptr != NULL);
     progress_hooks[id].active = TRUE;
 
   fn_exit:
     MPIU_THREAD_CS_EXIT(MPIDCOMM,);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_ACTIVATE_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -128,18 +128,18 @@ int MPIDI_CH3I_Progress_activate_hook(int id)
 int MPIDI_CH3I_Progress_deactivate_hook(int id)
 {
     int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
 
-    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
     MPIU_THREAD_CS_ENTER(MPIDCOMM,);
 
-    MPIU_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
+    MPIR_Assert(id >= 0 && id < MAX_PROGRESS_HOOKS &&
                 progress_hooks[id].active == TRUE && progress_hooks[id].func_ptr != NULL);
     progress_hooks[id].active = FALSE;
 
   fn_exit:
     MPIU_THREAD_CS_EXIT(MPIDCOMM,);
-    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_DEACTIVATE_HOOK);
     return mpi_errno;
 
   fn_fail:
@@ -159,7 +159,7 @@ MPIDI_Progress_init()
   pamix_progress_function progress_fn = MPIDI_Progress_async_poll;
   uintptr_t i;
 
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
   /* In the "per object" mpich lock mode the only possible progress functions
    * are the "context lock" trigger progress function and the 'NULL' progress
    * function.
@@ -227,7 +227,7 @@ MPIDI_Progress_async_start(pami_context_t context, void *cookie)
    */
   MPIU_THREAD_CS_ENTER(ASYNC,);
 
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
   if (MPIDI_Process.async_progress.active == 0)
     {
       /* Asynchronous progress was completely disabled and now async progress
@@ -260,7 +260,7 @@ MPIDI_Progress_async_end  (pami_context_t context, void *cookie)
 
   MPIDI_Process.async_progress.active -= 1;
 
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
   if (MPIDI_Process.async_progress.active == 0)
     {
       /* Asynchronous progress is now completely disabled on all contexts. */
@@ -298,7 +298,7 @@ MPIDI_Progress_async_poll (pami_context_t context, void *cookie)
     {
       for (i = 0; i < MAX_PROGRESS_HOOKS; i++) {
         if (progress_hooks[i].active == TRUE) {
-          MPIU_Assert(progress_hooks[i].func_ptr != NULL);
+          MPIR_Assert(progress_hooks[i].func_ptr != NULL);
           progress_hooks[i].func_ptr(&made_progress);
         }
       }
@@ -330,7 +330,7 @@ MPIDI_Progress_async_poll_perobj (pami_context_t context, void *cookie)
 
   for (i = 0; i < MAX_PROGRESS_HOOKS; i++) {
     if (progress_hooks[i].active == TRUE) {
-      MPIU_Assert(progress_hooks[i].func_ptr != NULL);
+      MPIR_Assert(progress_hooks[i].func_ptr != NULL);
       progress_hooks[i].func_ptr(&made_progress);
     }
   }
diff --git a/src/mpid/pamid/src/mpid_progress.h b/src/mpid/pamid/src/mpid_progress.h
index aace473..be2b7fc 100644
--- a/src/mpid/pamid/src/mpid_progress.h
+++ b/src/mpid/pamid/src/mpid_progress.h
@@ -239,7 +239,7 @@ MPID_Progress_wait_inline(unsigned loop_count)
 {
   pami_result_t rc = 0;
 
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
   /*
    * In the "per object" thread mode the "global" lock is defined as a noop and
    * therefore no locks are held at this point.
diff --git a/src/mpid/pamid/src/mpid_request.c b/src/mpid/pamid/src/mpid_request.c
index 1102cda..0a1607d 100644
--- a/src/mpid/pamid/src/mpid_request.c
+++ b/src/mpid/pamid/src/mpid_request.c
@@ -22,7 +22,7 @@
 #include <mpidimpl.h>
 
 #ifndef MPIR_REQUEST_PREALLOC
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__GLOBAL)
 #define  MPIR_REQUEST_PREALLOC 16
 #elif (MPIU_HANDLE_ALLOCATION_METHOD == MPIU_HANDLE_ALLOCATION_THREAD_LOCAL)
 #define  MPIR_REQUEST_PREALLOC 512  //Have direct more reqyests for all threads
@@ -40,7 +40,7 @@
 
 /* these are referenced by src/mpi/pt2pt/wait.c in PMPI_Wait! */
 MPIR_Request MPIR_Request_direct[MPIR_REQUEST_PREALLOC] __attribute__((__aligned__(64)));
-MPIU_Object_alloc_t MPIR_Request_mem =
+MPIR_Object_alloc_t MPIR_Request_mem =
   {
     0, 0, 0, 0, MPIR_REQUEST, sizeof(MPIR_Request),
     MPIR_Request_direct,
@@ -55,11 +55,11 @@ void MPIDI_Request_allocate_pool()
   MPIR_Request *prev, *cur;
   /* batch allocate a linked list of requests */
   MPIU_THREAD_CS_ENTER(HANDLEALLOC,);
-  prev = MPIU_Handle_obj_alloc_unsafe(&MPIR_Request_mem);
+  prev = MPIR_Handle_obj_alloc_unsafe(&MPIR_Request_mem);
   MPID_assert(prev != NULL);
   prev->mpid.next = NULL;
   for (i = 1; i < MPIR_REQUEST_TLS_MAX; ++i) {
-    cur = MPIU_Handle_obj_alloc_unsafe(&MPIR_Request_mem);
+    cur = MPIR_Handle_obj_alloc_unsafe(&MPIR_Request_mem);
     MPID_assert(cur != NULL);
     cur->mpid.next = prev;
     prev = cur;
@@ -75,7 +75,7 @@ void
 MPIDI_Request_uncomplete(MPIR_Request *req)
 {
   int count;
-  MPIU_Object_add_ref(req);
+  MPIR_Object_add_ref(req);
   MPIR_cc_incr(req->cc_ptr, &count);
 }
 
diff --git a/src/mpid/pamid/src/mpid_request.h b/src/mpid/pamid/src/mpid_request.h
index c260864..ad283cf 100644
--- a/src/mpid/pamid/src/mpid_request.h
+++ b/src/mpid/pamid/src/mpid_request.h
@@ -37,7 +37,7 @@
 #define MPID_Request_discard   MPID_Request_discard_inline
 
 
-extern MPIU_Object_alloc_t MPIR_Request_mem;
+extern MPIR_Object_alloc_t MPIR_Request_mem;
 #if TOKEN_FLOW_CONTROL
 extern void MPIDI_mm_free(void *,size_t);
 #endif
@@ -118,7 +118,7 @@ void    MPIDI_Request_allocate_pool();
     }                                                                   \
   else                                                                  \
     {                                                                   \
-      MPIU_Handle_obj_free(&MPIR_Request_mem, req);                     \
+      MPIR_Handle_obj_free(&MPIR_Request_mem, req);                     \
     }                                                                   \
 })
 
@@ -126,12 +126,12 @@ void    MPIDI_Request_allocate_pool();
 
 #  define MPIDI_Request_tls_alloc(req)                                  \
 ({                                                                      \
-  (req) = MPIU_Handle_obj_alloc(&MPIR_Request_mem);                     \
+  (req) = MPIR_Handle_obj_alloc(&MPIR_Request_mem);                     \
   if (req == NULL)                                                      \
     MPID_Abort(NULL, MPI_ERR_NO_SPACE, -1, "Cannot allocate Request");  \
 })
 
-#  define MPIDI_Request_tls_free(req) MPIU_Handle_obj_free(&MPIR_Request_mem, (req))
+#  define MPIDI_Request_tls_free(req) MPIR_Handle_obj_free(&MPIR_Request_mem, (req))
 
 #endif
 
@@ -176,7 +176,7 @@ MPIDI_Request_create2_fast()
 {
   MPIR_Request * req;
   req = MPIDI_Request_create_basic();
-  MPIU_Object_set_ref(req, 2);
+  MPIR_Object_set_ref(req, 2);
 
   return req;
 }
@@ -222,7 +222,7 @@ MPID_Request_create_inline()
 {
   MPIR_Request * req;
   req = MPIDI_Request_create_basic();
-  MPIU_Object_set_ref(req, 1);
+  MPIR_Object_set_ref(req, 1);
 
   MPIDI_Request_initialize(req);
   req->comm=NULL;
@@ -239,7 +239,7 @@ MPIDI_Request_create2()
 {
   MPIR_Request * req;
   req = MPIR_Request_create();
-  MPIU_Object_set_ref(req, 2);
+  MPIR_Object_set_ref(req, 2);
 
   return req;
 }
@@ -249,7 +249,7 @@ MPIDI_Request_create1()
 {
   MPIR_Request * req;
   req = MPIR_Request_create();
-  MPIU_Object_set_ref(req, 1);
+  MPIR_Object_set_ref(req, 1);
 
   return req;
 }
@@ -272,7 +272,7 @@ MPID_Request_free_inline(MPIR_Request *req)
 {
   int count;
   MPID_assert(HANDLE_GET_MPI_KIND(req->handle) == MPIR_REQUEST);
-  MPIU_Object_release_ref(req, &count);
+  MPIR_Object_release_ref(req, &count);
   MPID_assert(count >= 0);
 
 
diff --git a/src/mpid/pamid/src/mpid_vc.c b/src/mpid/pamid/src/mpid_vc.c
index 6f57cf3..fed17b7 100644
--- a/src/mpid/pamid/src/mpid_vc.c
+++ b/src/mpid/pamid/src/mpid_vc.c
@@ -32,7 +32,7 @@ extern int mpidi_dynamic_tasking;
  */
 struct MPIDI_VCRT
 {
-  MPIU_OBJECT_HEADER;
+  MPIR_OBJECT_HEADER;
   unsigned size;          /**< Number of entries in the table */
   MPID_VCR *vcr_table;  /**< Array of virtual connection references */
 };
@@ -75,7 +75,7 @@ int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr)
     }
     if (vcrt != NULL)
     {
-        MPIU_Object_set_ref(vcrt, 1);
+        MPIR_Object_set_ref(vcrt, 1);
         vcrt->size = size;
         *vcrt_ptr = vcrt;
         result = MPI_SUCCESS;
@@ -89,7 +89,7 @@ int MPID_VCRT_Create(int size, MPID_VCRT *vcrt_ptr)
 
 int MPID_VCRT_Add_ref(MPID_VCRT vcrt)
 {
-    MPIU_Object_add_ref(vcrt);
+    MPIR_Object_add_ref(vcrt);
     return MPI_SUCCESS;
 }
 
@@ -97,7 +97,7 @@ int MPID_VCRT_Release(MPID_VCRT vcrt, int isDisconnect)
 {
     int count, i;
 
-    MPIU_Object_release_ref(vcrt, &count);
+    MPIR_Object_release_ref(vcrt, &count);
 
     if (count == 0) {
 #ifdef DYNAMIC_TASKING
@@ -111,7 +111,7 @@ int MPID_VCRT_Release(MPID_VCRT vcrt, int isDisconnect)
                 vcr->pg_rank == MPIDI_Process.my_pg_rank)
               {
 	        TRACE_ERR("before MPIDI_PG_release_ref on vcr=%x pg=%x pg=%s inuse=%d\n", vcr, vcr->pg, (vcr->pg)->id, inuse);
-                inuse=MPIU_Object_get_ref(vcr->pg);
+                inuse=MPIR_Object_get_ref(vcr->pg);
                 MPIDI_PG_release_ref(vcr->pg, &inuse);
                 if (inuse == 0)
                  {
@@ -120,7 +120,7 @@ int MPID_VCRT_Release(MPID_VCRT vcrt, int isDisconnect)
                  }
                  continue;
               }
-            inuse=MPIU_Object_get_ref(vcr->pg);
+            inuse=MPIR_Object_get_ref(vcr->pg);
 
             MPIDI_PG_release_ref(vcr->pg, &inuse);
             if (inuse == 0)
@@ -315,7 +315,7 @@ int MPID_GPID_GetAllInComm( MPIR_Comm *comm_ptr, int local_size,
     int lastPGID = -1, pgid;
     MPID_VCR vc;
 
-    MPIU_Assert(comm_ptr->local_size == local_size);
+    MPIR_Assert(comm_ptr->local_size == local_size);
 
     if(mpidi_dynamic_tasking) {
       *singlePG = 1;
diff --git a/src/mpid/pamid/src/mpidi_env.c b/src/mpid/pamid/src/mpidi_env.c
index e00282a..740bcee 100644
--- a/src/mpid/pamid/src/mpidi_env.c
+++ b/src/mpid/pamid/src/mpidi_env.c
@@ -547,7 +547,7 @@ MPIDI_Env_setup(int rank, int requested)
     unsigned value = (unsigned)-1;
     char* names[] = {"PAMID_THREAD_MULTIPLE", NULL};
     ENV_Unsigned(names, &value, 1, &found_deprecated_env_var, rank);
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
     /* Any mpich work function posted to a context that eventually initiates
      * other communcation transfers will hang on a lock attempt if the
      * 'context post' feature is not enabled. Until this code flow is fixed
@@ -610,7 +610,7 @@ MPIDI_Env_setup(int rank, int requested)
 
     if (value != -1)
     {
-#if (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY__POBJ)
       /* The 'global' mpich lock mode only supports a single context.
        * See discussion in mpich/src/mpid/pamid/src/mpid_init.c for more
        * information.
@@ -643,7 +643,7 @@ MPIDI_Env_setup(int rank, int requested)
     ENV_Unsigned(names, &value, 1, &found_deprecated_env_var, rank);
     if (value != -1)
     {
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
       MPIDI_Process.perobj.context_post.requested = (value > 0);
 #else
       found_deprecated_env_var++;
@@ -665,7 +665,7 @@ MPIDI_Env_setup(int rank, int requested)
     {
       if (value != ASYNC_PROGRESS_MODE_DISABLED)
       {
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
         if (value == ASYNC_PROGRESS_MODE_LOCKED &&
             MPIDI_Process.perobj.context_post.requested == 0)
         {
@@ -679,7 +679,7 @@ MPIDI_Env_setup(int rank, int requested)
             fprintf(stderr, "The environment variable \"PAMID_ASYNC_PROGRESS=1\" requires \"PAMID_CONTEXT_POST=1\".\n");
         }
 
-#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY_PER_OBJECT) */
+#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY__POBJ) */
         if (value == ASYNC_PROGRESS_MODE_LOCKED)
         {
           /* The only valid async progress mode when using the 'global' mpich
@@ -1091,7 +1091,7 @@ MPIDI_Env_setup(int rank, int requested)
         MPIDI_Process.mp_interrupts=user_interrupts;
         MPIDI_Process.perobj.context_post.requested = 0;
         MPIDI_Process.async_progress.mode    = ASYNC_PROGRESS_MODE_TRIGGER;
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
         MPIDI_Process.avail_contexts         = MPIDI_MAX_CONTEXTS;
 #else
         MPIDI_Process.avail_contexts         = 1;
diff --git a/src/mpid/pamid/src/mpidi_pami_datatype.c b/src/mpid/pamid/src/mpidi_pami_datatype.c
index 0296dd6..296ef4f 100644
--- a/src/mpid/pamid/src/mpidi_pami_datatype.c
+++ b/src/mpid/pamid/src/mpidi_pami_datatype.c
@@ -44,7 +44,7 @@ void MPIDI_PAMI_datatype_commit_hook (MPI_Datatype *ptr)
       pami_result_t pami_dtop_result;
       datatype_ptr->device_datatype = (pami_type_t *) MPL_malloc(sizeof(pami_type_t));
       pami_dtop_result = PAMI_Type_create ((pami_type_t *)datatype_ptr->device_datatype);
-      MPIU_Assert(pami_dtop_result == PAMI_SUCCESS);
+      MPIR_Assert(pami_dtop_result == PAMI_SUCCESS);
 
       /* Flatten the non-contiguous data type into arrays describing the contiguous chunks.
        */
@@ -66,13 +66,13 @@ void MPIDI_PAMI_datatype_commit_hook (MPI_Datatype *ptr)
         else
           cursor_offset = (size_t) dt_offset_array[i] - (size_t)dt_offset_array[i-1];
         pami_dtop_result = PAMI_Type_add_simple (*(pami_type_t*)(datatype_ptr->device_datatype), num_bytes_this_entry, cursor_offset,  1, 0);
-        MPIU_Assert(pami_dtop_result == PAMI_SUCCESS);
+        MPIR_Assert(pami_dtop_result == PAMI_SUCCESS);
       }
 
       /* Complete the PAMI datatype and free arrays.
        */
       pami_dtop_result = PAMI_Type_complete (*(pami_type_t*)(datatype_ptr->device_datatype),1);
-      MPIU_Assert(pami_dtop_result == PAMI_SUCCESS);
+      MPIR_Assert(pami_dtop_result == PAMI_SUCCESS);
       MPL_free(dt_offset_array);
       MPL_free(dt_size_array);
     }
@@ -92,7 +92,7 @@ void MPIDI_PAMI_datatype_destroy_hook (MPIDU_Datatype*ptr)
       if (ptr->device_datatype) {
         pami_result_t pami_dtop_result;
         pami_dtop_result = PAMI_Type_destroy ((pami_type_t *)ptr->device_datatype);
-        MPIU_Assert(pami_dtop_result == PAMI_SUCCESS);
+        MPIR_Assert(pami_dtop_result == PAMI_SUCCESS);
         MPL_free(ptr->device_datatype);
       }
     }
@@ -119,7 +119,7 @@ void MPIDI_PAMI_datatype_dup_hook (MPI_Datatype *ptr)
       pami_result_t pami_dtop_result;
       datatype_ptr->device_datatype = (pami_type_t *) MPL_malloc(sizeof(pami_type_t));
       pami_dtop_result = PAMI_Type_create ((pami_type_t *)datatype_ptr->device_datatype);
-      MPIU_Assert(pami_dtop_result == PAMI_SUCCESS);
+      MPIR_Assert(pami_dtop_result == PAMI_SUCCESS);
 
       /* Flatten the non-contiguous data type into arrays describing the contiguous chunks.
        */
@@ -141,13 +141,13 @@ void MPIDI_PAMI_datatype_dup_hook (MPI_Datatype *ptr)
         else
           cursor_offset = (size_t) dt_offset_array[i] - (size_t)dt_offset_array[i-1];
         pami_dtop_result = PAMI_Type_add_simple (*(pami_type_t*)(datatype_ptr->device_datatype), num_bytes_this_entry, cursor_offset,  1, 0);
-        MPIU_Assert(pami_dtop_result == PAMI_SUCCESS);
+        MPIR_Assert(pami_dtop_result == PAMI_SUCCESS);
       }
 
       /* Complete the PAMI datatype and free arrays.
        */
       pami_dtop_result = PAMI_Type_complete (*(pami_type_t*)(datatype_ptr->device_datatype),1);
-      MPIU_Assert(pami_dtop_result == PAMI_SUCCESS);
+      MPIR_Assert(pami_dtop_result == PAMI_SUCCESS);
       MPL_free(dt_offset_array);
       MPL_free(dt_size_array);
     }
diff --git a/src/mpid/pamid/src/onesided/mpid_win_create.c b/src/mpid/pamid/src/onesided/mpid_win_create.c
index f218d10..ae7a062 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_create.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_create.c
@@ -45,7 +45,7 @@ MPIDI_Win_init( MPI_Aint length,
   /* ----------------------------------------- */
   /*  Setup the common sections of the window  */
   /* ----------------------------------------- */
-  MPIR_Win *win = (MPIR_Win*)MPIU_Handle_obj_alloc(&MPIR_Win_mem);
+  MPIR_Win *win = (MPIR_Win*)MPIR_Handle_obj_alloc(&MPIR_Win_mem);
 
   MPIU_ERR_CHKANDSTMT(win == NULL, mpi_errno, MPI_ERR_NO_MEM,
                      return mpi_errno, "**nomem");
diff --git a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
index 816e2fb..c3e6583 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_fetch_and_op.c
@@ -71,21 +71,21 @@ MPIDI_WinAtomicCB(pami_context_t    context,
   if (ahdr->atomic_type == MPIDI_WIN_REQUEST_COMPARE_AND_SWAP) {
 
     //overwrite value with result in ack_hdr
-    MPIU_Memcpy(ack_hdr.buf, dest_addr, len);
+    MPIR_Memcpy(ack_hdr.buf, dest_addr, len);
     
     if (MPIR_Compare_equal (&ahdr->test, dest_addr, ahdr->datatype))
-      MPIU_Memcpy(dest_addr, ahdr->buf, len);      
+      MPIR_Memcpy(dest_addr, ahdr->buf, len);
   }    
   else if (ahdr->atomic_type == MPIDI_WIN_REQUEST_FETCH_AND_OP) {
     //overwrite value with result
-    MPIU_Memcpy(ack_hdr.buf, dest_addr, len);
+    MPIR_Memcpy(ack_hdr.buf, dest_addr, len);
 
     MPI_User_function *uop;
     int one = 1;
     uop = MPIR_OP_HDL_TO_FN(ahdr->op);
 
     if (ahdr->op == MPI_REPLACE) 
-      MPIU_Memcpy(dest_addr, ahdr->buf, len);
+      MPIR_Memcpy(dest_addr, ahdr->buf, len);
     else if (ahdr->op == MPI_NO_OP);
     else
       (*uop) ((void *)ahdr->buf, dest_addr, &one, &ahdr->datatype);
@@ -126,7 +126,7 @@ MPIDI_WinAtomicAckCB(pami_context_t    context,
   //We have a valid result addr
   if (ahdr->result_addr != NULL) {
     len = MPIDU_Datatype_get_basic_size (ahdr->datatype);
-    MPIU_Memcpy(ahdr->result_addr, ahdr->buf, len);
+    MPIR_Memcpy(ahdr->result_addr, ahdr->buf, len);
   }
     
   MPIDI_Win_DoneCB(context, ahdr->request_addr, PAMI_SUCCESS);
@@ -145,9 +145,9 @@ MPIDI_Atomic (pami_context_t   context,
   len = MPIDU_Datatype_get_basic_size (req->origin.datatype);
   assert(len <= MAX_ATOMIC_TYPE_SIZE);
   if (req->buffer)
-    MPIU_Memcpy(atomic_hdr.buf, req->buffer, len);
+    MPIR_Memcpy(atomic_hdr.buf, req->buffer, len);
   if (req->type == MPIDI_WIN_REQUEST_COMPARE_AND_SWAP)
-    MPIU_Memcpy(atomic_hdr.test, req->compare_buffer, len);
+    MPIR_Memcpy(atomic_hdr.test, req->compare_buffer, len);
   
   atomic_hdr.result_addr = req->user_buffer;
   atomic_hdr.remote_addr = req->win->mpid.info[req->target.rank].base_addr + req->offset;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_free.c b/src/mpid/pamid/src/onesided/mpid_win_free.c
index 740834f..2533706 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_free.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_free.c
@@ -125,7 +125,7 @@ MPID_Win_free(MPIR_Win **win_ptr)
 
   MPIR_Comm_release(win->comm_ptr, 0);
 
-  MPIU_Handle_obj_free(&MPIR_Win_mem, win);
+  MPIR_Handle_obj_free(&MPIR_Win_mem, win);
 
 fn_fail:
   return mpi_errno;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c b/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
index 54b90da..d1a6ae0 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get_accumulate.c
@@ -45,7 +45,7 @@ MPIDI_Fetch_data_op(const void   * origin_addr,
         dest_addr = (char *) base + disp_unit * target_disp;
 
         MPIDU_Datatype_get_size_macro(origin_datatype, len);
-        MPIU_Memcpy(result_addr, dest_addr, len);
+        MPIR_Memcpy(result_addr, dest_addr, len);
         if (op != MPI_NO_OP) {
             uop = MPIR_OP_HDL_TO_FN(op);
             one = 1;
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get_info.c b/src/mpid/pamid/src/onesided/mpid_win_get_info.c
index 1cb1156..5768e99 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get_info.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get_info.c
@@ -96,7 +96,7 @@ MPID_Win_get_info(MPIR_Win     *win,
     int mpi_errno = MPI_SUCCESS;
 
     /* Allocate an empty info object */
-    mpi_errno = MPIU_Info_alloc(info_p);
+    mpi_errno = MPIR_Info_alloc(info_p);
     MPID_assert(mpi_errno == MPI_SUCCESS);
     mpi_errno = MPIDI_Win_get_info(win, info_p);
     MPID_assert(mpi_errno == MPI_SUCCESS);
diff --git a/src/mpid/pamid/src/pt2pt/mpid_cancel.c b/src/mpid/pamid/src/pt2pt/mpid_cancel.c
index d40443e..9b8d30f 100644
--- a/src/mpid/pamid/src/pt2pt/mpid_cancel.c
+++ b/src/mpid/pamid/src/pt2pt/mpid_cancel.c
@@ -98,11 +98,11 @@ MPID_Cancel_send(MPIR_Request * sreq)
     return MPI_SUCCESS;
 
   MPIDI_Request_uncomplete(sreq);
-  /* TRACE_ERR("Posting cancel for request=%p   cc(curr)=%d ref(curr)=%d\n", sreq, val+1, MPIU_Object_get_ref(sreq)); */
+  /* TRACE_ERR("Posting cancel for request=%p   cc(curr)=%d ref(curr)=%d\n", sreq, val+1, MPIR_Object_get_ref(sreq)); */
 
   pami_context_t context = MPIDI_Context_local(sreq);
 
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
   if (likely(MPIDI_Process.perobj.context_post.active > 0))
     {
       /* This leaks intentionally.  At this time, the amount of work
@@ -123,7 +123,7 @@ MPID_Cancel_send(MPIR_Request * sreq)
        MPIDI_CancelReq_post(context, sreq);
        PAMI_Context_unlock(context);
     }
-#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY_PER_OBJECT) */
+#else /* (MPICH_THREAD_GRANULARITY != MPICH_THREAD_GRANULARITY__POBJ) */
   /*
    * It is not necessary to lock the context before access in the "global" mpich
    * lock mode because all application threads must first acquire the global
diff --git a/src/mpid/pamid/src/pt2pt/mpid_isend.h b/src/mpid/pamid/src/pt2pt/mpid_isend.h
index c2aa646..7928908 100644
--- a/src/mpid/pamid/src/pt2pt/mpid_isend.h
+++ b/src/mpid/pamid/src/pt2pt/mpid_isend.h
@@ -30,7 +30,7 @@
 static inline unsigned
 MPIDI_Context_hash(pami_task_t rank, unsigned ctxt, unsigned bias, unsigned ncontexts)
 {
-#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT)
+#if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ)
   return (( rank + ctxt + bias ) & (ncontexts-1));
 #else
   /* The 'global' mpich lock mode only supports a single context. See
diff --git a/src/mpid/pamid/src/pt2pt/persistent/mpid_startall.c b/src/mpid/pamid/src/pt2pt/persistent/mpid_startall.c
index ed8639f..dbb1c3a 100644
--- a/src/mpid/pamid/src/pt2pt/persistent/mpid_startall.c
+++ b/src/mpid/pamid/src/pt2pt/persistent/mpid_startall.c
@@ -85,7 +85,7 @@ int MPID_Startall(int count, MPIR_Request * requests[])
              * us.
              */
             if (preq->partner_request != NULL)
-              MPIU_Object_add_ref(preq->partner_request);
+              MPIR_Object_add_ref(preq->partner_request);
             break;
           }
 
diff --git a/src/pm/util/simple_pmiutil2.h b/src/pm/util/simple_pmiutil2.h
index ed0b741..f8f877c 100644
--- a/src/pm/util/simple_pmiutil2.h
+++ b/src/pm/util/simple_pmiutil2.h
@@ -8,7 +8,7 @@
 #define PMIU_MAXLINE 1024
 #define PMIU_IDSIZE    32
 
-/* we don't have access to MPIU_Assert and friends here in the PMI code */
+/* we don't have access to MPIR_Assert and friends here in the PMI code */
 #if defined(HAVE_ASSERT_H)
 #  include <assert.h>
 #  define PMIU_Assert(expr) assert(expr)
diff --git a/src/pmi/pmi2/simple/pmi2compat.h b/src/pmi/pmi2/simple/pmi2compat.h
index d897d75..c834520 100644
--- a/src/pmi/pmi2/simple/pmi2compat.h
+++ b/src/pmi/pmi2/simple/pmi2compat.h
@@ -10,8 +10,8 @@
 #define PMI2U_Free MPL_free
 #define PMI2U_Strdup MPL_strdup
 #define PMI2U_Strnapp MPL_strnapp
-#define PMI2U_Assert MPIU_Assert
+#define PMI2U_Assert MPIR_Assert
 #define PMI2U_Exit MPL_exit
 #define PMI2U_Info MPIR_Info
-#define PMI2U_Memcpy MPIU_Memcpy
+#define PMI2U_Memcpy MPIR_Memcpy
 
diff --git a/src/pmi/simple/simple_pmiutil.h b/src/pmi/simple/simple_pmiutil.h
index 020398d..7404ad0 100644
--- a/src/pmi/simple/simple_pmiutil.h
+++ b/src/pmi/simple/simple_pmiutil.h
@@ -8,7 +8,7 @@
 #define PMIU_MAXLINE 1024
 #define PMIU_IDSIZE    32
 
-/* we don't have access to MPIU_Assert and friends here in the PMI code */
+/* we don't have access to MPIR_Assert and friends here in the PMI code */
 #if defined(HAVE_ASSERT_H)
 #  include <assert.h>
 #  define PMIU_Assert(expr) assert(expr)
diff --git a/src/util/logging/rlog/rlogtime.c b/src/util/logging/rlog/rlogtime.c
index c166097..bc035b6 100644
--- a/src/util/logging/rlog/rlogtime.c
+++ b/src/util/logging/rlog/rlogtime.c
@@ -136,11 +136,11 @@ static char *get_random_color_str(void)
 }
 
 static int s_RLOG_Initialized = 0;
-int MPIU_Timer_init(int rank, int size)
+int MPII_Timer_init(int rank, int size)
 {
     if (s_RLOG_Initialized)
     {
-	/* MPIU_Timer_init already called. */
+	/* MPII_Timer_init already called. */
 	return -1;
     }
     g_pRLOG = RLOG_InitLog(rank, size);
@@ -156,13 +156,13 @@ int MPIU_Timer_init(int rank, int size)
     /* arrow state */
     RLOG_DescribeState(g_pRLOG, RLOG_ARROW_EVENT_ID, "Arrow", "255 255 255");
 
-    MPIR_Describe_timer_states();
+    MPII_Describe_timer_states();
 
     s_RLOG_Initialized = 1;
     return MPI_SUCCESS;
 }
 
-int MPIU_Timer_finalize()
+int MPII_Timer_finalize()
 {
     if (g_pRLOG == NULL)
 	return -1;
@@ -194,7 +194,7 @@ int MPIU_Timer_finalize()
 
 /* This routine makes the RLOG_DescribeState call for each name */
 #include "state_names.h"
-int MPIR_Describe_timer_states( void )
+int MPII_Describe_timer_states( void )
 {
     MPIU_State_defs *def = mpich_states;
     
diff --git a/src/util/mem/handlemem.c b/src/util/mem/handlemem.c
index e12a91d..cea64ba 100644
--- a/src/util/mem/handlemem.c
+++ b/src/util/mem/handlemem.c
@@ -40,7 +40,7 @@ static void print_handle( int handle );
 
 #ifdef MPICH_DEBUG_HANDLEALLOC
 static int check_handles_on_finalize( void * );
-static const char *object_name( MPIU_Object_alloc_t * );
+static const char *object_name( MPIR_Object_alloc_t * );
 #endif
 
 /* This is the utility file for info that contains routines used to 
@@ -56,7 +56,7 @@ static const char *object_name( MPIU_Object_alloc_t * );
    // Static declaration of the information about the block
    // Define the number of preallocated entries # omitted)
    define MPID_<OBJ>_PREALLOC 256
-   MPIU_Object_alloc_t MPID_<obj>_mem = { 0, 0, 0, 0, MPID_<obj>, 
+   MPIR_Object_alloc_t MPID_<obj>_mem = { 0, 0, 0, 0, MPID_<obj>,
 				      sizeof(MPID_<obj>), MPID_<obj>_direct,
                                       MPID_<OBJ>_PREALLOC, };
 
@@ -109,7 +109,7 @@ static int handle_free( void *((*indirect)[]), int indirect_size )
                           "[MPICH handle: objptr=%p handle=0x%x %s/%s]",                        \
                           (objptr_), (objptr_)->handle,                                          \
                           ((is_direct_) ? "DIRECT" : "INDIRECT"),                                \
-                          MPIU_Handle_get_kind_str(handle_type_));                               \
+                          MPIR_Handle_get_kind_str(handle_type_));                               \
             /* we don't keep track of the block descriptor because the handle */                 \
             /* values never change once allocated */                                             \
             MPL_VG_CREATE_BLOCK((objptr_), (objsize_), desc_str);                               \
@@ -125,14 +125,14 @@ static void *handle_direct_init(void *direct,
 			      int handle_type)
 {
     int                i;
-    MPIU_Handle_common *hptr=0;
+    MPIR_Handle_common *hptr=0;
     char               *ptr = (char *)direct;
 
     for (i=0; i<direct_size; i++) {
 	/* printf( "Adding %p in %d\n", ptr, handle_type ); */
         /* First cast to (void*) to avoid false warnings about alignment
            (consider that a requirement of the input parameters) */
-	hptr = (MPIU_Handle_common *)(void *)ptr;
+	hptr = (MPIR_Handle_common *)(void *)ptr;
 	ptr  = ptr + obj_size;
 	hptr->next = ptr;
 	hptr->handle = ((unsigned)HANDLE_KIND_DIRECT << HANDLE_KIND_SHIFT) | 
@@ -155,7 +155,7 @@ static void *handle_indirect_init( void *(**indirect)[],
 					int handle_type )
 {
     void               *block_ptr;
-    MPIU_Handle_common *hptr=0;
+    MPIR_Handle_common *hptr=0;
     char               *ptr;
     int                i;
 
@@ -185,7 +185,7 @@ static void *handle_indirect_init( void *(**indirect)[],
     ptr = (char *)block_ptr;
     for (i=0; i<indirect_num_indices; i++) {
         /* Cast to (void*) to avoid false warning about alignment */
-	hptr       = (MPIU_Handle_common *)(void*)ptr;
+	hptr       = (MPIR_Handle_common *)(void*)ptr;
 	ptr        = ptr + obj_size;
 	hptr->next = ptr;
 	hptr->handle   = ((unsigned)HANDLE_KIND_INDIRECT << HANDLE_KIND_SHIFT) | 
@@ -213,7 +213,7 @@ static void *handle_indirect_init( void *(**indirect)[],
 
 static int handle_finalize( void *objmem_ptr )
 {
-    MPIU_Object_alloc_t *objmem = (MPIU_Object_alloc_t *)objmem_ptr;
+    MPIR_Object_alloc_t *objmem = (MPIR_Object_alloc_t *)objmem_ptr;
 
     (void)handle_free( objmem->indirect, objmem->indirect_size );
     /* This does *not* remove any Info objects that the user created 
@@ -231,7 +231,7 @@ static int handle_finalize( void *objmem_ptr )
    guaranteed to be single threaded).  When used by the obj_alloc, it
    adds unnecessary overhead, particularly when MPI is single threaded */
 
-static void handle_obj_alloc_complete(MPIU_Object_alloc_t *objmem,
+static void handle_obj_alloc_complete(MPIR_Object_alloc_t *objmem,
 				    int initialized)
 {
     if (initialized) {
@@ -249,7 +249,7 @@ static void handle_obj_alloc_complete(MPIU_Object_alloc_t *objmem,
 }
 
 /*+
-  MPIU_Handle_obj_alloc - Create an object using the handle allocator
+  MPIR_Handle_obj_alloc - Create an object using the handle allocator
 
 Input Parameters:
 . objmem - Pointer to object memory block.
@@ -274,25 +274,25 @@ Input Parameters:
 
   +*/
 #undef FUNCNAME
-#define FUNCNAME MPIU_Handle_obj_alloc
+#define FUNCNAME MPIR_Handle_obj_alloc
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-void *MPIU_Handle_obj_alloc(MPIU_Object_alloc_t *objmem)
+void *MPIR_Handle_obj_alloc(MPIR_Object_alloc_t *objmem)
 {
     void *ret;
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_HANDLE_MUTEX);
-    ret = MPIU_Handle_obj_alloc_unsafe(objmem);
+    ret = MPIR_Handle_obj_alloc_unsafe(objmem);
     MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_HANDLE_MUTEX);
     return ret;
 }
 
 #undef FUNCNAME
-#define FUNCNAME MPIU_Handle_obj_alloc_unsafe
+#define FUNCNAME MPIR_Handle_obj_alloc_unsafe
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-void *MPIU_Handle_obj_alloc_unsafe(MPIU_Object_alloc_t *objmem)
+void *MPIR_Handle_obj_alloc_unsafe(MPIR_Object_alloc_t *objmem)
 {
-    MPIU_Handle_common *ptr;
+    MPIR_Handle_common *ptr;
 
     if (objmem->avail) {
 	ptr	      = objmem->avail;
@@ -391,7 +391,7 @@ void *MPIU_Handle_obj_alloc_unsafe(MPIU_Object_alloc_t *objmem)
 }
 
 /*+
-  MPIU_Handle_obj_free - Free an object allocated with MPID_Handle_obj_new
+  MPIR_Handle_obj_free - Free an object allocated with MPID_Handle_obj_new
 
 Input Parameters:
 + objmem - Pointer to object block
@@ -401,9 +401,9 @@ Input Parameters:
   This routine assumes that only a single thread calls it at a time; this
   is true for the SINGLE_CS approach to thread safety
   +*/
-void MPIU_Handle_obj_free( MPIU_Object_alloc_t *objmem, void *object )
+void MPIR_Handle_obj_free( MPIR_Object_alloc_t *objmem, void *object )
 {
-    MPIU_Handle_common *obj = (MPIU_Handle_common *)object;
+    MPIR_Handle_common *obj = (MPIR_Handle_common *)object;
 
     MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_HANDLE_MUTEX);
 
@@ -411,8 +411,8 @@ void MPIU_Handle_obj_free( MPIU_Object_alloc_t *objmem, void *object )
                                      "Freeing object ptr %p (0x%08x kind=%s) refcount=%d",
                                      (obj),
                                      (obj)->handle,
-                                     MPIU_Handle_get_kind_str(HANDLE_GET_MPI_KIND((obj)->handle)),
-                                     MPIU_Object_get_ref(obj)));
+                                     MPIR_Handle_get_kind_str(HANDLE_GET_MPI_KIND((obj)->handle)),
+                                     MPIR_Object_get_ref(obj)));
 
 #ifdef USE_MEMORY_TRACING
     {
@@ -429,7 +429,7 @@ void MPIU_Handle_obj_free( MPIU_Object_alloc_t *objmem, void *object )
 
         MPL_VG_MEMPOOL_FREE(objmem, obj);
         /* MEMPOOL_FREE marks the object NOACCESS, so we have to make the
-         * MPIU_Handle_common area that is used for internal book keeping
+         * MPIR_Handle_common area that is used for internal book keeping
          * addressable again. */
         MPL_VG_MAKE_MEM_DEFINED(&obj->handle, sizeof(obj->handle));
         MPL_VG_MAKE_MEM_UNDEFINED(&obj->next, sizeof(obj->next));
@@ -454,7 +454,7 @@ void MPIU_Handle_obj_free( MPIU_Object_alloc_t *objmem, void *object )
 /* 
  * Get an pointer to dynamically allocated storage for objects.
  */
-void *MPIU_Handle_get_ptr_indirect( int handle, MPIU_Object_alloc_t *objmem )
+void *MPIR_Handle_get_ptr_indirect( int handle, MPIR_Object_alloc_t *objmem )
 {
     int block_num, index_num;
 
@@ -489,7 +489,7 @@ void *MPIU_Handle_get_ptr_indirect( int handle, MPIU_Object_alloc_t *objmem )
 }
 
 /* returns the name of the handle kind for debugging/logging purposes */
-const char *MPIU_Handle_get_kind_str(int kind)
+const char *MPIR_Handle_get_kind_str(int kind)
 {
 #define mpiu_name_case_(name_) case MPIR_##name_: return (#name_)
     switch (kind) {
@@ -541,9 +541,9 @@ const char *MPIU_Handle_get_kind_str(int kind)
 */
 static int check_handles_on_finalize( void *objmem_ptr )
 {
-    MPIU_Object_alloc_t *objmem = (MPIU_Object_alloc_t *)objmem_ptr;
+    MPIR_Object_alloc_t *objmem = (MPIR_Object_alloc_t *)objmem_ptr;
     int i;
-    MPIU_Handle_common *ptr;
+    MPIR_Handle_common *ptr;
     int leaked_handles = FALSE;
     int   directSize = objmem->direct_size;
     char *direct = (char *)objmem->direct;
@@ -619,15 +619,15 @@ static int check_handles_on_finalize( void *objmem_ptr )
         /* comm_world has been (or should have been) destroyed by this point,
          * pass comm=NULL */
         MPID_Abort(NULL, MPI_ERR_OTHER, 1, "ERROR: leaked handles detected, aborting");
-        MPIU_Assert(0);
+        MPIR_Assert(0);
     }
 
     return 0;
 }
 
-static const char *object_name( MPIU_Object_alloc_t *objmem )
+static const char *object_name( MPIR_Object_alloc_t *objmem )
 {
-    return MPIU_Handle_get_kind_str(objmem->kind);
+    return MPIR_Handle_get_kind_str(objmem->kind);
 }
 #endif    
 
diff --git a/src/util/mem/strerror.c b/src/util/mem/strerror.c
index 68c3fbc..ca3bb29 100644
--- a/src/util/mem/strerror.c
+++ b/src/util/mem/strerror.c
@@ -15,7 +15,7 @@ int strerror_r(int errnum, char *strerrbuf, size_t buflen);
 #endif
 
 /* ideally, provides a thread-safe version of strerror */
-const char *MPIU_Strerror(int errnum)
+const char *MPIR_Strerror(int errnum)
 {
 #if defined(HAVE_STRERROR_R)
     char *buf;
@@ -24,7 +24,7 @@ const char *MPIU_Strerror(int errnum)
 
     MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key,
                                  MPIR_Per_thread, per_thread, &err);
-    MPIU_Assert(err == 0);
+    MPIR_Assert(err == 0);
     buf = per_thread->strerrbuf;
 #  if defined(STRERROR_R_CHAR_P)
     /* strerror_r returns char ptr (old GNU-flavor).  Static strings for known
diff --git a/src/util/procmap/local_proc.c b/src/util/procmap/local_proc.c
index 2ebb358..f331e1b 100644
--- a/src/util/procmap/local_proc.c
+++ b/src/util/procmap/local_proc.c
@@ -18,7 +18,7 @@
 #include <errno.h>
 #endif
 
-/* MPIU_Find_local_and_external -- from the list of processes in comm,
+/* MPIR_Find_local_and_external -- from the list of processes in comm,
    builds a list of local processes, i.e., processes on this same
    node, and a list of external processes, i.e., one process from each
    node.
@@ -51,12 +51,12 @@
                          comm->remote_size.
 */
 #undef FUNCNAME
-#define FUNCNAME MPIU_Find_local_and_external
+#define FUNCNAME MPIR_Find_local_and_external
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 
 #if defined(MPID_USE_NODE_IDS)
-int MPIU_Find_local_and_external(MPIR_Comm *comm, int *local_size_p, int *local_rank_p, int **local_ranks_p,
+int MPIR_Find_local_and_external(MPIR_Comm *comm, int *local_size_p, int *local_rank_p, int **local_ranks_p,
                                  int *external_size_p, int *external_rank_p, int **external_ranks_p,
                                  int **intranode_table_p, int **internode_table_p)
 {
@@ -74,8 +74,8 @@ int MPIU_Find_local_and_external(MPIR_Comm *comm, int *local_size_p, int *local_
     MPID_Node_id_t max_node_id;
     MPID_Node_id_t node_id;
     MPID_Node_id_t my_node_id;
-    MPIU_CHKLMEM_DECL(1);
-    MPIU_CHKPMEM_DECL(4);
+    MPIR_CHKLMEM_DECL(1);
+    MPIR_CHKPMEM_DECL(4);
 
     /* Scan through the list of processes in comm and add one
        process from each node to the list of "external" processes.  We
@@ -86,16 +86,16 @@ int MPIU_Find_local_and_external(MPIR_Comm *comm, int *local_size_p, int *local_
     /* these two will be realloc'ed later to the appropriate size (currently unknown) */
     /* FIXME: realloc doesn't guarantee that the allocated area will be 
        shrunk - so using realloc is not an appropriate strategy. */
-    MPIU_CHKPMEM_MALLOC (external_ranks, int *, sizeof(int) * comm->remote_size, mpi_errno, "external_ranks");
-    MPIU_CHKPMEM_MALLOC (local_ranks, int *, sizeof(int) * comm->remote_size, mpi_errno, "local_ranks");
+    MPIR_CHKPMEM_MALLOC (external_ranks, int *, sizeof(int) * comm->remote_size, mpi_errno, "external_ranks");
+    MPIR_CHKPMEM_MALLOC (local_ranks, int *, sizeof(int) * comm->remote_size, mpi_errno, "local_ranks");
 
-    MPIU_CHKPMEM_MALLOC (internode_table, int *, sizeof(int) * comm->remote_size, mpi_errno, "internode_table");
-    MPIU_CHKPMEM_MALLOC (intranode_table, int *, sizeof(int) * comm->remote_size, mpi_errno, "intranode_table");
+    MPIR_CHKPMEM_MALLOC (internode_table, int *, sizeof(int) * comm->remote_size, mpi_errno, "internode_table");
+    MPIR_CHKPMEM_MALLOC (intranode_table, int *, sizeof(int) * comm->remote_size, mpi_errno, "intranode_table");
 
     mpi_errno = MPID_Get_max_node_id(comm, &max_node_id);
     if (mpi_errno) MPIR_ERR_POP (mpi_errno);
-    MPIU_Assert(max_node_id >= 0);
-    MPIU_CHKLMEM_MALLOC (nodes, int *, sizeof(int) * (max_node_id + 1), mpi_errno, "nodes");
+    MPIR_Assert(max_node_id >= 0);
+    MPIR_CHKLMEM_MALLOC (nodes, int *, sizeof(int) * (max_node_id + 1), mpi_errno, "nodes");
 
     /* nodes maps node_id to rank in external_ranks of leader for that node */
     for (i = 0; i < (max_node_id + 1); ++i)
@@ -108,8 +108,8 @@ int MPIU_Find_local_and_external(MPIR_Comm *comm, int *local_size_p, int *local_
 
     mpi_errno = MPID_Get_node_id(comm, comm->rank, &my_node_id);
     if (mpi_errno) MPIR_ERR_POP (mpi_errno);
-    MPIU_Assert(my_node_id >= 0);
-    MPIU_Assert(my_node_id <= max_node_id);
+    MPIR_Assert(my_node_id >= 0);
+    MPIR_Assert(my_node_id <= max_node_id);
 
     local_size = 0;
     local_rank = -1;
@@ -124,7 +124,7 @@ int MPIU_Find_local_and_external(MPIR_Comm *comm, int *local_size_p, int *local_
            able to recover gracefully. */
         MPIR_ERR_CHKANDJUMP(node_id < 0, mpi_errno, MPI_ERR_OTHER, "**dynamic_node_ids");
 
-        MPIU_Assert(node_id <= max_node_id);
+        MPIR_Assert(node_id <= max_node_id);
 
         /* build list of external processes */
         if (nodes[node_id] == -1)
@@ -193,18 +193,18 @@ int MPIU_Find_local_and_external(MPIR_Comm *comm, int *local_size_p, int *local_
     if (internode_table_p)
         *internode_table_p = internode_table;
     
-    MPIU_CHKPMEM_COMMIT();
+    MPIR_CHKPMEM_COMMIT();
 
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
+    MPIR_CHKLMEM_FREEALL();
     return mpi_errno;
  fn_fail:
-    MPIU_CHKPMEM_REAP();
+    MPIR_CHKPMEM_REAP();
     goto fn_exit;
 }
 
 #else /* !defined(MPID_USE_NODE_IDS) */
-int MPIU_Find_local_and_external(MPIR_Comm *comm, int *local_size_p, int *local_rank_p, int **local_ranks_p,
+int MPIR_Find_local_and_external(MPIR_Comm *comm, int *local_size_p, int *local_rank_p, int **local_ranks_p,
                                  int *external_size_p, int *external_rank_p, int **external_ranks_p,
                                  int **intranode_table_p, int **internode_table_p)
 {
@@ -226,17 +226,17 @@ fn_fail:
    This function does NOT use mpich error handling.
  */
 #undef FUNCNAME
-#define FUNCNAME MPIU_Get_internode_rank
+#define FUNCNAME MPIR_Get_internode_rank
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIU_Get_internode_rank(MPIR_Comm *comm_ptr, int r)
+int MPIR_Get_internode_rank(MPIR_Comm *comm_ptr, int r)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm_valid_ptr( comm_ptr, mpi_errno, TRUE );
-    MPIU_Assert(mpi_errno == MPI_SUCCESS);
-    MPIU_Assert(r < comm_ptr->remote_size);
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
-    MPIU_Assert(comm_ptr->internode_table != NULL);
+    MPIR_Assert(mpi_errno == MPI_SUCCESS);
+    MPIR_Assert(r < comm_ptr->remote_size);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->internode_table != NULL);
 
     return comm_ptr->internode_table[r];
 }
@@ -247,17 +247,17 @@ int MPIU_Get_internode_rank(MPIR_Comm *comm_ptr, int r)
    This function does NOT use mpich error handling.
  */
 #undef FUNCNAME
-#define FUNCNAME MPIU_Get_intranode_rank
+#define FUNCNAME MPIR_Get_intranode_rank
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIU_Get_intranode_rank(MPIR_Comm *comm_ptr, int r)
+int MPIR_Get_intranode_rank(MPIR_Comm *comm_ptr, int r)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm_valid_ptr( comm_ptr, mpi_errno, TRUE );
-    MPIU_Assert(mpi_errno == MPI_SUCCESS);
-    MPIU_Assert(r < comm_ptr->remote_size);
-    MPIU_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
-    MPIU_Assert(comm_ptr->intranode_table != NULL);
+    MPIR_Assert(mpi_errno == MPI_SUCCESS);
+    MPIR_Assert(r < comm_ptr->remote_size);
+    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
+    MPIR_Assert(comm_ptr->intranode_table != NULL);
 
     /* FIXME this could/should be a list of ranks on the local node, which
        should take up much less space on a typical thin(ish)-node system. */
diff --git a/src/util/wrappers/README b/src/util/wrappers/README
index 07db5c9..072c1e2 100644
--- a/src/util/wrappers/README
+++ b/src/util/wrappers/README
@@ -2,7 +2,7 @@ This directory contains wrappers funcs to provide platform independence to MPICH
 
 TODO
 ------
-# The code currently uses MPIU_Assert*() and hence requires device support. Re-define MPIU_Assert*().
+# The code currently uses MPIR_Assert*() and hence requires device support. Re-define MPIR_Assert*().
 
 Notes:
 
diff --git a/src/util/wrappers/mpiu_shm_wrappers.h b/src/util/wrappers/mpiu_shm_wrappers.h
index 45ab5d6..8ea3575 100644
--- a/src/util/wrappers/mpiu_shm_wrappers.h
+++ b/src/util/wrappers/mpiu_shm_wrappers.h
@@ -213,7 +213,7 @@ static inline int MPIU_SHMW_Ghnd_alloc(MPIU_SHMW_Hnd_t hnd)
 /* Returns 0 on success, -1 on error */
 static inline int MPIU_SHMW_Hnd_alloc(MPIU_SHMW_Hnd_t *hnd_ptr)
 {
-    MPIU_Assert(hnd_ptr);
+    MPIR_Assert(hnd_ptr);
     *hnd_ptr = (MPIU_SHMW_Hnd_t) MPL_malloc(MPIU_SHMW_HND_SZ);
     if(*hnd_ptr){
         (*hnd_ptr)->flag = MPIU_SHMW_FLAG_GHND_STATIC;
@@ -229,13 +229,13 @@ static inline int MPIU_SHMW_Hnd_alloc(MPIU_SHMW_Hnd_t *hnd_ptr)
 
 static inline void MPIU_SHMW_Hnd_reset_val(MPIU_SHMW_Hnd_t hnd)
 {
-    MPIU_Assert(hnd);
+    MPIR_Assert(hnd);
     MPIU_SHMW_Lhnd_set(hnd, MPIU_SHMW_LHND_INIT_VAL);
     if(hnd->flag & MPIU_SHMW_FLAG_GHND_STATIC){
         hnd->ghnd = MPIU_SHMW_GHND_INVALID;
     }
     else{
-        MPIU_Assert(hnd->ghnd);
+        MPIR_Assert(hnd->ghnd);
         (hnd->ghnd)[0] = MPIU_SHMW_GHND_INIT_VAL;
     }
 }
@@ -275,9 +275,9 @@ static inline int MPIU_SHMW_Hnd_serialize(char *str,
     int mpi_errno = MPI_SUCCESS;
     int rc = -1;
 
-    MPIU_Assert(MPIU_SHMW_Hnd_is_init(hnd));
-    MPIU_Assert(str);
-    MPIU_Assert(str_len >= MPIU_SHMW_GHND_SZ);
+    MPIR_Assert(MPIU_SHMW_Hnd_is_init(hnd));
+    MPIR_Assert(str);
+    MPIR_Assert(str_len >= MPIU_SHMW_GHND_SZ);
 
     rc = MPIU_SHMW_Ghnd_get_by_val(hnd, str, str_len);
     MPIR_ERR_CHKANDJUMP(rc != 0, mpi_errno, MPI_ERR_OTHER, "**shmw_gethnd");
@@ -306,7 +306,7 @@ static inline int MPIU_SHMW_Hnd_deserialize(
     int mpi_errno = MPI_SUCCESS;
     int rc = -1;
 
-    MPIU_Assert(MPIU_SHMW_Hnd_is_init(hnd));
+    MPIR_Assert(MPIU_SHMW_Hnd_is_init(hnd));
     MPIR_ERR_CHKINTERNAL(!str_hnd, mpi_errno, "ser hnd is null");
     MPIR_ERR_CHKANDJUMP(str_hnd_len>=MPIU_SHMW_GHND_SZ,
         mpi_errno, MPI_ERR_OTHER, "**shmw_deserbufbig");
@@ -318,7 +318,7 @@ static inline int MPIU_SHMW_Hnd_deserialize(
         "**nomem", "**nomem %s", "shared mem global handle");
 
     rc = MPIU_SHMW_Ghnd_set_by_val(hnd, "%s", str_hnd);
-    MPIU_Assert(rc == 0);
+    MPIR_Assert(rc == 0);
 
     mpi_errno = MPIU_SHMW_Seg_open(hnd, 0);
     if(mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); }
@@ -348,11 +348,11 @@ static inline int MPIU_SHMW_Hnd_get_serialized_by_ref(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SHMW_Hnd_is_init(hnd));
-    MPIU_Assert(str_ptr);
+    MPIR_Assert(MPIU_SHMW_Hnd_is_init(hnd));
+    MPIR_Assert(str_ptr);
 
     *str_ptr = (char *)MPIU_SHMW_Ghnd_get_by_ref(hnd);
-    MPIU_Assert(*str_ptr);
+    MPIR_Assert(*str_ptr);
 
     return mpi_errno;
 }
@@ -375,8 +375,8 @@ static inline int MPIU_SHMW_Hnd_deserialize_by_ref(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SHMW_Hnd_is_init(hnd));
-    MPIU_Assert(ser_hnd_ptr);
+    MPIR_Assert(MPIU_SHMW_Hnd_is_init(hnd));
+    MPIR_Assert(ser_hnd_ptr);
 
     MPIR_ERR_CHKINTERNAL(!(*ser_hnd_ptr), mpi_errno, "ser hnd is null");
 
@@ -406,7 +406,7 @@ static inline int MPIU_SHMW_Hnd_init(
     int mpi_errno = MPI_SUCCESS;
     int rc = -1;
 
-    MPIU_Assert(hnd_ptr);
+    MPIR_Assert(hnd_ptr);
 
     rc = MPIU_SHMW_Hnd_alloc(hnd_ptr);
     MPIR_ERR_CHKANDJUMP1((rc != 0), mpi_errno, MPI_ERR_OTHER,
@@ -433,8 +433,8 @@ static inline int MPIU_SHMW_Hnd_finalize(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(hnd_ptr);
-    MPIU_Assert(*hnd_ptr);
+    MPIR_Assert(hnd_ptr);
+    MPIR_Assert(*hnd_ptr);
 
     /* A finalize can/should be called on an invalid handle
      * Don't assert if we fail here ...
@@ -480,11 +480,11 @@ static inline int MPIU_SHMW_Seg_create_attach_templ(
             "**nomem", "**nomem %s", "shared mem global handle");
 
         rc = MPIU_SHMW_Ghnd_set_by_val(hnd, "%d", lhnd);
-        MPIU_Assert(rc == 0);
+        MPIR_Assert(rc == 0);
     }
     else{
         /* Open an existing shared memory seg */
-        MPIU_Assert(MPIU_SHMW_Ghnd_is_valid(hnd));
+        MPIR_Assert(MPIU_SHMW_Ghnd_is_valid(hnd));
 
         if(!MPIU_SHMW_Lhnd_is_valid(hnd)){
             lhnd = atoi(MPIU_SHMW_Ghnd_get_by_ref(hnd));
@@ -497,7 +497,7 @@ static inline int MPIU_SHMW_Seg_create_attach_templ(
 
     if(flag & MPIU_SHMW_FLAG_SHM_ATTACH){
         /* Attach to shared mem seg */
-        MPIU_Assert(shm_addr_ptr);
+        MPIR_Assert(shm_addr_ptr);
 
         *shm_addr_ptr = shmat(MPIU_SHMW_Lhnd_get(hnd), NULL, 0x0);
 
@@ -529,7 +529,7 @@ static inline int MPIU_SHMW_Seg_detach(
 
     MPIR_ERR_CHKANDJUMP(!MPIU_SHMW_Hnd_is_valid(hnd),
         mpi_errno, MPI_ERR_OTHER, "**shmw_badhnd");
-    MPIU_Assert(shm_addr_ptr);
+    MPIR_Assert(shm_addr_ptr);
     MPIR_ERR_CHKINTERNAL(!(*shm_addr_ptr), mpi_errno, "shm address is null");
 
     rc = shmdt(*shm_addr_ptr);
@@ -624,11 +624,11 @@ static inline int MPIU_SHMW_Seg_create_attach_templ(
             "**nomem", "**nomem %s", "shared memory global handle");
 
         rc = MPIU_SHMW_Ghnd_set_by_val(hnd, "%s", chosen_fname);
-        MPIU_Assert(rc == 0);
+        MPIR_Assert(rc == 0);
     }
     else{
         /* Open an existing shared memory seg */
-        MPIU_Assert(MPIU_SHMW_Ghnd_is_valid(hnd));
+        MPIR_Assert(MPIU_SHMW_Ghnd_is_valid(hnd));
 
         if(!MPIU_SHMW_Lhnd_is_valid(hnd)){
             lhnd = open(MPIU_SHMW_Ghnd_get_by_ref(hnd), O_RDWR);
@@ -643,7 +643,7 @@ static inline int MPIU_SHMW_Seg_create_attach_templ(
     if(flag & MPIU_SHMW_FLAG_SHM_ATTACH){
         void *buf_ptr = NULL;
 
-        MPIU_Assert(shm_addr_ptr);
+        MPIR_Assert(shm_addr_ptr);
 
         buf_ptr = mmap(NULL, seg_sz, PROT_READ | PROT_WRITE,
                         MAP_SHARED, MPIU_SHMW_Lhnd_get(hnd), 0);
@@ -659,7 +659,7 @@ fn_exit:
     /* FIXME: Close local handle only when closing the shm handle */
     if(MPIU_SHMW_Lhnd_is_valid(hnd)){
         rc = MPIU_SHMW_Lhnd_close(hnd);
-        MPIU_Assert(rc == 0);
+        MPIR_Assert(rc == 0);
     } 
     return mpi_errno;
 fn_fail:
@@ -677,7 +677,7 @@ static inline int MPIU_SHMW_Seg_detach(
     int mpi_errno = MPI_SUCCESS;
     int rc = -1;
 
-    MPIU_Assert(shm_addr_ptr);
+    MPIR_Assert(shm_addr_ptr);
     MPIR_ERR_CHKINTERNAL(!(*shm_addr_ptr), mpi_errno, "shm address is null");
 
     rc = munmap(*shm_addr_ptr, seg_sz);
@@ -732,7 +732,7 @@ static inline int MPIU_SHMW_Seg_create_attach_templ(
     seg_sz_large.QuadPart = seg_sz;
 
     if(!MPIU_SHMW_Ghnd_is_valid(hnd)){
-        MPIU_Assert(flag & MPIU_SHMW_FLAG_SHM_CREATE);
+        MPIR_Assert(flag & MPIU_SHMW_FLAG_SHM_CREATE);
 
         rc = MPIU_SHMW_Ghnd_set_uniq(hnd);
         MPIR_ERR_CHKANDJUMP((rc == 0), mpi_errno, MPI_ERR_OTHER,
@@ -753,7 +753,7 @@ static inline int MPIU_SHMW_Seg_create_attach_templ(
         MPIU_SHMW_Lhnd_set(hnd, lhnd);
     }
     else{
-        MPIU_Assert(MPIU_SHMW_Ghnd_is_valid(hnd));
+        MPIR_Assert(MPIU_SHMW_Ghnd_is_valid(hnd));
 
         if(!MPIU_SHMW_Lhnd_is_valid(hnd)){
             /* Strangely OpenFileMapping() returns NULL on error! */
@@ -769,7 +769,7 @@ static inline int MPIU_SHMW_Seg_create_attach_templ(
     }
 
     if(flag & MPIU_SHMW_FLAG_SHM_ATTACH){
-        MPIU_Assert(shm_addr_ptr);
+        MPIR_Assert(shm_addr_ptr);
 
         *shm_addr_ptr = (char *)MapViewOfFile(MPIU_SHMW_Lhnd_get(hnd),
                             FILE_MAP_WRITE, 0, offset, 0);
@@ -798,7 +798,7 @@ static inline int MPIU_SHMW_Seg_detach(
     MPL_UNREFERENCED_ARG(seg_sz);
     MPIR_ERR_CHKANDJUMP(!MPIU_SHMW_Hnd_is_valid(hnd),
         mpi_errno, MPI_ERR_OTHER, "**shmw_badhnd");
-    MPIU_Assert(shm_addr_ptr);
+    MPIR_Assert(shm_addr_ptr);
     MPIR_ERR_CHKINTERNAL(!(*shm_addr_ptr), mpi_errno, "shm address is null");
 
     rc = UnmapViewOfFile(*shm_addr_ptr);
@@ -831,8 +831,8 @@ static inline int MPIU_SHMW_Seg_create(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SHMW_Hnd_is_init(hnd));
-    MPIU_Assert(seg_sz > 0);
+    MPIR_Assert(MPIU_SHMW_Hnd_is_init(hnd));
+    MPIR_Assert(seg_sz > 0);
 
     mpi_errno = MPIU_SHMW_Seg_create_attach_templ(hnd,
                     seg_sz, NULL, 0, MPIU_SHMW_FLAG_SHM_CREATE);
@@ -858,7 +858,7 @@ static inline int MPIU_SHMW_Seg_open(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SHMW_Hnd_is_init(hnd));
+    MPIR_Assert(MPIU_SHMW_Hnd_is_init(hnd));
 
     mpi_errno = MPIU_SHMW_Seg_create_attach_templ(hnd, seg_sz,
             NULL, 0, MPIU_SHMW_FLAG_CLR);
@@ -887,9 +887,9 @@ static inline int MPIU_SHMW_Seg_create_and_attach(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SHMW_Hnd_is_init(hnd));
-    MPIU_Assert(seg_sz > 0);
-    MPIU_Assert(shm_addr_ptr);
+    MPIR_Assert(MPIU_SHMW_Hnd_is_init(hnd));
+    MPIR_Assert(seg_sz > 0);
+    MPIR_Assert(shm_addr_ptr);
 
     mpi_errno = MPIU_SHMW_Seg_create_attach_templ(hnd, seg_sz,
                     shm_addr_ptr, offset, MPIU_SHMW_FLAG_SHM_CREATE |
@@ -919,8 +919,8 @@ static inline int MPIU_SHMW_Seg_attach(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SHMW_Hnd_is_init(hnd));
-    MPIU_Assert(shm_addr_ptr);
+    MPIR_Assert(MPIU_SHMW_Hnd_is_init(hnd));
+    MPIR_Assert(shm_addr_ptr);
 
     mpi_errno = MPIU_SHMW_Seg_create_attach_templ(hnd, seg_sz,
                 shm_addr_ptr, offset, MPIU_SHMW_FLAG_SHM_ATTACH);
diff --git a/src/util/wrappers/mpiu_sock_wrappers.h b/src/util/wrappers/mpiu_sock_wrappers.h
index 760b8bb..e5267be 100644
--- a/src/util/wrappers/mpiu_sock_wrappers.h
+++ b/src/util/wrappers/mpiu_sock_wrappers.h
@@ -147,7 +147,7 @@ static inline int MPIU_SOCKW_Bind_port_range(
     int mpi_errno = MPI_SUCCESS;
     int done = 0;
 
-    MPIU_Assert(sin);
+    MPIR_Assert(sin);
 
     for(cur_port = low_port; cur_port <= high_port; cur_port++){
         (sin)->sin_port = htons(cur_port);
@@ -225,7 +225,7 @@ static inline int MPIU_SOCKW_Connect(
             MPI_ERR_OTHER, "**sock_connect", "**sock_connect %s %d",
             MPIU_OSW_Strerror(err), err);
 
-        MPIU_Assert(is_pending);
+        MPIR_Assert(is_pending);
         *is_pending = 1;
     }
 fn_exit:
@@ -265,7 +265,7 @@ static inline int MPIU_SOCKW_Readv(MPIU_SOCKW_Sockfd_t sock,
     int err;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(nb_rd_ptr);
+    MPIR_Assert(nb_rd_ptr);
     if(WSARecv(sock, iov, iov_cnt, (LPDWORD )nb_rd_ptr, &flags, NULL, NULL)
         == SOCKET_ERROR){
         err = MPIU_OSW_Get_errno();
@@ -444,7 +444,7 @@ static inline void MPIU_SOCKW_Waitset_expand_(MPIU_SOCKW_Waitset_hnd_t waitset_h
 {
     MPL_UNREFERENCED_ARG(index_ptr);
     (waitset_hnd)->nfds++;
-    MPIU_Assert((waitset_hnd)->nfds < FD_SETSIZE);
+    MPIR_Assert((waitset_hnd)->nfds < FD_SETSIZE);
 }
 
 
@@ -484,7 +484,7 @@ static inline int MPIU_SOCKW_Timeval_hnd_init(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(hnd_ptr);
+    MPIR_Assert(hnd_ptr);
 
     *hnd_ptr = (MPIU_SOCKW_Timeval_hnd_t)
                 MPL_malloc(sizeof(MPIU_SOCKW_Timeval_t_));
@@ -509,7 +509,7 @@ static inline int MPIU_SOCKW_Timeval_hnd_set(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Timeval_hnd_is_init_(hnd));
+    MPIR_Assert(MPIU_SOCKW_Timeval_hnd_is_init_(hnd));
 
     hnd->tv_sec = tv_msec/1000;
     hnd->tv_usec = (tv_msec % 1000) * 1000;
@@ -526,7 +526,7 @@ static inline int MPIU_SOCKW_Timeval_hnd_finalize(
 {
     int mpi_errno = MPI_SUCCESS;
     
-    MPIU_Assert(hnd_ptr);
+    MPIR_Assert(hnd_ptr);
 
     if(MPIU_SOCKW_Timeval_hnd_is_init_(*hnd_ptr)){
         MPL_free(*hnd_ptr);
@@ -560,8 +560,8 @@ static inline int MPIU_SOCKW_Waitset_hnd_init(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(hnd_ptr);
-    MPIU_Assert(nfds <= FD_SETSIZE);
+    MPIR_Assert(hnd_ptr);
+    MPIR_Assert(nfds <= FD_SETSIZE);
 
     if(nfds <= 0){
         nfds = FD_SETSIZE;
@@ -606,7 +606,7 @@ static inline int MPIU_SOCKW_Waitset_hnd_finalize(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(hnd_ptr);
+    MPIR_Assert(hnd_ptr);
 
     if(MPIU_SOCKW_Waitset_hnd_is_init_(*hnd_ptr)){
         MPL_free((*hnd_ptr)->fdset);
@@ -635,8 +635,8 @@ static inline int MPIU_SOCKW_Waitset_wait(
     int nfds=0;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(hnd));
-    MPIU_Assert(MPIU_SOCKW_Timeval_hnd_is_init_(timeout));
+    MPIR_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(hnd));
+    MPIR_Assert(MPIU_SOCKW_Timeval_hnd_is_init_(timeout));
 
     MPIU_SOCKW_Waitset_curindex_reset_(hnd);
     FD_ZERO(&(hnd->tmp_read_fds));
@@ -718,8 +718,8 @@ static inline int MPIU_SOCKW_Waitset_get_nxt_sock_with_evnt(
     int i;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
-    MPIU_Assert(sock_hnd_ptr);
+    MPIR_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
+    MPIR_Assert(sock_hnd_ptr);
 
     *sock_hnd_ptr = NULL;
 
@@ -751,17 +751,17 @@ static inline int MPIU_SOCKW_Waitset_add_sock(
     int index= 0;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
-    MPIU_Assert(sock_hnd_ptr);
+    MPIR_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
+    MPIR_Assert(sock_hnd_ptr);
 
     *sock_hnd_ptr = NULL;
 
     index = waitset_hnd->fdset_index;
     if(index >= waitset_hnd->nfds){
         index = MPIU_SOCKW_Waitset_freeindex_get_(waitset_hnd);
-        /* FIXME: Try not to use MPIU_Assert(). Utils should not depend
+        /* FIXME: Try not to use MPIR_Assert(). Utils should not depend
          * on a device impl of assert */
-        MPIU_Assert(index != MPIU_SOCKW_WAITSET_CURINDEX_INVALID_);
+        MPIR_Assert(index != MPIU_SOCKW_WAITSET_CURINDEX_INVALID_);
     }
     else{
         waitset_hnd->fdset_index++;
@@ -799,8 +799,8 @@ static inline int MPIU_SOCKW_Waitset_set_sock(
     MPIU_SOCKW_Sockfd_t sock;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
-    MPIU_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
+    MPIR_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
+    MPIR_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
 
     sock = sock_hnd->sockfd;
     if((flag & MPIU_SOCKW_FLAG_WAITON_RD) && 
@@ -833,8 +833,8 @@ static inline int MPIU_SOCKW_Waitset_clr_sock(
     MPIU_SOCKW_Sockfd_t sock;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
-    MPIU_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
+    MPIR_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
+    MPIR_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
 
     sock = sock_hnd->sockfd;
     if((flag & MPIU_SOCKW_FLAG_WAITON_RD) && 
@@ -868,9 +868,9 @@ static inline int MPIU_SOCKW_Waitset_rem_sock(
     MPIU_SOCKW_Sockfd_t sock;
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
-    MPIU_Assert(sock_hnd_ptr);
-    MPIU_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(*sock_hnd_ptr));
+    MPIR_Assert(MPIU_SOCKW_Waitset_hnd_is_init_(waitset_hnd));
+    MPIR_Assert(sock_hnd_ptr);
+    MPIR_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(*sock_hnd_ptr));
 
     sock = (*sock_hnd_ptr)->sockfd;
     if(FD_ISSET(sock, &(waitset_hnd->read_fds))){
@@ -904,8 +904,8 @@ static inline int MPIU_SOCKW_Waitset_sock_hnd_get_sockfd(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
-    MPIU_Assert(sockfd_ptr);
+    MPIR_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
+    MPIR_Assert(sockfd_ptr);
 
     *sockfd_ptr = sock_hnd->sockfd;
 
@@ -921,8 +921,8 @@ static inline int MPIU_SOCKW_Waitset_sock_hnd_get_user_ptr(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
-    MPIU_Assert(userp_ptr);
+    MPIR_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
+    MPIR_Assert(userp_ptr);
 
     *userp_ptr = sock_hnd->user_ptr;
 
@@ -938,7 +938,7 @@ static inline int MPIU_SOCKW_Waitset_sock_hnd_set_user_ptr(
 {
     int mpi_errno = MPI_SUCCESS;
 
-    MPIU_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
+    MPIR_Assert(MPIU_SOCKW_Waitset_sock_hnd_is_init_(sock_hnd));
 
     sock_hnd->user_ptr = user_ptr;
 
diff --git a/test/mpi/group/glpid.c b/test/mpi/group/glpid.c
index 4454e37..d698af5 100644
--- a/test/mpi/group/glpid.c
+++ b/test/mpi/group/glpid.c
@@ -21,8 +21,8 @@ int main(int argc, char *argv[])
     group.size = 4;
     group.rank = 0;
     group.idx_of_first_lpid = -1;
-    group.lrank_to_lpid = (MPIR_Group_pmap_t *)
-        MPL_malloc(group.size * sizeof(MPIR_Group_pmap_t));
+    group.lrank_to_lpid = (MPII_Group_pmap_t *)
+        MPL_malloc(group.size * sizeof(MPII_Group_pmap_t));
     for (i = 0; i < group.size; i++) {
         group.lrank_to_lpid[i].lrank = i;
         group.lrank_to_lpid[i].lpid = group.size - i - 1;
@@ -31,7 +31,7 @@ int main(int argc, char *argv[])
     }
 
     /* Set up the group lpid list */
-    MPIR_Group_setup_lpid_list(group_ptr);
+    MPII_Group_setup_lpid_list(group_ptr);
 
     /* Print the group structure */
     printf("Index of first lpid = %d\n", group.idx_of_first_lpid);

http://git.mpich.org/mpich.git/commitdiff/69567c614fd1285257c62d91a06128819cf98e3e

commit 69567c614fd1285257c62d91a06128819cf98e3e
Author: Pavan Balaji <balaji at anl.gov>
Date:   Sun Apr 24 22:17:05 2016 -0500

    Use size_t instead of MPIU_Size_t.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/configure.ac b/configure.ac
index d241945..de7728b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4725,20 +4725,6 @@ AC_SUBST(MPI_F77_AINT)
 export MPI_F77_AINT
 # ----------------------------------------------------------------------------
 
-# define MPIU_Size_t - used to express the size of objects
-# This is used in mpir_type_defs.h to define MPIU_SIZE_T,
-# and is used in various parts of ch3 and mpid/common/sock.  
-# This is used to handle the potential problem that a message is 
-# too long to fit with an int.  However, we may still need to make
-# some more adjustments in the code (this may not be used everywhere
-# that it is needed).
-#
-# FIXME: this should really be in a util configure file, but we don't
-# have one at the moment
-#
-MPIU_SIZE_T="unsigned $MPI_AINT"
-AC_DEFINE_UNQUOTED(MPIU_SIZE_T,$MPIU_SIZE_T,[Set to a type that can express the size of the entire address space]) 
-
 if test "$ac_cv_sizeof_void_p" -lt "$aint_size" ; then
     AC_DEFINE(USE_AINT_FOR_ATTRVAL,1,[Define if MPI_Aint should be used instead of void * for storing attribute values])
 fi
diff --git a/src/include/mpir_type_defs.h b/src/include/mpir_type_defs.h
index 83b9cf7..5bb9484 100644
--- a/src/include/mpir_type_defs.h
+++ b/src/include/mpir_type_defs.h
@@ -46,8 +46,6 @@
 
 #include "mpl.h"
 
-typedef MPIU_SIZE_T MPIU_Size_t;
-
 /* Use the MPIU_PtrToXXX macros to convert pointers to and from integer types */
 
 /* The Microsoft compiler will not allow casting of different sized types
diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_defs.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_defs.h
index 81d3d67..aabf033 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_defs.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_defs.h
@@ -69,7 +69,7 @@ MPID_nem_barrier_t;
 
 typedef struct MPID_nem_seg
 {
-    MPIU_Size_t segment_len;
+    size_t segment_len;
     /* Handle to shm seg */
     MPIU_SHMW_Hnd_t hnd;
     /* Pointers */
diff --git a/src/mpid/ch3/channels/sock/src/ch3_isend.c b/src/mpid/ch3/channels/sock/src/ch3_isend.c
index d354f87..44aed4f 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_isend.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_isend.c
@@ -11,7 +11,7 @@
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static void update_request(MPIR_Request * sreq, void * hdr,
-			   intptr_t hdr_sz, MPIU_Size_t nb)
+			   intptr_t hdr_sz, size_t nb)
 {
     MPIDI_STATE_DECL(MPID_STATE_UPDATE_REQUEST);
 
@@ -52,7 +52,7 @@ int MPIDI_CH3_iSend(MPIDI_VC_t * vc, MPIR_Request * sreq, void * hdr,
 	   data, queuing any unsent data. */
 	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
 	{
-	    MPIU_Size_t nb;
+	    size_t nb;
 	    int rc;
 
 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
diff --git a/src/mpid/ch3/channels/sock/src/ch3_isendv.c b/src/mpid/ch3/channels/sock/src/ch3_isendv.c
index afba188..e55b87d 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_isendv.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_isendv.c
@@ -11,7 +11,7 @@
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static void update_request(MPIR_Request * sreq, MPL_IOV * iov, int iov_count,
-			   int iov_offset, MPIU_Size_t nb)
+			   int iov_offset, size_t nb)
 {
     int i;
     MPIDI_STATE_DECL(MPID_STATE_UPDATE_REQUEST);
@@ -76,7 +76,7 @@ int MPIDI_CH3_iSendv(MPIDI_VC_t * vc, MPIR_Request * sreq,
 	   data, queuing any unsent data. */
 	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
 	{
-	    MPIU_Size_t nb;
+	    size_t nb;
 	    int rc;
 
 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
diff --git a/src/mpid/ch3/channels/sock/src/ch3_istartmsg.c b/src/mpid/ch3/channels/sock/src/ch3_istartmsg.c
index c28f4a6..808d0c5 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_istartmsg.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_istartmsg.c
@@ -11,7 +11,7 @@
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static MPIR_Request * create_request(void * hdr, intptr_t hdr_sz,
-				     MPIU_Size_t nb)
+				     size_t nb)
 {
     MPIR_Request * sreq;
     MPIDI_STATE_DECL(MPID_STATE_CREATE_REQUEST);
@@ -73,7 +73,7 @@ int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * hdr, intptr_t hdr_sz,
 	   data, queuing any unsent data. */
 	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
 	{
-	    MPIU_Size_t nb;
+	    size_t nb;
 	    int rc;
 
 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
diff --git a/src/mpid/ch3/channels/sock/src/ch3_istartmsgv.c b/src/mpid/ch3/channels/sock/src/ch3_istartmsgv.c
index 1edc491..8720914 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_istartmsgv.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_istartmsgv.c
@@ -11,7 +11,7 @@
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static MPIR_Request * create_request(MPL_IOV * iov, int iov_count,
-				     int iov_offset, MPIU_Size_t nb)
+				     int iov_offset, size_t nb)
 {
     MPIR_Request * sreq;
     int i;
@@ -98,7 +98,7 @@ int MPIDI_CH3_iStartMsgv(MPIDI_VC_t * vc, MPL_IOV * iov, int n_iov,
 	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
 	{
 	    int rc;
-	    MPIU_Size_t nb;
+	    size_t nb;
 
 	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
 			 "send queue empty, attempting to write");
diff --git a/src/mpid/ch3/channels/sock/src/ch3_progress.c b/src/mpid/ch3/channels/sock/src/ch3_progress.c
index 67d2dd4..74583c2 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_progress.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_progress.c
@@ -43,7 +43,7 @@ static int MPIDI_CH3I_Progress_handle_sock_event(MPIDU_Sock_event_t * event);
 static inline int connection_pop_sendq_req(MPIDI_CH3I_Connection_t * conn);
 static inline int connection_post_recv_pkt(MPIDI_CH3I_Connection_t * conn);
 
-static int adjust_iov(MPL_IOV ** iovp, int * countp, MPIU_Size_t nb);
+static int adjust_iov(MPL_IOV ** iovp, int * countp, size_t nb);
 
 #define MAX_PROGRESS_HOOKS 4
 typedef int (*progress_func_ptr_t) (int* made_progress);
@@ -608,7 +608,7 @@ static int MPIDI_CH3I_Progress_handle_sock_event(MPIDU_Sock_event_t * event)
 		    for(;;)
 		    {
 			MPL_IOV * iovp;
-			MPIU_Size_t nb;
+			size_t nb;
 				
 			iovp = sreq->dev.iov;
 			    
@@ -847,7 +847,7 @@ static inline int connection_post_recv_pkt(MPIDI_CH3I_Connection_t * conn)
 #define FUNCNAME adjust_iov
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-static int adjust_iov(MPL_IOV ** iovp, int * countp, MPIU_Size_t nb)
+static int adjust_iov(MPL_IOV ** iovp, int * countp, size_t nb)
 {
     MPL_IOV * const iov = *iovp;
     const int count = *countp;
@@ -883,7 +883,7 @@ static int ReadMoreData( MPIDI_CH3I_Connection_t * conn, MPIR_Request *rreq )
     
     while (1) {
 	MPL_IOV * iovp;
-	MPIU_Size_t nb;
+	size_t nb;
 	
 	iovp = rreq->dev.iov;
 			    
diff --git a/src/mpid/common/sock/iocp/sock.c b/src/mpid/common/sock/iocp/sock.c
index 48414ab..fc59da1 100644
--- a/src/mpid/common/sock/iocp/sock.c
+++ b/src/mpid/common/sock/iocp/sock.c
@@ -34,7 +34,7 @@ typedef struct sock_buffer
 #endif
     int iovlen;
     int index;
-    MPIU_Size_t total;
+    size_t total;
     MPIDU_Sock_progress_update_func_t progress_update;
 } sock_buffer;
 
@@ -1525,7 +1525,7 @@ int MPIDU_Sock_post_close(MPIDU_Sock_t sock)
 #define FUNCNAME MPIDU_Sock_post_read
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_post_read(MPIDU_Sock_t sock, void * buf, MPIU_Size_t minbr, MPIU_Size_t maxbr,
+int MPIDU_Sock_post_read(MPIDU_Sock_t sock, void * buf, size_t minbr, size_t maxbr,
                          MPIDU_Sock_progress_update_func_t fn)
 {
     int mpi_errno = MPI_SUCCESS;
@@ -1651,7 +1651,7 @@ int MPIDU_Sock_post_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIDU_Soc
 #define FUNCNAME MPIDU_Sock_post_write
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_post_write(MPIDU_Sock_t sock, void * buf, MPIU_Size_t min, MPIU_Size_t max, MPIDU_Sock_progress_update_func_t fn)
+int MPIDU_Sock_post_write(MPIDU_Sock_t sock, void * buf, size_t min, size_t max, MPIDU_Sock_progress_update_func_t fn)
 {
     int mpi_errno = MPI_SUCCESS;
     MPIDI_STATE_DECL(MPID_STATE_MPIDU_SOCK_POST_WRITE);
@@ -2652,7 +2652,7 @@ int MPIDU_Sock_wakeup(MPIDU_Sock_set_t set)
 #define FUNCNAME MPIDU_Sock_read
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len, MPIU_Size_t * num_read)
+int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, size_t len, size_t * num_read)
 {
     int mpi_errno = MPI_SUCCESS;
     MPL_IOV iov;
@@ -2670,7 +2670,7 @@ int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len, MPIU_Size_t
 #define FUNCNAME MPIDU_Sock_readv
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIU_Size_t * num_read)
+int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_read)
 {
     int mpi_errno = MPI_SUCCESS;
     DWORD nFlags = 0;
@@ -2739,7 +2739,7 @@ int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIU_Size_t *
 #define FUNCNAME MPIDU_Sock_write
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len, MPIU_Size_t * num_written)
+int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, size_t len, size_t * num_written)
 {
     int mpi_errno;
     MPL_IOV iov;
@@ -2757,7 +2757,7 @@ int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len, MPIU_Size_t
 #define FUNCNAME MPIDU_Sock_writev
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIU_Size_t * num_written)
+int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_written)
 {
     int mpi_errno;
     DWORD num_written_local;
diff --git a/src/mpid/common/sock/mpidu_sock.h b/src/mpid/common/sock/mpidu_sock.h
index 8833290..5a47c48 100644
--- a/src/mpid/common/sock/mpidu_sock.h
+++ b/src/mpid/common/sock/mpidu_sock.h
@@ -106,7 +106,7 @@ S*/
 typedef struct MPIDU_Sock_event
 {
     MPIDU_Sock_op_t op_type;
-    MPIU_Size_t num_bytes;
+    size_t num_bytes;
     void * user_ptr;
     int error;
 } MPIDU_Sock_event_t;
@@ -563,7 +563,7 @@ internal progress engine could block on an application routine.
 Module:
 Utility-Sock
 E*/
-typedef int (* MPIDU_Sock_progress_update_func_t)(MPIU_Size_t num_bytes, void * user_ptr);
+typedef int (* MPIDU_Sock_progress_update_func_t)(size_t num_bytes, void * user_ptr);
 
 
 /*@
@@ -622,7 +622,7 @@ one thread is not attempting to post a new operation while another thread is att
 Module:
 Utility-Sock
 @*/
-int MPIDU_Sock_post_read(MPIDU_Sock_t sock, void * buf, MPIU_Size_t minbr, MPIU_Size_t maxbr,
+int MPIDU_Sock_post_read(MPIDU_Sock_t sock, void * buf, size_t minbr, size_t maxbr,
                          MPIDU_Sock_progress_update_func_t fn);
 
 
@@ -741,7 +741,7 @@ need this flexibility?
 Module:
 Utility-Sock
 @*/
-int MPIDU_Sock_post_write(MPIDU_Sock_t sock, void * buf, MPIU_Size_t min, MPIU_Size_t max,
+int MPIDU_Sock_post_write(MPIDU_Sock_t sock, void * buf, size_t min, size_t max,
 			  MPIDU_Sock_progress_update_func_t fn);
 
 
@@ -910,7 +910,7 @@ not attempting to perform an immediate read while another thread is attempting t
 Module:
 Utility-Sock
 @*/
-int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len, MPIU_Size_t * num_read);
+int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, size_t len, size_t * num_read);
 
 
 /*@
@@ -960,7 +960,7 @@ not attempting to perform an immediate read while another thread is attempting t
 Module:
 Utility-Sock
 @*/
-int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIU_Size_t * num_read);
+int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_read);
 
 
 /*@
@@ -1009,7 +1009,7 @@ not attempting to perform an immediate write while another thread is attempting
 Module:
 Utility-Sock
 @*/
-int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len, MPIU_Size_t * num_written);
+int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, size_t len, size_t * num_written);
 
 
 /*@
@@ -1058,7 +1058,7 @@ not attempting to perform an immediate write while another thread is attempting
 Module:
 Utility-Sock
 @*/
-int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIU_Size_t * num_written);
+int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_written);
 
 
 /*@
diff --git a/src/mpid/common/sock/poll/sock.c b/src/mpid/common/sock/poll/sock.c
index a4f257e..0f74dca 100644
--- a/src/mpid/common/sock/poll/sock.c
+++ b/src/mpid/common/sock/poll/sock.c
@@ -111,12 +111,12 @@ struct pollinfo
 	struct
 	{
 	    char * ptr;
-	    MPIU_Size_t min;
-	    MPIU_Size_t max;
+	    size_t min;
+	    size_t max;
 	} buf;
     } read;
     int read_iov_flag;
-    MPIU_Size_t read_nb;
+    size_t read_nb;
     MPIDU_Sock_progress_update_func_t read_progress_update_fn;
     union
     {
@@ -129,12 +129,12 @@ struct pollinfo
 	struct
 	{
 	    char * ptr;
-	    MPIU_Size_t min;
-	    MPIU_Size_t max;
+	    size_t min;
+	    size_t max;
 	} buf;
     } write;
     int write_iov_flag;
-    MPIU_Size_t write_nb;
+    size_t write_nb;
     MPIDU_Sock_progress_update_func_t write_progress_update_fn;
 };
 
diff --git a/src/mpid/common/sock/poll/sock_immed.i b/src/mpid/common/sock/poll/sock_immed.i
index b0f183f..ffc7f71 100644
--- a/src/mpid/common/sock/poll/sock_immed.i
+++ b/src/mpid/common/sock/poll/sock_immed.i
@@ -258,8 +258,8 @@ int MPIDU_Sock_accept(struct MPIDU_Sock * listener,
 #define FUNCNAME MPIDU_Sock_read
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len, 
-		    MPIU_Size_t * num_read)
+int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, size_t len,
+		    size_t * num_read)
 {
     struct pollfd * pollfd;
     struct pollinfo * pollinfo;
@@ -302,7 +302,7 @@ int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len,
 
     if (nb > 0)
     {
-	*num_read = (MPIU_Size_t) nb;
+	*num_read = (size_t) nb;
     }
     /* --BEGIN ERROR HANDLING-- */
     else if (nb == 0)
@@ -378,7 +378,7 @@ int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len,
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, 
-		     MPIU_Size_t * num_read)
+		     size_t * num_read)
 {
     struct pollfd * pollfd;
     struct pollinfo * pollinfo;
@@ -415,7 +415,7 @@ int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n,
 
     if (nb > 0)
     {
-	*num_read = (MPIU_Size_t) nb;
+	*num_read = (size_t) nb;
     }
     /* --BEGIN ERROR HANDLING-- */
     else if (nb == 0)
@@ -491,8 +491,8 @@ int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n,
 #define FUNCNAME MPIDU_Sock_write
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len, 
-		     MPIU_Size_t * num_written)
+int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, size_t len,
+		     size_t * num_written)
 {
     struct pollfd * pollfd;
     struct pollinfo * pollinfo;
@@ -577,7 +577,7 @@ int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, MPIU_Size_t len,
 #define FUNCNAME MPIDU_Sock_writev
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIU_Size_t * num_written)
+int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, size_t * num_written)
 {
     struct pollfd * pollfd;
     struct pollinfo * pollinfo;
@@ -614,7 +614,7 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPL_IOV * iov, int iov_n, MPIU_Size_t *
 
     if (nb >= 0)
     {
-	*num_written = (MPIU_Size_t) nb;
+	*num_written = (size_t) nb;
     }
     /* --BEGIN ERROR HANDLING-- */
     else if (errno == EAGAIN || errno == EWOULDBLOCK)
diff --git a/src/mpid/common/sock/poll/sock_post.i b/src/mpid/common/sock/poll/sock_post.i
index 8005466..9c1f6d7 100644
--- a/src/mpid/common/sock/poll/sock_post.i
+++ b/src/mpid/common/sock/poll/sock_post.i
@@ -453,7 +453,7 @@ int MPIDU_Sock_listen(struct MPIDU_Sock_set * sock_set, void * user_ptr,
 #define FUNCNAME MPIDU_Sock_post_read
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_post_read(struct MPIDU_Sock * sock, void * buf, MPIU_Size_t minlen, MPIU_Size_t maxlen,
+int MPIDU_Sock_post_read(struct MPIDU_Sock * sock, void * buf, size_t minlen, size_t maxlen,
 			 MPIDU_Sock_progress_update_func_t fn)
 {
     struct pollfd * pollfd;
@@ -552,7 +552,7 @@ int MPIDU_Sock_post_readv(struct MPIDU_Sock * sock, MPL_IOV * iov, int iov_n, MP
 #define FUNCNAME MPIDU_Sock_post_write
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDU_Sock_post_write(struct MPIDU_Sock * sock, void * buf, MPIU_Size_t minlen, MPIU_Size_t maxlen,
+int MPIDU_Sock_post_write(struct MPIDU_Sock * sock, void * buf, size_t minlen, size_t maxlen,
 			  MPIDU_Sock_progress_update_func_t fn)
 {
     struct pollfd * pollfd;
diff --git a/src/mpid/common/sock/poll/socki_util.i b/src/mpid/common/sock/poll/socki_util.i
index 98e6e33..2033801 100644
--- a/src/mpid/common/sock/poll/socki_util.i
+++ b/src/mpid/common/sock/poll/socki_util.i
@@ -23,7 +23,7 @@ static void MPIDU_Socki_sock_free(struct MPIDU_Sock * sock);
 
 static int MPIDU_Socki_event_enqueue(struct pollinfo * pollinfo, 
 				     enum MPIDU_Sock_op op, 
-				     MPIU_Size_t num_bytes,
+				     size_t num_bytes,
 				     void * user_ptr, int error);
 static inline int MPIDU_Socki_event_dequeue(struct MPIDU_Sock_set * sock_set, 
 					    int * set_elem, 
@@ -832,7 +832,7 @@ static void MPIDU_Socki_sock_free(struct MPIDU_Sock * sock)
 #define FUNCNAME MPIDU_Socki_event_enqueue
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-static int MPIDU_Socki_event_enqueue(struct pollinfo * pollinfo, MPIDU_Sock_op_t op, MPIU_Size_t num_bytes,
+static int MPIDU_Socki_event_enqueue(struct pollinfo * pollinfo, MPIDU_Sock_op_t op, size_t num_bytes,
 				     void * user_ptr, int error)
 {
     struct MPIDU_Sock_set * sock_set = pollinfo->sock_set;
diff --git a/src/util/wrappers/mpiu_shm_wrappers.h b/src/util/wrappers/mpiu_shm_wrappers.h
index f5eab06..45ab5d6 100644
--- a/src/util/wrappers/mpiu_shm_wrappers.h
+++ b/src/util/wrappers/mpiu_shm_wrappers.h
@@ -250,7 +250,7 @@ static inline void MPIU_SHMW_Hnd_free(MPIU_SHMW_Hnd_t hnd)
     }
 }
 
-static inline int MPIU_SHMW_Seg_open(MPIU_SHMW_Hnd_t hnd, MPIU_Size_t seg_sz);
+static inline int MPIU_SHMW_Seg_open(MPIU_SHMW_Hnd_t hnd, size_t seg_sz);
 static inline int MPIU_SHMW_Hnd_deserialize_by_ref(MPIU_SHMW_Hnd_t hnd, char **ser_hnd_ptr);
 
 /* FIXME : Don't print ENGLISH strings on error. Define the error
@@ -457,7 +457,7 @@ static inline int MPIU_SHMW_Hnd_finalize(
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_create_attach_templ(
-    MPIU_SHMW_Hnd_t hnd, MPIU_Size_t seg_sz, char **shm_addr_ptr,
+    MPIU_SHMW_Hnd_t hnd, size_t seg_sz, char **shm_addr_ptr,
     int offset, int flag)
 {
     int mpi_errno = MPI_SUCCESS;
@@ -522,7 +522,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_detach(
-    MPIU_SHMW_Hnd_t hnd, char **shm_addr_ptr, MPIU_Size_t seg_sz)
+    MPIU_SHMW_Hnd_t hnd, char **shm_addr_ptr, size_t seg_sz)
 {
     int mpi_errno = MPI_SUCCESS;
     int rc = -1;
@@ -588,7 +588,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_create_attach_templ(
-    MPIU_SHMW_Hnd_t hnd, MPIU_Size_t seg_sz, char **shm_addr_ptr,
+    MPIU_SHMW_Hnd_t hnd, size_t seg_sz, char **shm_addr_ptr,
     int offset, int flag)
 {
     int mpi_errno = MPI_SUCCESS;
@@ -672,7 +672,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_detach(
-    MPIU_SHMW_Hnd_t hnd, char **shm_addr_ptr, MPIU_Size_t seg_sz)
+    MPIU_SHMW_Hnd_t hnd, char **shm_addr_ptr, size_t seg_sz)
 {
     int mpi_errno = MPI_SUCCESS;
     int rc = -1;
@@ -722,7 +722,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_create_attach_templ(
-    MPIU_SHMW_Hnd_t hnd, MPIU_Size_t seg_sz, char **shm_addr_ptr,
+    MPIU_SHMW_Hnd_t hnd, size_t seg_sz, char **shm_addr_ptr,
     int offset, int flag)
 {
     int mpi_errno = MPI_SUCCESS;
@@ -790,7 +790,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_detach(
-    MPIU_SHMW_Hnd_t hnd, char **shm_addr_ptr, MPIU_Size_t seg_sz)
+    MPIU_SHMW_Hnd_t hnd, char **shm_addr_ptr, size_t seg_sz)
 {
     int mpi_errno = MPI_SUCCESS;
     int rc = -1;
@@ -827,7 +827,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_create(
-    MPIU_SHMW_Hnd_t hnd, MPIU_Size_t seg_sz)
+    MPIU_SHMW_Hnd_t hnd, size_t seg_sz)
 {
     int mpi_errno = MPI_SUCCESS;
 
@@ -854,7 +854,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_open(
-    MPIU_SHMW_Hnd_t hnd, MPIU_Size_t seg_sz)
+    MPIU_SHMW_Hnd_t hnd, size_t seg_sz)
 {
     int mpi_errno = MPI_SUCCESS;
 
@@ -882,7 +882,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_create_and_attach(
-    MPIU_SHMW_Hnd_t hnd, MPIU_Size_t seg_sz, char **shm_addr_ptr, 
+    MPIU_SHMW_Hnd_t hnd, size_t seg_sz, char **shm_addr_ptr,
     int offset)
 {
     int mpi_errno = MPI_SUCCESS;
@@ -914,7 +914,7 @@ fn_fail:
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static inline int MPIU_SHMW_Seg_attach(
-    MPIU_SHMW_Hnd_t hnd, MPIU_Size_t seg_sz, char **shm_addr_ptr,
+    MPIU_SHMW_Hnd_t hnd, size_t seg_sz, char **shm_addr_ptr,
     int offset)
 {
     int mpi_errno = MPI_SUCCESS;

http://git.mpich.org/mpich.git/commitdiff/bcf8e55a21eaa265a1aae00467dd30185043229d

commit bcf8e55a21eaa265a1aae00467dd30185043229d
Author: Pavan Balaji <balaji at anl.gov>
Date:   Sun Apr 24 12:51:40 2016 -0500

    Remove MPIU_BOOL.
    
    Generic types such as this need to be defined in MPL.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/include/mpir_misc.h b/src/include/mpir_misc.h
index ca551c7..022f8b5 100644
--- a/src/include/mpir_misc.h
+++ b/src/include/mpir_misc.h
@@ -16,8 +16,6 @@
 #define MPIR_FINALIZE_CALLBACK_DEFAULT_PRIO 0
 #define MPIR_FINALIZE_CALLBACK_MAX_PRIO 10
 
-typedef int MPIU_BOOL;
-
 /* Define a typedef for the errflag value used by many internal
  * functions.  If an error needs to be returned, these values can be
  * used to signal such.  More details can be found further down in the
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index 9d87e5d..cb8eedf 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -692,7 +692,7 @@ int MPID_Progress_poke(void);
 
 int MPID_Get_processor_name( char *name, int namelen, int *resultlen);
 int MPID_Get_universe_size(int  * universe_size);
-int MPID_Comm_get_lpid(MPIR_Comm *comm_ptr, int idx, int * lpid_ptr, MPIU_BOOL is_remote);
+int MPID_Comm_get_lpid(MPIR_Comm *comm_ptr, int idx, int * lpid_ptr, MPL_bool is_remote);
 
 void MPID_Request_init(MPIR_Request *);
 void MPID_Request_finalize(MPIR_Request *);
diff --git a/src/mpid/ch3/src/mpid_vc.c b/src/mpid/ch3/src/mpid_vc.c
index 3541876..a852f95 100644
--- a/src/mpid/ch3/src/mpid_vc.c
+++ b/src/mpid/ch3/src/mpid_vc.c
@@ -300,7 +300,7 @@ int MPIDI_VCR_Dup(MPIDI_VCR orig_vcr, MPIDI_VCR * new_vcr)
 #define FUNCNAME MPID_Comm_get_lpid
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPID_Comm_get_lpid(MPIR_Comm *comm_ptr, int idx, int * lpid_ptr, MPIU_BOOL is_remote)
+int MPID_Comm_get_lpid(MPIR_Comm *comm_ptr, int idx, int * lpid_ptr, MPL_bool is_remote)
 {
     MPIDI_STATE_DECL(MPID_STATE_MPID_VCR_GET_LPID);
 
diff --git a/src/mpl/include/mpl_base.h b/src/mpl/include/mpl_base.h
index cb033d2..7718502 100644
--- a/src/mpl/include/mpl_base.h
+++ b/src/mpl/include/mpl_base.h
@@ -111,4 +111,6 @@
 #define MPL_UNIQUE_IMPL2_(prefix_,line_) MPL_UNIQUE_IMPL3_(prefix_,line_)
 #define MPL_UNIQUE_IMPL3_(prefix_,line_) prefix_##line_
 
+typedef int MPL_bool;
+
 #endif /* !defined(MPL_BASE_H_INCLUDED) */

http://git.mpich.org/mpich.git/commitdiff/16faa92ccb6cb3b0dec392247beea733093c527a

commit 16faa92ccb6cb3b0dec392247beea733093c527a
Author: Pavan Balaji <balaji at anl.gov>
Date:   Thu Apr 21 17:02:30 2016 -0500

    Refactor mpiimpl.h and friends.
    
    Split mpiimpl.h and friends into multiple files and move code around
    to make the organization cleaner.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/.gitignore b/.gitignore
index 53606ff..ad7bd8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -179,7 +179,7 @@ Makefile.am-stamp
 
 # random additions 2
 /mpich-doxygen
-/src/include/glue_romio.h
+/src/include/mpir_ext.h
 /src/include/mpichinfo.h
 /src/packaging/envmods/mpich.module
 /src/packaging/pkgconfig/mpich.pc
@@ -224,8 +224,8 @@ Makefile.am-stamp
 /src/binding/fortran/use_mpi_f08/Makefile.mk
 
 # MPICH2 parameter handling
-/src/include/mpich_cvars.h
-/src/util/cvar/mpich_cvars.c
+/src/include/mpir_cvars.h
+/src/util/cvar/mpir_cvars.c
 
 
 ################################################################################
diff --git a/CHANGES b/CHANGES
index 5a7aece..ca19721 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1240,11 +1240,11 @@ www.mcs.anl.gov/mpi/mpich2/mpich2_1_0_6changes.htm.
   reorder == true cases in Cart_create and Graph_create).  
 
 - New memory allocation macros, MPIU_CHK[PL]MEM_*(), have been added to help
-  prevent memory leaks.  See mpich2/src/include/mpimem.h.
+  prevent memory leaks.  See mpich2/src/include/mpir_mem.h.
 
 - New error reporting macros, MPIU_ERR_*, have been added to simplify the error
   handling throughout the code, making the code easier to read.  See
-  mpich2/src/include/mpierrs.h.
+  mpich2/src/include/mpir_err.h.
 
 - Interprocess communication using the Sock interface (sock and ssm channels)
   may now be bound to a particular destination interface using the environment
diff --git a/configure.ac b/configure.ac
index 2575c50..d241945 100644
--- a/configure.ac
+++ b/configure.ac
@@ -913,7 +913,7 @@ fi
 if test "$enable_error_checking" = "yes" ; then
    enable_error_checking=all
 fi
-# glue_romio.h needs the variable HAVE_ERROR_CHECKING to have the value 0 or 1
+# mpir_ext.h needs the variable HAVE_ERROR_CHECKING to have the value 0 or 1
 HAVE_ERROR_CHECKING=0
 case "$enable_error_checking" in 
     no)
@@ -932,7 +932,7 @@ case "$enable_error_checking" in
     AC_MSG_WARN([Unknown value $enable_error_checking for enable-error-checking])
     ;;
 esac
-# permit @HAVE_ERROR_CHECKING@ substitution in glue_romio.h 
+# permit @HAVE_ERROR_CHECKING@ substitution in mpir_ext.h
 AC_SUBST([HAVE_ERROR_CHECKING])
 
 # error-messages
@@ -4726,7 +4726,7 @@ export MPI_F77_AINT
 # ----------------------------------------------------------------------------
 
 # define MPIU_Size_t - used to express the size of objects
-# This is used in mpiu_type_defs.h to define MPIU_SIZE_T, 
+# This is used in mpir_type_defs.h to define MPIU_SIZE_T,
 # and is used in various parts of ch3 and mpid/common/sock.  
 # This is used to handle the potential problem that a message is 
 # too long to fit with an int.  However, we may still need to make
@@ -4754,7 +4754,7 @@ typedef $MPI_AINT MPI_Aint;
 #ifdef HAVE_STDINT_H
 #include <stdint.h>
 #endif
-#include "${master_top_srcdir}/src/include/mpibsend.h"]
+#include "${master_top_srcdir}/src/include/mpir_bsend.h"]
 )
 if test "$ac_cv_sizeof_MPIR_Bsend_data_t" = "0" ; then
     AC_MSG_ERROR([Unable to determine the size of MPI_BSEND_OVERHEAD])
@@ -5540,7 +5540,7 @@ AC_OUTPUT(Makefile \
           test/commands/Makefile \
           src/include/mpichinfo.h \
 	  mpich-doxygen \
-          src/include/glue_romio.h \
+          src/include/mpir_ext.h \
           src/binding/cxx/mpicxx.h \
 	  src/binding/fortran/mpif_h/mpif.h \
 	  src/binding/fortran/mpif_h/setbotf.f \
diff --git a/maint/decode_handle b/maint/decode_handle
index 6ac7cb3..9b740fe 100755
--- a/maint/decode_handle
+++ b/maint/decode_handle
@@ -57,7 +57,7 @@ while (scalar @ARGV) {
     }
 }
 
-## code from src/include/mpihandlemem.h from which this script was derived
+## code from src/include/mpir_objects.h from which this script was derived
 ## ----8<----
 ##
 ## typedef enum MPIR_Object_kind {
diff --git a/maint/extractcvars.in b/maint/extractcvars.in
index ed95bd6..c352201 100755
--- a/maint/extractcvars.in
+++ b/maint/extractcvars.in
@@ -42,8 +42,8 @@ my $alt_ns = "MPIR_PARAM";
 my $dep_ns = "MPICH";
 
 # Default :output source files
-my $header_file = "@abs_srcdir@/../src/include/mpich_cvars.h";
-my $c_file      = "@abs_srcdir@/../src/util/cvar/mpich_cvars.c";
+my $header_file = "@abs_srcdir@/../src/include/mpir_cvars.h";
+my $c_file      = "@abs_srcdir@/../src/util/cvar/mpir_cvars.c";
 my $readme_file = "@abs_srcdir@/../README.envvar";
 
 sub Usage {
diff --git a/src/binding/cxx/buildiface b/src/binding/cxx/buildiface
index 1c218f9..607cea3 100755
--- a/src/binding/cxx/buildiface
+++ b/src/binding/cxx/buildiface
@@ -2349,8 +2349,7 @@ print $OUTFD "\
 
     # Keyval and attribute routines
     print $OUTFD <<EOT;
-#include \"mpi_attr.h\"
-#include \"mpi_lang.h\"
+#include \"mpir_attr_generic.h\"
 static
 int
 MPIR_Comm_delete_attr_cxx_proxy(
diff --git a/src/binding/fortran/mpif_h/buildiface b/src/binding/fortran/mpif_h/buildiface
index 596835c..5c30884 100755
--- a/src/binding/fortran/mpif_h/buildiface
+++ b/src/binding/fortran/mpif_h/buildiface
@@ -4221,11 +4221,9 @@ sub build_specials {
     $args = "void *, int *";
     &print_header( "mpi_", "MPI_Address", "address", $args );
     # Add the definitions needed for error reporting
-    # (We could use mpiimpl.h, but mpierrs.h should be sufficient)
-    # mpierror.h references FILE *, so needs stdio.h
-    print $OUTFD "#include \"mpierrs.h\"\n"; 
+    # (We could use mpiimpl.h, but mpir_err.h should be sufficient)
+    print $OUTFD "#include \"mpir_err.h\"\n";
     print $OUTFD "#include <stdio.h>\n"; 
-    print $OUTFD "#include \"mpierror.h\"\n"; 
     &print_routine_type_decl( $OUTFD, $out_prefix, "address" );
     &print_args( $OUTFD, $args, 0, "address" );
     #&print_attr;
@@ -4256,11 +4254,9 @@ sub build_specials {
     $args = "void *, MPI_FAintp";
     &print_header( "mpi_", "MPI_Get_address", "get_address", $args );
     # Add the definitions needed for error reporting
-    # (We could use mpiimpl.h, but mpierrs.h should be sufficient)
-    # mpierror.h references FILE *, so needs stdio.h
-    print $OUTFD "#include \"mpierrs.h\"\n"; 
+    # (We could use mpiimpl.h, but mpir_err.h should be sufficient)
+    print $OUTFD "#include \"mpir_err.h\"\n";
     print $OUTFD "#include <stdio.h>\n"; 
-    print $OUTFD "#include \"mpierror.h\"\n"; 
     &print_routine_type_decl( $OUTFD, $out_prefix, "get_address" );
     &print_args( $OUTFD, $args, 0, "get_address" );
     #&print_attr;
@@ -4573,10 +4569,9 @@ MPIR_Comm_delete_attr_f77_proxy(
  * DO NOT EDIT
  */
 #include \"mpi_fortimpl.h\"
-/* mpierrs.h and mpierror.h for the error code creation */
-#include \"mpierrs.h\"
+/* mpir_err.h for the error code creation */
+#include \"mpir_err.h\"
 #include <stdio.h> 
-#include \"mpierror.h\"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Status_f2c */
 #if defined(USE_WEAK_SYMBOLS) && !defined(USE_ONLY_MPI_NAMES) 
@@ -4647,10 +4642,9 @@ print $OUTFD "\
  * DO NOT EDIT
  */
 #include \"mpi_fortimpl.h\"
-/* mpierrs.h and mpierror.h for the error code creation */
-#include \"mpierrs.h\"
+/* mpir_err.h for the error code creation */
+#include \"mpir_err.h\"
 #include <stdio.h> 
-#include \"mpierror.h\"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Status_c2f */
 #if defined(USE_WEAK_SYMBOLS) && !defined(USE_ONLY_MPI_NAMES) 
diff --git a/src/binding/fortran/mpif_h/mpi_fortimpl.h b/src/binding/fortran/mpif_h/mpi_fortimpl.h
index 364187c..2d6c765 100644
--- a/src/binding/fortran/mpif_h/mpi_fortimpl.h
+++ b/src/binding/fortran/mpif_h/mpi_fortimpl.h
@@ -122,20 +122,7 @@
 
 /* mpi.h includes the definitions of MPI_Fint */
 #include "mpi.h"
-#include "mpiutil.h"
-
-/* Include prototypes of helper functions.
-   These include MPIR_Keyval_set_fortran, fortran90, and 
-   Grequest_set_lang_f77 */
-#include "mpi_f77interface.h"
-/* Include the attribute access routines that permit access to the 
-   attribute or its pointer, needed for cross-language access to attributes */
-#include "mpi_attr.h"
-
-/* mpi_lang.h - Prototypes for language specific routines. Currently used to
- * set keyval attribute callbacks
- */
-#include "mpi_lang.h"
+#include "mpiimpl.h"
 
 /* If there is no MPI I/O support, and we are still using MPIO_Request,
    make sure that one is defined */
@@ -156,8 +143,8 @@ typedef MPI_Aint MPI_FAint;
 /* Fortran logicals */
 /* The definitions for the Fortran logical values are also needed 
    by the reduction operations in mpi/coll/opland, oplor, and oplxor, 
-   so they are defined in src/include/mpi_fortlogical.h */
-#include "mpi_fortlogical.h"
+   so they are defined in src/include/mpir_fortlogical.h */
+#include "mpir_fortlogical.h"
 
 
 /* MPIR_F_MPI_BOTTOM is the address of the Fortran MPI_BOTTOM value */
diff --git a/src/binding/fortran/use_mpi_f08/mpi_c_interface_glue.f90 b/src/binding/fortran/use_mpi_f08/mpi_c_interface_glue.f90
index 74d0f07..208f1f2 100644
--- a/src/binding/fortran/use_mpi_f08/mpi_c_interface_glue.f90
+++ b/src/binding/fortran/use_mpi_f08/mpi_c_interface_glue.f90
@@ -22,7 +22,7 @@ public :: MPIR_Win_delete_attr_f08_proxy
 public :: MPIR_Keyval_set_proxy
 public :: MPIR_Grequest_set_lang_fortran
 
-! Bind to C's enum MPIR_AttrType in mpi_attr.h
+! Bind to C's enum MPIR_AttrType in mpir_attr_generic.h
 enum, bind(C)
     enumerator :: MPIR_ATTR_PTR  = 0
     enumerator :: MPIR_ATTR_AINT = 1
diff --git a/src/glue/romio/glue_romio.c b/src/glue/romio/glue_romio.c
index 7eaa9c3..d2bded3 100644
--- a/src/glue/romio/glue_romio.c
+++ b/src/glue/romio/glue_romio.c
@@ -9,7 +9,7 @@
  * the headers it includes) directly inside of ROMIO. */
 
 #include "mpiimpl.h"
-#include "glue_romio.h"
+#include "mpir_ext.h"
 
 #if defined (MPL_USE_DBG_LOGGING)
 static MPL_dbg_class DBG_ROMIO;
diff --git a/src/include/Makefile.mk b/src/include/Makefile.mk
index 2336291..b9a28ea 100644
--- a/src/include/Makefile.mk
+++ b/src/include/Makefile.mk
@@ -12,47 +12,69 @@ nodist_include_HEADERS += src/include/mpi.h
 ## ".h.in" file.  This ensures that these files are _not_ distributed, which is
 ## important because they contain lots of info that is computed by configure.
 nodist_noinst_HEADERS +=     \
-    src/include/glue_romio.h \
     src/include/mpichinfo.h \
     src/include/mpichconf.h
 
-## listed here in BUILT_SOURCES to ensure that if glue_romio.h is out of date
+## listed here in BUILT_SOURCES to ensure that if mpir_ext.h is out of date
 ## that it will be rebuilt before make recurses into src/mpi/romio and runs
 ## make.  This isn't normally necessary when automake is tracking all the
 ## dependencies, but that's not true for SUBDIRS packages
-BUILT_SOURCES += src/include/glue_romio.h
+BUILT_SOURCES += src/include/mpir_ext.h
 
 noinst_HEADERS +=                   \
     src/include/bsocket.h           \
-    src/include/mpi_attr.h          \
-    src/include/mpi_f77interface.h  \
-    src/include/mpi_fortlogical.h   \
-    src/include/mpi_lang.h          \
+    src/include/mpir_refcount.h     \
+    src/include/mpir_refcount_global.h \
+    src/include/mpir_refcount_pobj.h \
+    src/include/mpir_refcount_single.h \
+    src/include/mpir_refcount.h     \
+    src/include/mpir_assert.h       \
+    src/include/mpir_misc_post.h    \
+    src/include/mpir_type_defs.h    \
+    src/include/mpir_dbg.h          \
+    src/include/mpir_attr_generic.h \
+    src/include/mpir_attr.h         \
+    src/include/mpir_f77interface.h \
+    src/include/mpir_cxxinterface.h \
+    src/include/mpir_fortlogical.h   \
     src/include/mpiallstates.h      \
-    src/include/mpibsend.h          \
-    src/include/mpich_cvars.h  \
+    src/include/mpir_bsend.h          \
+    src/include/mpir_cvars.h        \
     src/include/mpichconfconst.h    \
-    src/include/mpierror.h          \
-    src/include/mpierrs.h           \
-    src/include/mpiext.h            \
-    src/include/mpifunc.h           \
-    src/include/mpihandlemem.h      \
+    src/include/mpir_err.h          \
+    src/include/mpir_ext.h            \
+    src/include/mpir_func.h         \
+    src/include/mpir_coll.h         \
+    src/include/mpir_comm.h         \
+    src/include/mpir_debugger.h     \
+    src/include/mpir_request.h      \
+    src/include/mpir_status.h       \
+    src/include/mpir_contextid.h    \
+    src/include/mpir_objects.h      \
+    src/include/mpir_pointers.h     \
+    src/include/mpir_topo.h         \
+    src/include/mpir_group.h        \
+    src/include/mpir_errhandler.h   \
+    src/include/mpir_info.h         \
     src/include/mpiimpl.h           \
-    src/include/mpimem.h            \
+    src/include/mpir_mem.h          \
+    src/include/mpir_thread.h       \
     src/include/mpir_nbc.h          \
-    src/include/mpir_type_defs.h    \
-    src/include/mpitimerimpl.h      \
-    src/include/mpiu_utarray.h      \
-    src/include/mpiu_uthash.h       \
-    src/include/mpiutil.h           \
+    src/include/mpir_op.h           \
+    src/include/mpir_process.h      \
+    src/include/mpir_utarray.h      \
+    src/include/mpir_uthash.h       \
+    src/include/mpir_misc.h         \
+    src/include/mpir_tags.h         \
+    src/include/mpir_datatype.h     \
+    src/include/mpir_win.h          \
+    src/include/mpir_pt2pt.h        \
     src/include/nopackage.h         \
-    src/include/oputil.h            \
     src/include/pmi.h               \
     src/include/pmi2.h              \
     src/include/rlog.h              \
     src/include/rlog_macros.h       \
-    src/include/oputil.h            \
-    src/include/mpiinfo.h
+    src/include/mpir_op_util.h
 
-src/include/mpich_cvars.h:
+src/include/mpir_cvars.h:
 	$(top_srcdir)/maint/extractcvars --dirs="`cat $(top_srcdir)/maint/cvardirs`"
diff --git a/src/include/glue_romio.h.in b/src/include/glue_romio.h.in
deleted file mode 100644
index fc16212..0000000
--- a/src/include/glue_romio.h.in
+++ /dev/null
@@ -1,48 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2011 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef GLUE_ROMIO_H_INCLUDED
-#define GLUE_ROMIO_H_INCLUDED
-
-int MPIR_Ext_assert_fail(const char *cond, const char *file_name, int line_num);
-
-#if (!defined(NDEBUG) && (@HAVE_ERROR_CHECKING@))
-#define MPIR_Ext_assert(a_)                                \
-    do {                                                   \
-        if (!(a_)) {                                       \
-            MPIR_Ext_assert_fail(#a_, __FILE__, __LINE__); \
-        }                                                  \
-    } while (0)
-#else
-#define MPIR_Ext_assert(a_) do {} while(0)
-#endif
-
-
-extern int MPIR_Ext_dbg_romio_terse_enabled;
-extern int MPIR_Ext_dbg_romio_typical_enabled;
-extern int MPIR_Ext_dbg_romio_verbose_enabled;
-
-/* a copy of MPIU_Ensure_Aint_fits_in_pointer for external use, slightly
- * modified to use ROMIO's version of the pointer-casting macro */
-#define MPIR_Ext_ensure_Aint_fits_in_pointer(aint) \
-  MPIR_Ext_assert((aint) == (MPI_Aint)(uintptr_t) ADIOI_AINT_CAST_TO_VOID_PTR(aint));
-
-/* to be called early by ROMIO's initialization process in order to setup init-time
- * glue code that cannot be initialized statically */
-int MPIR_Ext_init(void);
-
-void MPIR_Ext_cs_enter(void);
-void MPIR_Ext_cs_exit(void);
-void MPIR_Ext_cs_yield(void);
-
-/* to facilitate error checking */
-int MPIR_Ext_datatype_iscommitted(MPI_Datatype datatype);
-
-/* make comm split based on access to a common file system easier */
-int MPIR_Get_node_id(MPI_Comm comm, int rank, int *id);
-
-#endif /* defined(GLUE_ROMIO_H_INCLUDED) */
-
diff --git a/src/include/mpi_attr.h b/src/include/mpi_attr.h
deleted file mode 100644
index 0369d1e..0000000
--- a/src/include/mpi_attr.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*  
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef MPI_ATTR_H_INCLUDED
-#define MPI_ATTR_H_INCLUDED
-
-/* bit 0 distinguishes between pointers (0) and integers (1) */
-typedef enum
-  { MPIR_ATTR_PTR=0, MPIR_ATTR_AINT=1, MPIR_ATTR_INT=3 } MPIR_AttrType;
-
-#define MPIR_ATTR_KIND(_a) (_a & 0x1)
-
-int MPIR_CommSetAttr( MPI_Comm, int, void *, MPIR_AttrType );
-int MPIR_TypeSetAttr( MPI_Datatype, int, void *, MPIR_AttrType );
-int MPIR_WinSetAttr( MPI_Win, int, void *, MPIR_AttrType );
-int MPIR_CommGetAttr( MPI_Comm, int, void *, int *, MPIR_AttrType );
-int MPIR_TypeGetAttr( MPI_Datatype, int, void *, int *, MPIR_AttrType );
-int MPIR_WinGetAttr( MPI_Win, int, void *, int *, MPIR_AttrType );
-
-int MPIR_CommGetAttr_fort( MPI_Comm, int, void *, int *, MPIR_AttrType );
-
-#endif /* MPI_ATTR_H_INCLUDED */
diff --git a/src/include/mpi_lang.h b/src/include/mpi_lang.h
deleted file mode 100644
index 4810dd2..0000000
--- a/src/include/mpi_lang.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- *
- * Portions of this code were written by Microsoft. Those portions are
- * Copyright (c) 2007 Microsoft Corporation. Microsoft grants
- * permission to use, reproduce, prepare derivative works, and to
- * redistribute to others. The code is licensed "as is." The User
- * bears the risk of using it. Microsoft gives no express warranties,
- * guarantees or conditions. To the extent permitted by law, Microsoft
- * excludes the implied warranties of merchantability, fitness for a
- * particular purpose and non-infringement.
- */
-
-#ifndef MPI_LANG_H_INCLUDED
-#define MPI_LANG_H_INCLUDED
-
-#include "mpi_attr.h"
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-/*E
-  Language bindings for MPI
-
-  A few operations in MPI need to know how to marshal the callback into the calling
-  lanuage calling convention. The marshaling code is provided by a thunk layer which
-  implements the correct behavior.  Examples of these callback functions are the
-  keyval attribute copy and delete functions.
-
-  Module:
-  Attribute-DS
-  E*/
-
-/*
- * Support bindings for Attribute copy/del callbacks
- * Consolidate Comm/Type/Win attribute functions together as the handle type is the same
- * use MPI_Comm for the prototypes
- */
-typedef
-int
-(MPIR_Attr_copy_proxy)(
-    MPI_Comm_copy_attr_function* user_function,
-    int handle,
-    int keyval,
-    void* extra_state,
-    MPIR_AttrType attrib_type,
-    void* attrib,
-    void** attrib_copy,
-    int* flag
-    );
-
-typedef
-int
-(MPIR_Attr_delete_proxy)(
-    MPI_Comm_delete_attr_function* user_function,
-    int handle,
-    int keyval,
-    MPIR_AttrType attrib_type,
-    void* attrib,
-    void* extra_state
-    );
-
-void
-MPIR_Keyval_set_proxy(
-    int keyval,
-    MPIR_Attr_copy_proxy copy_proxy,
-    MPIR_Attr_delete_proxy delete_proxy
-    );
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif /* MPI_LANG_H_INCLUDED */
diff --git a/src/include/mpichconfconst.h b/src/include/mpichconfconst.h
index 24d983c..9b0f09d 100644
--- a/src/include/mpichconfconst.h
+++ b/src/include/mpichconfconst.h
@@ -18,7 +18,6 @@
 #define MPICH_ERROR_MSG_GENERIC 2
 #define MPICH_ERROR_MSG_ALL 8
 
-
 /* -------------------------------------------------------------------- */
 /* thread-related constants */
 /* -------------------------------------------------------------------- */
diff --git a/src/include/mpierror.h b/src/include/mpierror.h
deleted file mode 100644
index 4e1836a..0000000
--- a/src/include/mpierror.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef MPIERROR_H_INCLUDED
-#define MPIERROR_H_INCLUDED
-
-/* Error severity */
-#define MPIR_ERR_FATAL 1
-#define MPIR_ERR_RECOVERABLE 0
-
-struct MPIR_Comm;
-struct MPIR_Win;
-
-/* Bindings for internal routines */
-int MPIR_Err_return_comm( struct MPIR_Comm *, const char [], int );
-int MPIR_Err_return_win( struct MPIR_Win *, const char [], int );
-#ifdef MPI__FILE_DEFINED
-/* Only define if we have MPI_File */
-int MPIR_Err_return_file( MPI_File, const char [], int ); /* Romio version */
-#endif
-/* FIXME:
- * Update this description to match the current version of the routine, 
- * in particular, the pseudo-format types (even better, fix it so that
- * the pseudo format types can work with the format attribute check).
- */
-/*@
-  MPIR_Err_create_code - Create an error code and associated message
-  to report an error
-
-  Input Parameters:
-+ lastcode - Previous error code (see notes)
-. severity  - Indicates severity of error
-. fcname - Name of the function in which the error has occurred.  
-. line  - Line number (usually '__LINE__')
-. class - Error class
-. generic_msg - A generic message to be used if not instance-specific
- message is available
-. instance_msg - A message containing printf-style formatting commands
-  that, when combined with the instance_parameters, specify an error
-  message containing instance-specific data.
-- instance_parameters - The remaining parameters.  These must match
- the formatting commands in 'instance_msg'.
-
- Notes:
- A typical use is\:
-.vb
-   mpi_errno = MPIR_Err_create_code( mpi_errno, MPIR_ERR_RECOVERABLE, 
-               FCNAME, __LINE__, MPI_ERR_RANK, 
-               "Invalid Rank", "Invalid rank %d", rank );
-.ve
- 
-  Predefined message may also be used.  Any message that uses the
-  prefix '"**"' will be looked up in a table.  This allows standardized 
-  messages to be used for a message that is used in several different locations
-  in the code.  For example, the name '"**rank"' might be used instead of
-  '"Invalid Rank"'; this would also allow the message to be made more
-  specific and useful, such as 
-.vb
-   Invalid rank provided.  The rank must be between 0 and the 1 less than
-   the size of the communicator in this call.
-.ve
-  This interface is compatible with the 'gettext' interface for 
-  internationalization, in the sense that the 'generic_msg' and 'instance_msg' 
-  may be used as arguments to 'gettext' to return a string in the appropriate 
-  language; the implementation of 'MPID_Err_create_code' can then convert
-  this text into the appropriate code value.
-
-  The current set of formatting commands is undocumented and will change.
-  You may safely use '%d' and '%s' (though only use '%s' for names of 
-  objects, not text messages, as using '%s' for a message breaks support for
-  internationalization.
-
-  This interface allows error messages to be chained together.  The first 
-  argument is the last error code; if there is no previous error code, 
-  use 'MPI_SUCCESS'.  
-
-  Extended Format Specifiers:
-  In addition to the standard format specifies (e.g., %d for an int value),
-  MPIR_Err_create_code accepts some additional values that correspond to 
-  various MPI types:
-+ i - an MPI rank; recognizes 'MPI_ANY_SOURCE', 'MPI_PROC_NULL', and 
-      'MPI_ROOT'
-. t - an MPI tag; recognizes 'MPI_ANY_TAG'.
-. A - an MPI assert value.
-. C - an MPI communicator.
-. D - an MPI datatype.
-. E - an MPI Errhandler.
-. F - an MPI File object.
-. G - an MPI Group.
-. I - an MPI info object.
-. O - an MPI Op.
-. R - an MPI Request.
-- W - an MPI Window object.
-
-
-  Module:
-  Error
-
-  @*/
-int MPIR_Err_create_code( int, int, const char [], int, int, const char [], const char [], ... );
-
-#ifdef USE_ERR_CODE_VALIST
-int MPIR_Err_create_code_valist( int, int, const char [], int, int, const char [], const char [], va_list );
-#endif
-
-/*@
-  MPIR_Err_combine_codes - Combine two error codes, or more importantly
-  two lists of error messages.  The list associated with the second error
-  code is appended to the list associated with the first error code.  If
-  the list associated with the first error code has a dangling tail, which
-  is possible if the ring has wrapped and overwritten entries that were
-  once part of the list, then the append operation is not performed and
-  the error code for the first list is returned.
-
-  Input Parameter:
-+ errorcode1 - the error code associated with the first list
-- errorcode2 - the error code associated with the second list
-
-  Return value:
-  An error code which resolves to the combined list of error messages
-
-  Notes:
-  If errorcode1 is equal to MPI_SUCCESS, then errorcode2 is returned.
-  Likewise, if errorcode2 is equal to MPI_SUCCESS, then errorcode1 is
-  returned.
-
-  Module:
-  Error 
-  @*/
-int MPIR_Err_combine_codes(int, int);
-
-int MPIR_Err_is_fatal(int);
-void MPIR_Err_init(void);
-void MPIR_Err_preOrPostInit( void );
-
-/* FIXME: This comment is incorrect because the routine was improperly modified
-   to take an additional argument (the MPIR_Err_get_class_string_func_t).  
-   That arg needs to be removed and this function restored. */
-/*@
-  MPID_Err_get_string - Get the message string that corresponds to an error
-  class or code
-
-  Input Parameter:
-+ code - An error class or code.  If a code, it must have been created by 
-  'MPID_Err_create_code'.
-- msg_len - Length of 'msg'.
-
-  Output Parameter:
-. msg - A null-terminated text string of length (including the null) of no
-  more than 'msg_len'.  
-
-  Return value:
-  Zero on success.  Non-zero returns indicate either (a) 'msg_len' is too
-  small for the message or (b) the value of 'code' is neither a valid 
-  error class or code.
-
-  Notes:
-  This routine is used to implement 'MPI_ERROR_STRING'.
-
-  Module:
-  Error 
-
-  Question:
-  What values should be used for the error returns?  Should they be
-  valid error codes?
-
-  How do we get a good value for 'MPI_MAX_ERROR_STRING' for 'mpi.h'?
-  See 'errgetmsg' for one idea.
-
-  @*/
-typedef int (* MPIR_Err_get_class_string_func_t)(int error, char *str, int length);
-void MPIR_Err_get_string( int, char *, int, MPIR_Err_get_class_string_func_t );
-
-int MPIR_Err_set_msg( int code, const char *msg_string );
-
-/* This routine is called when there is a fatal error. Now public because file
- * error handling is defined in a separate file from comm and win, but all
- * three need to call it */
-void MPIR_Handle_fatal_error(struct MPIR_Comm *comm_ptr,
-	const char fcname[], int errcode);
-
-#define MPIR_ERR_CLASS_MASK 0x0000007f
-#define MPIR_ERR_CLASS_SIZE 128
-#define MPIR_ERR_GET_CLASS(mpi_errno_) (mpi_errno_ & MPIR_ERR_CLASS_MASK)
-
-#endif
diff --git a/src/include/mpiext.h b/src/include/mpiext.h
deleted file mode 100644
index aff4bb1..0000000
--- a/src/include/mpiext.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*  
- *  (C) 2006 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-/* This file contains the prototypes for routines that are used with 
-   "external" modules such as ROMIO.  These allow the different packages to 
-   hide their internal datatypes from one another */
-
-#ifndef MPIEXT_H_INCLUDED
-#define MPIEXT_H_INCLUDED
-
-/* This routine, given an MPI_Errhandler (from a file), returns
-   a pointer to the user-supplied error function.  The last argument
-   is set to an integer indicating that the function is MPI_ERRORS_RETURN 
-   (value == 1), MPI_ERRORS_ARE_FATAL (value == 0), a valid user-function
-   (value == 2), or a valid user-function that is a C++ routine (value == 3)
-
-   This routine is implemented in mpich/src/mpi/errhan/file_set_errhandler.c
-*/
-void MPIR_Get_file_error_routine( MPI_Errhandler, 
-				  void (**)(MPI_File *, int *, ...), 
-				  int * );
-
-/* Invoke the C++ error handler (this invokes a special C++ routine that
- in turn calls the provided function.  That special routine also 
- resets the errorcode to MPI_SUCCESS to prevent the MPICH C++ error handling
- code from throwing an exception when the user routine returns.
-*/
-int MPIR_File_call_cxx_errhandler( MPI_File *, int *, 
-				   void (*)(MPI_File *, int *, ... ) );
-/* 
-   These routines provide access to the MPI_Errhandler field within the 
-   ROMIO MPI_File structure
- */
-int MPIR_ROMIO_Get_file_errhand( MPI_File, MPI_Errhandler * );
-int MPIR_ROMIO_Set_file_errhand( MPI_File, MPI_Errhandler );
-
-/* FIXME: This routine is also defined in adio.h */
-int MPIO_Err_return_file( MPI_File, int );
-
-#endif
diff --git a/src/include/mpifunc.h b/src/include/mpifunc.h
deleted file mode 100644
index f694439..0000000
--- a/src/include/mpifunc.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2008 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef MPIFUNC_H_INCLUDED
-#define MPIFUNC_H_INCLUDED
-
-/* state declaration macros */
-#if defined(MPL_USE_DBG_LOGGING) || defined(MPICH_DEBUG_MEMARENA)
-#define MPID_MPI_STATE_DECL(a)
-#define MPID_MPI_INIT_STATE_DECL(a)
-#define MPID_MPI_FINALIZE_STATE_DECL(a)
-#define MPIDI_STATE_DECL(a)
-
-/* Tell the package to define the rest of the enter/exit macros in
-   terms of these */
-#define NEEDS_FUNC_ENTER_EXIT_DEFS 1
-#endif /* MPL_USE_DBG_LOGGING || MPICH_DEBUG_MEMARENA */
-
-/* function enter and exit macros */
-#if defined(MPL_USE_DBG_LOGGING)
-#define MPIR_FUNC_ENTER(a) MPL_DBG_MSG(MPL_DBG_ROUTINE_ENTER,TYPICAL,"Entering "#a)
-#elif defined(MPICH_DEBUG_MEMARENA)
-#define MPIR_FUNC_ENTER(a) MPL_trvalid("Entering " #a)
-#endif
-
-#if defined(MPL_USE_DBG_LOGGING)
-#define MPIR_FUNC_EXIT(a) MPL_DBG_MSG(MPL_DBG_ROUTINE_EXIT,TYPICAL,"Leaving "#a)
-#elif defined(MPICH_DEBUG_MEMARENA)
-#define MPIR_FUNC_EXIT(a) MPL_trvalid("Leaving " #a)
-#endif
-
-#endif /* MPIFUNC_H_INCLUDED */
diff --git a/src/include/mpiimpl.h b/src/include/mpiimpl.h
index c7a5f1a..e43b486 100644
--- a/src/include/mpiimpl.h
+++ b/src/include/mpiimpl.h
@@ -1,5 +1,5 @@
 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*  
+/*
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  *
@@ -12,12 +12,10 @@
  * excludes the implied warranties of merchantability, fitness for a
  * particular purpose and non-infringement.
  */
+
 #ifndef MPIIMPL_H_INCLUDED
 #define MPIIMPL_H_INCLUDED
 
-/* style: define:vsnprintf:1 sig:0 */
-/* style: allow:printf:3 sig:0 */
-
 #include "mpichconfconst.h"
 #include "mpichconf.h"
 
@@ -58,22 +56,6 @@ int usleep(useconds_t usec);
 #endif
 #endif
 
-/* if we are defining this, we must define it before including mpl.h */
-#if defined(MPICH_DEBUG_MEMINIT)
-#define MPL_VG_ENABLED 1
-#endif
-
-#include "mpl.h"
-#include "opa_primitives.h"
-#include "mpi.h"
-#include "mpiutil.h"
-#include "mpidpre.h"
-#include "mpir_refcount.h"
-
-#if defined(HAVE_ROMIO)
-int MPIR_Comm_split_filesystem(MPI_Comm comm, int key, const char *dirname, MPI_Comm *newcomm);
-#endif
-
 #if defined(HAVE_LONG_LONG_INT)
 /* Assume two's complement for determining LLONG_MAX (already assumed
  * elsewhere in MPICH). */
@@ -98,7 +80,7 @@ int MPIR_Comm_split_filesystem(MPI_Comm comm, int key, const char *dirname, MPI_
 #ifdef  USE_WEAK_SYMBOLS
 #define PMPI_LOCAL static
 #else
-#define PMPI_LOCAL 
+#define PMPI_LOCAL
 #endif
 
 /* Fix for universal endianess added in autoconf 2.62 */
@@ -111,4174 +93,143 @@ int MPIR_Comm_split_filesystem(MPI_Comm comm, int key, const char *dirname, MPI_
 #endif
 #endif
 
-#include "mpir_type_defs.h"
-
-/* Routines for memory management */
-#include "mpimem.h"
-
-/* Overriding memcpy:
-     This is a utility function for memory copy.  The device might use
-     this directly or override it with a different device-specific
-     mechanism to provide an MPID_Memcpy function.  However, we
-     currently do not provide such an ADI function.
-*/
-#ifndef MPIU_Memcpy
-#define MPIU_Memcpy(dst, src, len)                \
-    do {                                          \
-        MPIU_MEM_CHECK_MEMCPY((dst),(src),(len)); \
-        memcpy((dst), (src), (len));              \
-    } while (0)
-#endif
-
-#if defined HAVE_LIBHCOLL
-#include "../mpid/common/hcoll/hcollpre.h"
-#endif
-
-typedef struct {
-    int thread_provided;        /* Provided level of thread support */
-
-    /* This is a special case for is_thread_main, which must be
-     * implemented even if MPICH itself is single threaded.  */
-#if MPICH_THREAD_LEVEL >= MPI_THREAD_SERIALIZED
-    MPID_Thread_id_t master_thread;     /* Thread that started MPI */
-#endif
-
-#if defined MPICH_IS_THREADED
-    int isThreaded;             /* Set to true if user requested
-                                 * THREAD_MULTIPLE */
-#endif                          /* MPICH_IS_THREADED */
-} MPIR_Thread_info_t;
-extern MPIR_Thread_info_t MPIR_ThreadInfo;
-
+#if defined(HAVE_VSNPRINTF) && defined(NEEDS_VSNPRINTF_DECL) && !defined(vsnprintf)
+int vsnprintf(char *str, size_t size, const char *format, va_list ap);
+# endif
 
-/* ------------------------------------------------------------------------- */
-/* thread-local storage macros */
-/* arbitrary, just needed to avoid cleaning up heap allocated memory at thread
- * destruction time */
-#define MPIR_STRERROR_BUF_SIZE (1024)
 
-/* This structure contains all thread-local variables and will be zeroed at
- * allocation time.
+/*****************************************************************************
+ * We use the following ordering of information in this file:
  *
- * Note that any pointers to dynamically allocated memory stored in this
- * structure must be externally cleaned up.
- * */
-typedef struct {
-    int op_errno;               /* For errors in predefined MPI_Ops */
-
-    /* error string storage for MPIU_Strerror */
-    char strerrbuf[MPIR_STRERROR_BUF_SIZE];
-
-#if (MPICH_THREAD_LEVEL == MPI_THREAD_MULTIPLE)
-    int lock_depth;
-#endif
-} MPIR_Per_thread_t;
-
-#if defined(MPICH_IS_THREADED) && defined(MPL_TLS_SPECIFIER)
-extern MPL_TLS_SPECIFIER MPIR_Per_thread_t MPIR_Per_thread;
-#else
-extern MPIR_Per_thread_t MPIR_Per_thread;
-#endif
-
-extern MPID_Thread_tls_t MPIR_Per_thread_key;
-
-
-/*TDSOverview.tex
-  
-  MPI has a number of data structures, most of which are represented by 
-  an opaque handle in an MPI program.  In the MPICH implementation of MPI, 
-  these handles are represented
-  as integers; this makes implementation of the C/Fortran handle transfer 
-  calls (part of MPI-2) easy.  
- 
-  MPID objects (again with the possible exception of 'MPI_Request's) 
-  are allocated by a common set of object allocation functions.
-  These are 
-.vb
-    void *MPIU_Handle_obj_create( MPIU_Object_alloc_t *objmem )
-    void MPIU_Handle_obj_destroy( MPIU_Object_alloc_t *objmem, void *object )
-.ve
-  where 'objmem' is a pointer to a memory allocation object that knows 
-  enough to allocate objects, including the
-  size of the object and the location of preallocated memory, as well 
-  as the type of memory allocator.  By providing the routines to allocate and
-  free the memory, we make it easy to use the same interface to allocate both
-  local and shared memory for objects (always using the same kind for each 
-  type of object).
-
-  The names create/destroy were chosen because they are different from 
-  new/delete (C++ operations) and malloc/free.  
-  Any name choice will have some conflicts with other uses, of course.
-
-  Reference Counts:
-  Many MPI objects have reference count semantics.  
-  The semantics of MPI require that many objects that have been freed by the 
-  user 
-  (e.g., with 'MPI_Type_free' or 'MPI_Comm_free') remain valid until all 
-  pending
-  references to that object (e.g., by an 'MPI_Irecv') are complete.  There
-  are several ways to implement this; MPICH uses `reference counts` in the
-  objects.  To support the 'MPI_THREAD_MULTIPLE' level of thread-safety, these
-  reference counts must be accessed and updated atomically.  
-  A reference count for
-  `any` object can be incremented (atomically) 
-  with 'MPIU_Object_add_ref(objptr)'
-  and decremented with 'MPIU_Object_release_ref(objptr,newval_ptr)'.  
-  These have been designed so that then can be implemented as inlined 
-  macros rather than function calls, even in the multithreaded case, and
-  can use special processor instructions that guarantee atomicity to 
-  avoid thread locks.
-  The decrement routine sets the value pointed at by 'inuse_ptr' to 0 if 
-  the postdecrement value of the reference counter is zero, and to a non-zero
-  value otherwise.  If this value is zero, then the routine that decremented 
-  the
-  reference count should free the object.  This may be as simple as 
-  calling 'MPIU_Handle_obj_destroy' (for simple objects with no other allocated
-  storage) or may require calling a separate routine to destroy the object.
-  Because MPI uses 'MPI_xxx_free' to both decrement the reference count and 
-  free the object if the reference count is zero, we avoid the use of 'free'
-  in the MPID routines.
-
-  The 'inuse_ptr' approach is used rather than requiring the post-decrement
-  value because, for reference-count semantics, all that is necessary is
-  to know when the reference count reaches zero, and this can sometimes
-  be implemented more cheaply that requiring the post-decrement value (e.g.,
-  on IA32, there is an instruction for this operation).
-
-  Question:
-  Should we state that this is a macro so that we can use a register for
-  the output value?  That avoids a store.  Alternately, have the macro 
-  return the value as if it was a function?
-
-  Structure Definitions:
-  The structure definitions in this document define `only` that part of
-  a structure that may be used by code that is making use of the ADI.
-  Thus, some structures, such as 'MPID_Comm', have many defined fields;
-  these are used to support MPI routines such as 'MPI_Comm_size' and
-  'MPI_Comm_remote_group'.  Other structures may have few or no defined
-  members; these structures have no fields used outside of the ADI.  
-  In C++ terms,  all members of these structures are 'private'.  
-
-  For the initial implementation, we expect that the structure definitions 
-  will be designed for the multimethod device.  However, all items that are
-  specific to a particular device (including the multi-method device) 
-  will be placed at the end of the structure;
-  the document will clearly identify the members that all implementations
-  will provide.  This simplifies much of the code in both the ADI and the 
-  implementation of the MPI routines because structure member can be directly
-  accessed rather than using some macro or C++ style method interface.
-  
- T*/
-
-/* mpi_lang.h - Prototypes for language specific routines. Currently used to
- * set keyval attribute callbacks
- */
-#include "mpi_lang.h"
-/* Known language bindings */
-/*E
-  MPIR_Lang_t - Known language bindings for MPI
-
-  A few operations in MPI need to know what language they were called from
-  or created by.  This type enumerates the possible languages so that
-  the MPI implementation can choose the correct behavior.  An example of this
-  are the keyval attribute copy and delete functions.
-
-  Module:
-  Attribute-DS
-  E*/
-typedef enum MPIR_Lang_t {
-    MPIR_LANG__C
-#ifdef HAVE_FORTRAN_BINDING
-    , MPIR_LANG__FORTRAN
-    , MPIR_LANG__FORTRAN90
-#endif
-#ifdef HAVE_CXX_BINDING
-    , MPIR_LANG__CXX
-#endif
-} MPIR_Lang_t;
-
-/* Macros for the MPI handles (e.g., the object that encodes an
-   MPI_Datatype) */
-#include "mpihandlemem.h"
-
-/* This routine is used to install an attribute free routine for datatypes
-   at finalize-time */
-void MPIR_DatatypeAttrFinalize( void );
-
-/* ------------------------------------------------------------------------- */
-/* Should the following be moved into mpihandlemem.h ?*/
-/* ------------------------------------------------------------------------- */
-
-/* Convert Handles to objects for MPI types that have predefined objects */
-/* TODO examine generated assembly for this construct, it's probably suboptimal
- * on Blue Gene.  An if/else if/else might help the compiler out.  It also lets
- * us hint that one case is likely(), usually the BUILTIN case. */
-#define MPIR_Getb_ptr(kind,a,bmsk,ptr)                                  \
-{                                                                       \
-   switch (HANDLE_GET_KIND(a)) {                                        \
-      case HANDLE_KIND_BUILTIN:                                         \
-          ptr=MPIR_##kind##_builtin+((a)&(bmsk));                       \
-          break;                                                        \
-      case HANDLE_KIND_DIRECT:                                          \
-          ptr=MPIR_##kind##_direct+HANDLE_INDEX(a);                     \
-          break;                                                        \
-      case HANDLE_KIND_INDIRECT:                                        \
-          ptr=((MPIR_##kind*)                                           \
-               MPIU_Handle_get_ptr_indirect(a,&MPIR_##kind##_mem));     \
-          break;                                                        \
-      case HANDLE_KIND_INVALID:                                         \
-      default:								\
-          ptr=0;							\
-          break;							\
-    }                                                                   \
-}
-
-/* Convert handles to objects for MPI types that do _not_ have any predefined
-   objects */
-#define MPIR_Get_ptr(kind,a,ptr)					\
-{									\
-   switch (HANDLE_GET_KIND(a)) {					\
-      case HANDLE_KIND_DIRECT:						\
-          ptr=MPIR_##kind##_direct+HANDLE_INDEX(a);			\
-          break;							\
-      case HANDLE_KIND_INDIRECT:					\
-          ptr=((MPIR_##kind*)						\
-               MPIU_Handle_get_ptr_indirect(a,&MPIR_##kind##_mem));	\
-          break;							\
-      case HANDLE_KIND_INVALID:						\
-      case HANDLE_KIND_BUILTIN:						\
-      default:								\
-          ptr=0;							\
-          break;							\
-     }									\
-}
-
-/* FIXME: the masks should be defined with the handle definitions instead
-   of inserted here as literals */
-#define MPIR_Comm_get_ptr(a,ptr)       MPIR_Getb_ptr(Comm,a,0x03ffffff,ptr)
-#define MPIR_Group_get_ptr(a,ptr)      MPIR_Getb_ptr(Group,a,0x03ffffff,ptr)
-#define MPIR_Errhandler_get_ptr(a,ptr) MPIR_Getb_ptr(Errhandler,a,0x3,ptr)
-#define MPIR_Op_get_ptr(a,ptr)         MPIR_Getb_ptr(Op,a,0x000000ff,ptr)
-#define MPIR_Info_get_ptr(a,ptr)       MPIR_Getb_ptr(Info,a,0x03ffffff,ptr)
-#define MPIR_Win_get_ptr(a,ptr)        MPIR_Get_ptr(Win,a,ptr)
-#define MPIR_Request_get_ptr(a,ptr)    MPIR_Get_ptr(Request,a,ptr)
-#define MPIR_Grequest_class_get_ptr(a,ptr) MPIR_Get_ptr(Grequest_class,a,ptr)
-/* Keyvals have a special format. This is roughly MPID_Get_ptrb, but
-   the handle index is in a smaller bit field.  In addition, 
-   there is no storage for the builtin keyvals.  
-   For the indirect case, we mask off the part of the keyval that is
-   in the bits normally used for the indirect block index.
-*/
-#define MPIR_Keyval_get_ptr(a,ptr)     \
-{                                                                       \
-   switch (HANDLE_GET_KIND(a)) {                                        \
-      case HANDLE_KIND_BUILTIN:                                         \
-          ptr=0;                                                        \
-          break;                                                        \
-      case HANDLE_KIND_DIRECT:                                          \
-          ptr=MPIR_Keyval_direct+((a)&0x3fffff);                        \
-          break;                                                        \
-      case HANDLE_KIND_INDIRECT:                                        \
-          ptr=((MPIR_Keyval*)                                           \
-             MPIU_Handle_get_ptr_indirect((a)&0xfc3fffff,&MPIR_Keyval_mem)); \
-          break;                                                        \
-      case HANDLE_KIND_INVALID:                                         \
-      default:								\
-          ptr=0;							\
-          break;							\
-    }                                                                   \
-}
-
-/* Valid pointer checks */
-/* This test is lame.  Should eventually include cookie test 
-   and in-range addresses */
-#define MPIR_Valid_ptr_class(kind,ptr,errclass,err) \
-  {if (!(ptr)) { err = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, errclass, \
-                                             "**nullptrtype", "**nullptrtype %s", #kind ); } }
-
-#define MPIR_Info_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Info,ptr,MPI_ERR_INFO,err)
-/* Check not only for a null pointer but for an invalid communicator,
-   such as one that has been freed.  Let's try the ref_count as the test
-   for now */
-/* ticket #1441: check (refcount<=0) to cover the case of 0, an "over-free" of
- * -1 or similar, and the 0xecec... case when --enable-g=mem is used */
-#define MPIR_Comm_valid_ptr(ptr,err,ignore_rev) {     \
-     MPIR_Valid_ptr_class(Comm,ptr,MPI_ERR_COMM,err); \
-     if ((ptr) && MPIU_Object_get_ref(ptr) <= 0) {    \
-         MPIR_ERR_SET(err,MPI_ERR_COMM,"**comm");     \
-         ptr = 0;                                     \
-     } else if ((ptr) && (ptr)->revoked && !(ignore_rev)) {        \
-         MPIR_ERR_SET(err,MPIX_ERR_REVOKED,"**comm"); \
-     }                                                \
-}
-#define MPIR_Win_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Win,ptr,MPI_ERR_WIN,err)
-#define MPIR_Group_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Group,ptr,MPI_ERR_GROUP,err)
-#define MPIR_Op_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Op,ptr,MPI_ERR_OP,err)
-#define MPIR_Errhandler_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Errhandler,ptr,MPI_ERR_ARG,err)
-#define MPIR_Request_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Request,ptr,MPI_ERR_REQUEST,err)
-#define MPIR_Keyval_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Keyval,ptr,MPI_ERR_KEYVAL,err)
-
-#define MPIR_DATATYPE_IS_PREDEFINED(type) \
-    ((HANDLE_GET_KIND(type) == HANDLE_KIND_BUILTIN) || \
-     (type == MPI_FLOAT_INT) || (type == MPI_DOUBLE_INT) || \
-     (type == MPI_LONG_INT) || (type == MPI_SHORT_INT) || \
-     (type == MPI_LONG_DOUBLE_INT))
-
-/* ------------------------------------------------------------------------- */
-/* end of code that should the following be moved into mpihandlemem.h ?*/
-/* ------------------------------------------------------------------------- */
-
-/* ------------------------------------------------------------------------- */
-/* Info */
-/*TInfoOverview.tex
-
-  'MPI_Info' provides a way to create a list of '(key,value)' pairs
-  where the 'key' and 'value' are both strings.  Because many routines, both
-  in the MPI implementation and in related APIs such as the PMI process
-  management interface, require 'MPI_Info' arguments, we define a simple 
-  structure for each 'MPI_Info' element.  Elements are allocated by the 
-  generic object allocator; the head element is always empty (no 'key'
-  or 'value' is defined on the head element).  
-  
-  For simplicity, we have not abstracted the info data structures;
-  routines that want to work with the linked list may do so directly.
-  Because the 'MPI_Info' type is a handle and not a pointer, an MPIU
-  (utility) routine is provided to handle the 
-  deallocation of 'MPIR_Info' elements.  See the implementation of
-  'MPI_Info_create' for how an Info type is allocated.
-
-  Thread Safety:
-
-  The info interface itself is not thread-robust.  In particular, the routines
-  'MPI_INFO_GET_NKEYS' and 'MPI_INFO_GET_NTHKEY' assume that no other 
-  thread modifies the info key.  (If the info routines had the concept
-  of a next value, they would not be thread safe.  As it stands, a user
-  must be careful if several threads have access to the same info object.) 
-  Further, 'MPI_INFO_DUP', while not 
-  explicitly advising implementers to be careful of one thread modifying the
-  'MPI_Info' structure while 'MPI_INFO_DUP' is copying it, requires that the
-  operation take place in a thread-safe manner.
-  There isn'' much that we can do about these cases.  There are other cases
-  that must be handled.  In particular, multiple threads are allowed to 
-  update the same info value.  Thus, all of the update routines must be thread
-  safe; the simple implementation used in the MPICH implementation uses locks.
-  Note that the 'MPI_Info_delete' call does not need a lock; the defintion of
-  thread-safety means that any order of the calls functions correctly; since
-  it invalid either to delete the same 'MPI_Info' twice or to modify an
-  'MPI_Info' that has been deleted, only one thread at a time can call 
-  'MPI_Info_free' on any particular 'MPI_Info' value.  
-
-  T*/
-/*S
-  MPIR_Info - Structure of an MPID info
-
-  Notes:
-  There is no reference count because 'MPI_Info' values, unlike other MPI 
-  objects, may be changed after they are passed to a routine without 
-  changing the routine''s behavior.  In other words, any routine that uses
-  an 'MPI_Info' object must make a copy or otherwise act on any info value
-  that it needs.
-
-  A linked list is used because the typical 'MPI_Info' list will be short
-  and a simple linked list is easy to implement and to maintain.  Similarly,
-  a single structure rather than separate header and element structures are
-  defined for simplicity.  No separate thread lock is provided because
-  info routines are not performance critical; they may use the single
-  critical section lock in the 'MPIR_Process' structure when they need a
-  thread lock.
-  
-  This particular form of linked list (in particular, with this particular
-  choice of the first two members) is used because it allows us to use 
-  the same routines to manage this list as are used to manage the 
-  list of free objects (in the file 'src/util/mem/handlemem.c').  In 
-  particular, if lock-free routines for updating a linked list are 
-  provided, they can be used for managing the 'MPIR_Info' structure as well.
-
-  The MPI standard requires that keys can be no less that 32 characters and
-  no more than 255 characters.  There is no mandated limit on the size 
-  of values.
-
-  Module:
-  Info-DS
-  S*/
-typedef struct MPIR_Info {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-    struct MPIR_Info   *next;
-    char               *key;
-    char               *value;
-} MPIR_Info;
-extern MPIU_Object_alloc_t MPIR_Info_mem;
-/* Preallocated info objects */
-#define MPIR_INFO_N_BUILTIN 2
-extern MPIR_Info MPIR_Info_builtin[MPIR_INFO_N_BUILTIN];
-extern MPIR_Info MPIR_Info_direct[];
-/* ------------------------------------------------------------------------- */
-
-#if defined(MPICH_IS_THREADED)
-#define MPIR_THREAD_CHECK_BEGIN if (MPIR_ThreadInfo.isThreaded) {
-#define MPIR_THREAD_CHECK_END   }
-#else
-#define MPIR_THREAD_CHECK_BEGIN
-#define MPIR_THREAD_CHECK_END
-#endif /* MPICH_IS_THREADED */
-
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL || \
-    MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
-extern MPID_Thread_mutex_t MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX;
-#endif
-
-#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
-extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_HANDLE_MUTEX;
-extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_MSGQ_MUTEX;
-extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_COMPLETION_MUTEX;
-extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_CTX_MUTEX;
-extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_PMI_MUTEX;
-
-#define MPIR_THREAD_POBJ_COMM_MUTEX(_comm_ptr) _comm_ptr->mutex
-#define MPIR_THREAD_POBJ_WIN_MUTEX(_win_ptr)   _win_ptr->mutex
-#endif
-
-
-/* ------------------------------------------------------------------------- */
-/* Error Handlers */
-/*E
-  MPIR_Errhandler_fn - MPID Structure to hold an error handler function
-
-  Notes:
-  The MPI-1 Standard declared only the C version of this, implicitly 
-  assuming that 'int' and 'MPI_Fint' were the same. 
-
-  Since Fortran does not have a C-style variable number of arguments 
-  interface, the Fortran interface simply accepts two arguments.  Some
-  calling conventions for Fortran (particularly under Windows) require
-  this.
-
-  Module:
-  ErrHand-DS
-  
-  Questions:
-  What do we want to do about C++?  Do we want a hook for a routine that can
-  be called to throw an exception in C++, particularly if we give C++ access
-  to this structure?  Does the C++ handler need to be different (not part
-  of the union)?
-
-  E*/
-typedef union MPIR_Errhandler_fn {
-   void (*C_Comm_Handler_function) ( MPI_Comm *, int *, ... );
-   void (*F77_Handler_function) ( MPI_Fint *, MPI_Fint * );
-   void (*C_Win_Handler_function) ( MPI_Win *, int *, ... );
-   void (*C_File_Handler_function) ( MPI_File *, int *, ... );
-} MPIR_Errhandler_fn;
-
-/*S
-  MPIR_Errhandler - Description of the error handler structure
-
-  Notes:
-  Device-specific information may indicate whether the error handler is active;
-  this can help prevent infinite recursion in error handlers caused by 
-  user-error without requiring the user to be as careful.  We might want to 
-  make this part of the interface so that the 'MPI_xxx_call_errhandler' 
-  routines would check.
-
-  It is useful to have a way to indicate that the errhandler is no longer
-  valid, to help catch the case where the user has freed the errhandler but
-  is still using a copy of the 'MPI_Errhandler' value.  We may want to 
-  define the 'id' value for deleted errhandlers.
-
-  Module:
-  ErrHand-DS
-  S*/
-typedef struct MPIR_Errhandler {
-  MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-  MPIR_Lang_t        language;
-  MPIR_Object_kind   kind;
-  MPIR_Errhandler_fn errfn;
-  /* Other, device-specific information */
-#ifdef MPID_DEV_ERRHANDLER_DECL
-    MPID_DEV_ERRHANDLER_DECL
-#endif
-} MPIR_Errhandler;
-extern MPIU_Object_alloc_t MPIR_Errhandler_mem;
-/* Preallocated errhandler objects */
-extern MPIR_Errhandler MPIR_Errhandler_builtin[];
-extern MPIR_Errhandler MPIR_Errhandler_direct[];
-
-/* We never reference count the builtin error handler objects, regardless of how
- * we decide to reference count the other predefined objects.  If we get to the
- * point where we never reference count *any* of the builtin objects then we
- * should probably remove these checks and let them fall through to the checks
- * for BUILTIN down in the MPIU_Object_* routines. */
-#define MPIR_Errhandler_add_ref( _errhand )                               \
-    do {                                                                  \
-        if (HANDLE_GET_KIND((_errhand)->handle) != HANDLE_KIND_BUILTIN) { \
-            MPIU_Object_add_ref( _errhand );                              \
-        }                                                                 \
-    } while (0)
-#define MPIR_Errhandler_release_ref( _errhand, _inuse )                   \
-    do {                                                                  \
-        if (HANDLE_GET_KIND((_errhand)->handle) != HANDLE_KIND_BUILTIN) { \
-            MPIU_Object_release_ref( (_errhand), (_inuse) );              \
-        }                                                                 \
-        else {                                                            \
-            *(_inuse) = 1;                                                \
-        }                                                                 \
-    } while (0)
-/* ------------------------------------------------------------------------- */
-
-/* ------------------------------------------------------------------------- */
-/* Keyvals and attributes */
-/*TKyOverview.tex
-
-  Keyvals are MPI objects that, unlike most MPI objects, are defined to be
-  integers rather than a handle (e.g., 'MPI_Comm').  However, they really
-  `are` MPI opaque objects and are handled by the MPICH implementation in
-  the same way as all other MPI opaque objects.  The only difference is that
-  there is no 'typedef int MPI_Keyval;' in 'mpi.h'.  In particular, keyvals
-  are encoded (for direct and indirect references) in the same way that 
-  other MPI opaque objects are
-
-  Each keyval has a copy and a delete function associated with it.
-  Unfortunately, these have a slightly different calling sequence for
-  each language, particularly when the size of a pointer is 
-  different from the size of a Fortran integer.  The unions 
-  'MPIR_Copy_function' and 'MPIR_Delete_function' capture the differences
-  in a single union type.
-
-  The above comment is out of date but has never been updated as it should
-  have to match the introduction of a different interface.  Beware!
-
-  Notes: 
-  
-  In the original design, retrieving a attribute from a different
-  language that set it was thought to be an error.  The MPI Forum
-  decided that this should be allowed, and after much discussion, the
-  behavior was defined.  Thus, we need to record what sort of
-  attribute was provided, and be able to properly return the correct
-  value in each case.  See MPI 2.2, Section 16.3.7 (Attributes) for
-  specific requirements.  One consequence of this is that the value
-  that is returned may have a different length that how it was set.
-  On little-endian platforms (e.g., x86), this doesn't cause much of a
-  problem, because the address is that of the least significant byte,
-  and the lower bytes have the data that is needed in the case that
-  the desired attribute type is shorter than the stored attribute.
-  However, on a big-endian platform (e.g., IBM POWER), since the most
-  significant bytes are stored first, depending on the length of the
-  result type, the address of the result may not be the beginning of
-  the memory area.  For example, assume that an MPI_Fint is 4 bytes
-  and a void * (and a Fortran INTEGER of kind MPI_ADDRESS_KIND) is 8
-  bytes, and let the attribute store the value in an 8 byte integer in
-  a field named "value".  On a little-endian platform, the address of
-  the value is always the beginning of the field "value".  On a
-  big-endian platform, the address of the value is the beginning of
-  the field if the return type is a pointer (e.g., from C) or Fortran
-  (KIND=MPI_ADDRESS_KIND), and the address of the beginning of the
-  field + 4 if the return type is a Fortran 77 integer (and, as
-  specified above, an MPI_Fint is 4 bytes shorter than a void *).
-
-  For the big-endian case, it is possible to manage these shifts (using
-  WORDS_LITTLEENDIAN to detect the big-endian case).  Alternatively,
-  at a small cost in space, copies in variables of the correct length
-  can be maintained.  At this writing, the code in src/mpi/attr makes
-  use of WORDS_LITTLEENDIAN to provide the appropriate code for the most
-  common cases.
-
-  T*/
-/*TAttrOverview.tex
+ *   1. Start with independent headers that do not have any
+ *      dependencies on the rest of the MPICH implementation (e.g.,
+ *      mpl, opa, mpi.h).
  *
- * The MPI standard allows `attributes`, essentially an '(integer,pointer)'
- * pair, to be attached to communicators, windows, and datatypes.  
- * The integer is a `keyval`, which is allocated by a call (at the MPI level)
- * to 'MPI_Comm/Type/Win_create_keyval'.  The pointer is the value of 
- * the attribute.
- * Attributes are primarily intended for use by the user, for example, to save
- * information on a communicator, but can also be used to pass data to the
- * MPI implementation.  For example, an attribute may be used to pass 
- * Quality of Service information to an implementation to be used with 
- * communication on a particular communicator.  
- * To provide the most general access by the ADI to all attributes, the
- * ADI defines a collection of routines that are used by the implementation
- * of the MPI attribute routines (such as 'MPI_Comm_get_attr').
- * In addition, the MPI routines involving attributes will invoke the 
- * corresponding 'hook' functions (e.g., 'MPID_Dev_comm_attr_set_hook') 
- * should the device define them.
+ *   2. Next is forward declarations of MPIR structures (MPIR_Comm,
+ *      MPIR_Win, etc.).
  *
- * Attributes on windows and datatypes are defined by MPI but not of 
- * interest (as yet) to the device.
+ *   3. After that we have device-independent headers (MPIR
+ *      functionality that does not have any dependency on MPID).
  *
- * In addition, there are seven predefined attributes that the device must
- * supply to the implementation.  This is accomplished through 
- * data values that are part of the 'MPIR_Process' data block.
- *  The predefined keyvals on 'MPI_COMM_WORLD' are\:
- *.vb
- * Keyval                     Related Module
- * MPI_APPNUM                 Dynamic
- * MPI_HOST                   Core
- * MPI_IO                     Core
- * MPI_LASTUSEDCODE           Error
- * MPI_TAG_UB                 Communication
- * MPI_UNIVERSE_SIZE          Dynamic
- * MPI_WTIME_IS_GLOBAL        Timer
- *.ve
- * The values stored in the 'MPIR_Process' block are the actual values.  For 
- * example, the value of 'MPI_TAG_UB' is the integer value of the largest tag.
- * The
- * value of 'MPI_WTIME_IS_GLOBAL' is a '1' for true and '0' for false.  Likely
- * values for 'MPI_IO' and 'MPI_HOST' are 'MPI_ANY_SOURCE' and 'MPI_PROC_NULL'
- * respectively.
+ *   4. Next is the device "pre" header that defines device-level
+ *      initial objects that would be used by the MPIR structures.
  *
- T*/
-
-/* Include the attribute access routines that permit access to the 
-   attribute or its pointer, needed for cross-language access to attributes */
-#include "mpi_attr.h"
-
-/* Because Comm, Datatype, and File handles are all ints, and because
-   attributes are otherwise identical between the three types, we
-   only store generic copy and delete functions.  This allows us to use
-   common code for the attribute set, delete, and dup functions */
-/*E
-  MPIR_Copy_function - MPID Structure to hold an attribute copy function
-
-  Notes:
-  The appropriate element of this union is selected by using the language
-  field of the 'keyval'.
-
-  Because 'MPI_Comm', 'MPI_Win', and 'MPI_Datatype' are all 'int's in 
-  MPICH, we use a single C copy function rather than have separate
-  ones for the Communicator, Window, and Datatype attributes.
-
-  There are no corresponding typedefs for the Fortran functions.  The 
-  F77 function corresponds to the Fortran 77 binding used in MPI-1 and the
-  F90 function corresponds to the Fortran 90 binding used in MPI-2.
-
-  Module:
-  Attribute-DS
-
-  E*/
-int
-MPIR_Attr_copy_c_proxy(
-    MPI_Comm_copy_attr_function* user_function,
-    int handle,
-    int keyval,
-    void* extra_state,
-    MPIR_AttrType attrib_type,
-    void* attrib,
-    void** attrib_copy,
-    int* flag
-    );
-
-typedef struct MPIR_Copy_function {
-  int  (*C_CopyFunction)( int, int, void *, void *, void *, int * );
-  void (*F77_CopyFunction)  ( MPI_Fint *, MPI_Fint *, MPI_Fint *, MPI_Fint *, 
-                              MPI_Fint *, MPI_Fint *, MPI_Fint * );
-  void (*F90_CopyFunction)  ( MPI_Fint *, MPI_Fint *, MPI_Aint *, MPI_Aint *,
-                              MPI_Aint *, MPI_Fint *, MPI_Fint * );
-  /* The generic lang-independent user_function and proxy will
-   * replace the lang dependent copy funcs above
-   * Currently the lang-indpendent funcs are used only for keyvals
-   */
-  MPI_Comm_copy_attr_function *user_function;
-  MPIR_Attr_copy_proxy *proxy;
-  /* The C++ function is the same as the C function */
-} MPIR_Copy_function;
-
-/*E
-  MPIR_Delete_function - MPID Structure to hold an attribute delete function
-
-  Notes:
-  The appropriate element of this union is selected by using the language
-  field of the 'keyval'.
-
-  Because 'MPI_Comm', 'MPI_Win', and 'MPI_Datatype' are all 'int's in 
-  MPICH, we use a single C delete function rather than have separate
-  ones for the Communicator, Window, and Datatype attributes.
-
-  There are no corresponding typedefs for the Fortran functions.  The 
-  F77 function corresponds to the Fortran 77 binding used in MPI-1 and the
-  F90 function corresponds to the Fortran 90 binding used in MPI-2.
-
-  Module:
-  Attribute-DS
-
-  E*/
-int
-MPIR_Attr_delete_c_proxy(
-    MPI_Comm_delete_attr_function* user_function,
-    int handle,
-    int keyval,
-    MPIR_AttrType attrib_type,
-    void* attrib,
-    void* extra_state
-    );
-
-typedef struct MPIR_Delete_function {
-  int  (*C_DeleteFunction)  ( int, int, void *, void * );
-  void (*F77_DeleteFunction)( MPI_Fint *, MPI_Fint *, MPI_Fint *, MPI_Fint *, 
-                              MPI_Fint * );
-  void (*F90_DeleteFunction)( MPI_Fint *, MPI_Fint *, MPI_Aint *, MPI_Aint *, 
-                              MPI_Fint * );
-  /* The generic lang-independent user_function and proxy will
-   * replace the lang dependent copy funcs above
-   * Currently the lang-indpendent funcs are used only for keyvals
-   */
-  MPI_Comm_delete_attr_function *user_function;
-  MPIR_Attr_delete_proxy *proxy;
-} MPIR_Delete_function;
-
-/*S
-  MPIR_Keyval - Structure of an MPID keyval
-
-  Module:
-  Attribute-DS
-
-  S*/
-typedef struct MPIR_Keyval {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-    MPIR_Object_kind     kind;
-    int                  was_freed;
-    void                 *extra_state;
-    MPIR_Copy_function   copyfn;
-    MPIR_Delete_function delfn;
-  /* other, device-specific information */
-#ifdef MPID_DEV_KEYVAL_DECL
-    MPID_DEV_KEYVAL_DECL
-#endif
-} MPIR_Keyval;
-
-#define MPIR_Keyval_add_ref( _keyval )                                  \
-    do {                                                                \
-        MPIU_Object_add_ref( _keyval );                                 \
-    } while(0)
-
-#define MPIR_Keyval_release_ref( _keyval, _inuse )                      \
-    do {                                                                \
-        MPIU_Object_release_ref( _keyval, _inuse );                     \
-    } while(0)
-
-
-/* Attribute values in C/C++ are void * and in Fortran are ADDRESS_SIZED
-   integers.  Normally, these are the same size, but in at least one 
-   case, the address-sized integers was selected as longer than void *
-   to work with the datatype code used in the I/O library.  While this
-   is really a limitation in the current Datatype implementation. */
-#ifdef USE_AINT_FOR_ATTRVAL
-typedef MPI_Aint MPIR_AttrVal_t;
-#else
-typedef void * MPIR_AttrVal_t;
-#endif
-
-/* Attributes need no ref count or handle, but since we want to use the
-   common block allocator for them, we must provide those elements 
-*/
-/*S
-  MPIR_Attribute - Structure of an MPID attribute
-
-  Notes:
-  Attributes don''t have 'ref_count's because they don''t have reference
-  count semantics.  That is, there are no shallow copies or duplicates
-  of an attibute.  An attribute is copied when the communicator that
-  it is attached to is duplicated.  Subsequent operations, such as
-  'MPI_Comm_attr_free', can change the attribute list for one of the
-  communicators but not the other, making it impractical to keep the
-  same list.  (We could defer making the copy until the list is changed,
-  but even then, there would be no reference count on the individual
-  attributes.)
- 
-  A pointer to the keyval, rather than the (integer) keyval itself is
-  used since there is no need within the attribute structure to make
-  it any harder to find the keyval structure.
-
-  The attribute value is a 'void *'.  If 'sizeof(MPI_Fint)' > 'sizeof(void*)',
-  then this must be changed (no such system has been encountered yet).
-  For the Fortran 77 routines in the case where 'sizeof(MPI_Fint)' < 
-  'sizeof(void*)', the high end of the 'void *' value is used.  That is,
-  we cast it to 'MPI_Fint *' and use that value.
-
-  MPI defines three kinds of attributes (see MPI 2.1, Section 16.3, pages 
-  487-488 (the standard says two, but there are really three, as discussed
-  below).  These are pointer-valued attributes and two types of integer-valued
-  attributes.  
-  Pointer-valued attributes are used in C.
-  Integer-valued attributes are used in Fortran.  These are of type either
-  INTEGER or INTEGER(KIND=MPI_ADDRESS_KIND).
-
-  The predefined attributes are a combination of INTEGER and pointers.
- 
-  Module:
-  Attribute-DS
-
- S*/
-typedef struct MPIR_Attribute {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-    MPIR_Keyval  *keyval;           /* Keyval structure for this attribute */
-
-    struct MPIR_Attribute *next;    /* Pointer to next in the list */
-    MPIR_AttrType attrType;         /* Type of the attribute */
-    long        pre_sentinal;       /* Used to detect user errors in accessing
-				       the value */
-    MPIR_AttrVal_t value;           /* Stored value. An Aint must be at least
-				       as large as an address - some builds
-				       may make an Aint larger than a void * */
-    long        post_sentinal;      /* Like pre_sentinal */
-    /* other, device-specific information */
-#ifdef MPID_DEV_ATTR_DECL
-    MPID_DEV_ATTR_DECL
-#endif
-} MPIR_Attribute;
-/* ------------------------------------------------------------------------- */
-
-/*---------------------------------------------------------------------------
- * Groups are *not* a major data structure in MPICH-2.  They are provided
- * only because they are required for the group operations (e.g., 
- * MPI_Group_intersection) and for the scalable RMA synchronization
- *---------------------------------------------------------------------------*/
-/* This structure is used to implement the group operations such as 
-   MPI_Group_translate_ranks */
-typedef struct MPIR_Group_pmap_t {
-    int          lpid;      /* local process id, from VCONN */
-    int          next_lpid; /* Index of next lpid (in lpid order) */
-    int          flag;      /* marker, used to implement group operations */
-} MPIR_Group_pmap_t;
-
-/* Any changes in the MPIR_Group structure must be made to the
-   predefined value in MPIR_Group_builtin for MPI_GROUP_EMPTY in
-   src/mpi/group/grouputil.c */
-/*S
- MPIR_Group - Description of the Group data structure
-
- The processes in the group of 'MPI_COMM_WORLD' have lpid values 0 to 'size'-1,
- where 'size' is the size of 'MPI_COMM_WORLD'.  Processes created by 
- 'MPI_Comm_spawn' or 'MPI_Comm_spawn_multiple' or added by 'MPI_Comm_attach' 
- or  
- 'MPI_Comm_connect'
- are numbered greater than 'size - 1' (on the calling process). See the 
- discussion of LocalPID values.
-
- Note that when dynamic process creation is used, the pids are `not` unique
- across the universe of connected MPI processes.  This is ok, as long as
- pids are interpreted `only` on the process that owns them.
-
- Only for MPI-1 are the lpid''s equal to the `global` pids.  The local pids
- can be thought of as a reference not to the remote process itself, but
- how the remote process can be reached from this process.  We may want to 
- have a structure 'MPID_Lpid_t' that contains information on the remote
- process, such as (for TCP) the hostname, ip address (it may be different if
- multiple interfaces are supported; we may even want plural ip addresses for
- stripping communication), and port (or ports).  For shared memory connected
- processes, it might have the address of a remote queue.  The lpid number 
- is an index into a table of 'MPID_Lpid_t'''s that contain this (device- and
- method-specific) information.
-
- Module:
- Group-DS
-
- S*/
-typedef struct MPIR_Group {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-    int          size;           /* Size of a group */
-    int          rank;           /* rank of this process relative to this 
-				    group */
-    int          idx_of_first_lpid;
-    MPIR_Group_pmap_t *lrank_to_lpid; /* Array mapping a local rank to local
-					 process number */
-    int          is_local_dense_monotonic; /* see NOTE-G1 */
-
-    /* We may want some additional data for the RMA syncrhonization calls */
-  /* Other, device-specific information */
-#ifdef MPID_DEV_GROUP_DECL
-    MPID_DEV_GROUP_DECL
-#endif
-} MPIR_Group;
-
-/* NOTE-G1: is_local_dense_monotonic will be true iff the group meets the
- * following criteria:
- * 1) the lpids are all in the range [0,size-1], i.e. a subset of comm world
- * 2) the pids are sequentially numbered in increasing order, without any gaps,
- *    stride, or repetitions
+ *   5. Then comes the device-dependent MPIR functionality, with the
+ *      actual definitions of structures, function prototypes, etc.
+ *      This functionality can only rely on the device "pre"
+ *      functionality.
  *
- * This additional information allows us to handle the common case (insofar as
- * group ops are common) for MPI_Group_translate_ranks where group2 is
- * group_of(MPI_COMM_WORLD), or some simple subset.  This is an important use
- * case for many MPI tool libraries, such as Scalasca.
- */
-
-extern MPIU_Object_alloc_t MPIR_Group_mem;
-/* Preallocated group objects */
-#define MPIR_GROUP_N_BUILTIN 1
-extern MPIR_Group MPIR_Group_builtin[MPIR_GROUP_N_BUILTIN];
-extern MPIR_Group MPIR_Group_direct[];
-
-/* Object for empty group */
-extern MPIR_Group * const MPIR_Group_empty;
-
-#define MPIR_Group_add_ref( _group ) \
-    do { MPIU_Object_add_ref( _group ); } while (0)
-
-#define MPIR_Group_release_ref( _group, _inuse ) \
-     do { MPIU_Object_release_ref( _group, _inuse ); } while (0)
-
-void MPIR_Group_setup_lpid_list( MPIR_Group * );
-
-/* ------------------------------------------------------------------------- */
-
-/*E
-  MPIR_Comm_kind_t - Name the two types of communicators
-  E*/
-typedef enum MPIR_Comm_kind_t {
-    MPIR_COMM_KIND__INTRACOMM = 0,
-    MPIR_COMM_KIND__INTERCOMM = 1
-} MPIR_Comm_kind_t;
-
-/* ideally we could add these to MPIR_Comm_kind_t, but there's too much existing
- * code that assumes that the only valid values are INTRACOMM or INTERCOMM */
-typedef enum MPIR_Comm_hierarchy_kind_t {
-    MPIR_COMM_HIERARCHY_KIND__FLAT = 0,        /* no hierarchy */
-    MPIR_COMM_HIERARCHY_KIND__PARENT = 1,      /* has subcommunicators */
-    MPIR_COMM_HIERARCHY_KIND__NODE_ROOTS = 2,  /* is the subcomm for node roots */
-    MPIR_COMM_HIERARCHY_KIND__NODE = 3,        /* is the subcomm for a node */
-    MPIR_COMM_HIERARCHY_KIND__SIZE             /* cardinality of this enum */
-} MPIR_Comm_hierarchy_kind_t;
-/* Communicators */
-
-typedef enum {
-    MPIR_COMM_MAP_TYPE__DUP,
-    MPIR_COMM_MAP_TYPE__IRREGULAR
-} MPIR_Comm_map_type_t;
-
-/* direction of mapping: local to local, local to remote, remote to
- * local, remote to remote */
-typedef enum {
-    MPIR_COMM_MAP_DIR__L2L,
-    MPIR_COMM_MAP_DIR__L2R,
-    MPIR_COMM_MAP_DIR__R2L,
-    MPIR_COMM_MAP_DIR__R2R
-} MPIR_Comm_map_dir_t;
-
-typedef struct MPIR_Comm_map {
-    MPIR_Comm_map_type_t type;
-
-    struct MPIR_Comm *src_comm;
-
-    /* mapping direction for intercomms, which contain local and
-     * remote groups */
-    MPIR_Comm_map_dir_t dir;
-
-    /* only valid for irregular map type */
-    int src_mapping_size;
-    int *src_mapping;
-    int free_mapping;       /* we allocated the mapping */
-
-    struct MPIR_Comm_map *next;
-} MPIR_Comm_map_t;
-
-int MPIR_Comm_map_irregular(struct MPIR_Comm *newcomm, struct MPIR_Comm *src_comm,
-                            int *src_mapping, int src_mapping_size,
-                            MPIR_Comm_map_dir_t dir,
-                            MPIR_Comm_map_t **map);
-int MPIR_Comm_map_dup(struct MPIR_Comm *newcomm, struct MPIR_Comm *src_comm,
-                      MPIR_Comm_map_dir_t dir);
-int MPIR_Comm_map_free(struct MPIR_Comm *comm);
-
-/*S
-  MPIR_Comm - Description of the Communicator data structure
-
-  Notes:
-  Note that the size and rank duplicate data in the groups that
-  make up this communicator.  These are used often enough that this
-  optimization is valuable.  
-
-  This definition provides only a 16-bit integer for context id''s .
-  This should be sufficient for most applications.  However, extending
-  this to a 32-bit (or longer) integer should be easy.
-
-  There are two context ids.  One is used for sending and one for 
-  receiving.  In the case of an Intracommunicator, they are the same
-  context id.  They differ in the case of intercommunicators, where 
-  they may come from processes in different comm worlds (in the
-  case of MPI-2 dynamic process intercomms).  
-
-  The virtual connection table is an explicit member of this structure.
-  This contains the information used to contact a particular process,
-  indexed by the rank relative to this communicator.
-
-  Groups are allocated lazily.  That is, the group pointers may be
-  null, created only when needed by a routine such as 'MPI_Comm_group'.
-  The local process ids needed to form the group are available within
-  the virtual connection table.
-  For intercommunicators, we may want to always have the groups.  If not, 
-  we either need the 'local_group' or we need a virtual connection table
-  corresponding to the 'local_group' (we may want this anyway to simplify
-  the implementation of the intercommunicator collective routines).
-
-  The pointer to the structure 'MPIR_Collops' containing pointers to the
-  collective  
-  routines allows an implementation to replace each routine on a 
-  routine-by-routine basis.  By default, this pointer is null, as are the 
-  pointers within the structure.  If either pointer is null, the implementation
-  uses the generic provided implementation.  This choice, rather than
-  initializing the table with pointers to all of the collective routines,
-  is made to reduce the space used in the communicators and to eliminate the
-  need to include the implementation of all collective routines in all MPI 
-  executables, even if the routines are not used.
-
-  The macro 'MPID_HAS_HETERO' may be defined by a device to indicate that
-  the device supports MPI programs that must communicate between processes with
-  different data representations (e.g., different sized integers or different
-  byte orderings).  If the device does need to define this value, it should
-  be defined in the file 'mpidpre.h'. 
-
-  Please note that the local_size and remote_size fields can be confusing.  For
-  intracommunicators both fields are always equal to the size of the
-  communicator.  For intercommunicators local_size is equal to the size of
-  local_group while remote_size is equal to the size of remote_group.
-
-  Module:
-  Communicator-DS
-
-  Question:
-  For fault tolerance, do we want to have a standard field for communicator 
-  health?  For example, ok, failure detected, all (live) members of failed 
-  communicator have acked.
-  S*/
-typedef struct MPIR_Comm {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-    MPID_Thread_mutex_t mutex;
-    MPIU_Context_id_t context_id; /* Send context id.  See notes */
-    MPIU_Context_id_t recvcontext_id; /* Send context id.  See notes */
-    int           remote_size;   /* Value of MPI_Comm_(remote)_size */
-    int           rank;          /* Value of MPI_Comm_rank */
-    MPIR_Attribute *attributes;  /* List of attributes */
-    int           local_size;    /* Value of MPI_Comm_size for local group */
-    MPIR_Group   *local_group,   /* Groups in communicator. */
-                 *remote_group;  /* The local and remote groups are the
-                                    same for intra communicators */
-    MPIR_Comm_kind_t comm_kind;  /* MPIR_COMM_KIND__INTRACOMM or MPIR_COMM_KIND__INTERCOMM */
-    char          name[MPI_MAX_OBJECT_NAME];  /* Required for MPI-2 */
-    MPIR_Errhandler *errhandler; /* Pointer to the error handler structure */
-    struct MPIR_Comm    *local_comm; /* Defined only for intercomms, holds
-				        an intracomm for the local group */
-
-    MPIR_Comm_hierarchy_kind_t hierarchy_kind; /* flat, parent, node, or node_roots */
-    struct MPIR_Comm *node_comm; /* Comm of processes in this comm that are on
-                                    the same node as this process. */
-    struct MPIR_Comm *node_roots_comm; /* Comm of root processes for other nodes. */
-    int *intranode_table;        /* intranode_table[i] gives the rank in
-                                    node_comm of rank i in this comm or -1 if i
-                                    is not in this process' node_comm.
-                                    It is of size 'local_size'. */
-    int *internode_table;        /* internode_table[i] gives the rank in
-                                    node_roots_comm of rank i in this comm.
-                                    It is of size 'local_size'. */
-
-    int           is_low_group;  /* For intercomms only, this boolean is
-				    set for all members of one of the 
-				    two groups of processes and clear for 
-				    the other.  It enables certain
-				    intercommunicator collective operations
-				    that wish to use half-duplex operations
-				    to implement a full-duplex operation */
-    struct MPIR_Comm     *comm_next;/* Provides a chain through all active
-				       communicators */
-    struct MPIR_Collops  *coll_fns; /* Pointer to a table of functions
-                                              implementing the collective
-                                              routines */
-    struct MPIR_TopoOps  *topo_fns; /* Pointer to a table of functions
-				       implementting the topology routines */
-    int next_sched_tag;             /* used by the NBC schedule code to allocate tags */
-
-    int revoked;                    /* Flag to track whether the communicator
-                                     * has been revoked */
-    MPIR_Info *info;                /* Hints to the communicator */
-
-#ifdef MPID_HAS_HETERO
-    int is_hetero;
-#endif
-
-#if defined HAVE_LIBHCOLL
-    hcoll_comm_priv_t hcoll_priv;
-#endif /* HAVE_LIBHCOLL */
-
-    /* the mapper is temporarily filled out in order to allow the
-     * device to setup its network addresses.  it will be freed after
-     * the device has initialized the comm. */
-    MPIR_Comm_map_t *mapper_head;
-    MPIR_Comm_map_t *mapper_tail;
-
-  /* Other, device-specific information */
-#ifdef MPID_DEV_COMM_DECL
-    MPID_DEV_COMM_DECL
-#endif
-} MPIR_Comm;
-extern MPIU_Object_alloc_t MPIR_Comm_mem;
-
-typedef struct MPIR_Gpid {
-#ifdef MPID_DEV_GPID_DECL
-    MPID_DEV_GPID_DECL
-#else
-    int dummy;   /* don't create an empty structure */
-#endif
-}MPIR_Gpid;
-
-/* this function should not be called by normal code! */
-int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr);
-
-#define MPIR_Comm_add_ref(_comm) \
-    do { MPIU_Object_add_ref((_comm)); } while (0)
-#define MPIR_Comm_release_ref( _comm, _inuse ) \
-    do { MPIU_Object_release_ref( _comm, _inuse ); } while (0)
-
-
-/* Release a reference to a communicator.  If there are no pending
-   references, delete the communicator and recover all storage and
-   context ids.
-
-   This routine has been inlined because keeping it as a separate routine
-   results in a >5% performance hit for the SQMR benchmark.
-*/
-#undef FUNCNAME
-#define FUNCNAME MPIR_Comm_release
-#undef FCNAME
-#define FCNAME MPL_QUOTE(FUNCNAME)
-static inline int MPIR_Comm_release(MPIR_Comm * comm_ptr)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int in_use;
-
-    MPIR_Comm_release_ref(comm_ptr, &in_use);
-    if (unlikely(!in_use)) {
-        /* the following routine should only be called by this function and its
-         * "_always" variant. */
-        mpi_errno = MPIR_Comm_delete_internal(comm_ptr);
-        /* not ERR_POPing here to permit simpler inlining.  Our caller will
-         * still report the error from the comm_delete level. */
-    }
-
-    return mpi_errno;
-}
-#undef FUNCNAME
-#undef FCNAME
-
-/* MPIR_Comm_release_always is the same as MPIR_Comm_release except it uses
-   MPIR_Comm_release_ref_always instead.
-*/
-int MPIR_Comm_release_always(MPIR_Comm *comm_ptr);
-
-/* applies the specified info chain to the specified communicator */
-int MPIR_Comm_apply_hints(MPIR_Comm *comm_ptr, MPIR_Info *info_ptr);
-
-/* Preallocated comm objects.  There are 3: comm_world, comm_self, and 
-   a private (non-user accessible) dup of comm world that is provided 
-   if needed in MPI_Finalize.  Having a separate version of comm_world
-   avoids possible interference with User code */
-#define MPIR_COMM_N_BUILTIN 3
-extern MPIR_Comm MPIR_Comm_builtin[MPIR_COMM_N_BUILTIN];
-extern MPIR_Comm MPIR_Comm_direct[];
-/* This is the handle for the internal MPI_COMM_WORLD .  The "2" at the end
-   of the handle is 3-1 (e.g., the index in the builtin array) */
-#define MPIR_ICOMM_WORLD  ((MPI_Comm)0x44000002)
-
-/* The following preprocessor macros provide bitfield access information for
- * context ID values.  They follow a uniform naming pattern:
- *
- * MPIR_CONTEXT_foo_WIDTH - the width in bits of the field
- * MPIR_CONTEXT_foo_MASK  - A valid bit mask for bit-wise AND and OR operations
- *                          with exactly all of the bits in the field set.
- * MPIR_CONTEXT_foo_SHIFT - The number of bits that the field should be shifted
- *                          rightwards to place it in the least significant bits
- *                          of the ID.  There may still be higher order bits
- *                          from other fields, so the _MASK should be used first
- *                          if you want to reliably retrieve the exact value of
- *                          the field.
- */
-
-/* yields an rvalue that is the value of the field_name_ in the least significant bits */
-#define MPIR_CONTEXT_READ_FIELD(field_name_,id_) \
-    (((id_) & MPIR_CONTEXT_##field_name_##_MASK) >> MPIR_CONTEXT_##field_name_##_SHIFT)
-/* yields an rvalue that is the old_id_ with field_name_ set to field_val_ */
-#define MPIR_CONTEXT_SET_FIELD(field_name_,old_id_,field_val_) \
-    ((old_id_ & ~MPIR_CONTEXT_##field_name_##_MASK) | ((field_val_) << MPIR_CONTEXT_##field_name_##_SHIFT))
-
-/* Context suffixes for separating pt2pt and collective communication */
-#define MPIR_CONTEXT_SUFFIX_WIDTH (1)
-#define MPIR_CONTEXT_SUFFIX_SHIFT (0)
-#define MPIR_CONTEXT_SUFFIX_MASK ((1 << MPIR_CONTEXT_SUFFIX_WIDTH) - 1)
-#define MPIR_CONTEXT_INTRA_PT2PT (0)
-#define MPIR_CONTEXT_INTRA_COLL  (1)
-#define MPIR_CONTEXT_INTER_PT2PT (0)
-#define MPIR_CONTEXT_INTER_COLL  (1)
-
-/* Used to derive context IDs for sub-communicators from a parent communicator's
-   context ID value.  This field comes after the one bit suffix.
-   values are shifted left by 1. */
-#define MPIR_CONTEXT_SUBCOMM_WIDTH (2)
-#define MPIR_CONTEXT_SUBCOMM_SHIFT (MPIR_CONTEXT_SUFFIX_WIDTH + MPIR_CONTEXT_SUFFIX_SHIFT)
-#define MPIR_CONTEXT_SUBCOMM_MASK      (((1 << MPIR_CONTEXT_SUBCOMM_WIDTH) - 1) << MPIR_CONTEXT_SUBCOMM_SHIFT)
-
-/* these values may be added/subtracted directly to/from an existing context ID
- * in order to determine the context ID of the child/parent */
-#define MPIR_CONTEXT_PARENT_OFFSET    (0 << MPIR_CONTEXT_SUBCOMM_SHIFT)
-#define MPIR_CONTEXT_INTRANODE_OFFSET (1 << MPIR_CONTEXT_SUBCOMM_SHIFT)
-#define MPIR_CONTEXT_INTERNODE_OFFSET (2 << MPIR_CONTEXT_SUBCOMM_SHIFT)
-
-/* this field (IS_LOCALCOM) is used to derive a context ID for local
- * communicators of intercommunicators without communication */
-#define MPIR_CONTEXT_IS_LOCALCOMM_WIDTH (1)
-#define MPIR_CONTEXT_IS_LOCALCOMM_SHIFT (MPIR_CONTEXT_SUBCOMM_SHIFT + MPIR_CONTEXT_SUBCOMM_WIDTH)
-#define MPIR_CONTEXT_IS_LOCALCOMM_MASK (((1 << MPIR_CONTEXT_IS_LOCALCOMM_WIDTH) - 1) << MPIR_CONTEXT_IS_LOCALCOMM_SHIFT)
-
-/* MPIR_MAX_CONTEXT_MASK is the number of ints that make up the bit vector that
- * describes the context ID prefix space.
- *
- * The following must hold:
- * (num_bits_in_vector) <= (maximum_context_id_prefix)
- *   which is the following in concrete terms:
- * MPIR_MAX_CONTEXT_MASK*MPIR_CONTEXT_INT_BITS <= 2**(MPIR_CONTEXT_ID_BITS - (MPIR_CONTEXT_PREFIX_SHIFT + MPIR_CONTEXT_DYNAMIC_PROC_WIDTH))
- *
- * We currently always assume MPIR_CONTEXT_INT_BITS is 32, regardless of the
- * value of sizeof(int)*CHAR_BITS.  We also make the assumption that CHAR_BITS==8.
- *
- * For a 16-bit context id field and CHAR_BITS==8, this implies MPIR_MAX_CONTEXT_MASK <= 256
- */
-
-/* number of bits to shift right by in order to obtain the context ID prefix */
-#define MPIR_CONTEXT_PREFIX_SHIFT (MPIR_CONTEXT_IS_LOCALCOMM_SHIFT + MPIR_CONTEXT_IS_LOCALCOMM_WIDTH)
-#define MPIR_CONTEXT_PREFIX_WIDTH (MPIR_CONTEXT_ID_BITS - (MPIR_CONTEXT_PREFIX_SHIFT + MPIR_CONTEXT_DYNAMIC_PROC_WIDTH))
-#define MPIR_CONTEXT_PREFIX_MASK (((1 << MPIR_CONTEXT_PREFIX_WIDTH) - 1) << MPIR_CONTEXT_PREFIX_SHIFT)
-
-#define MPIR_CONTEXT_DYNAMIC_PROC_WIDTH (1) /* the upper half is reserved for dynamic procs */
-#define MPIR_CONTEXT_DYNAMIC_PROC_SHIFT (MPIR_CONTEXT_ID_BITS - MPIR_CONTEXT_DYNAMIC_PROC_WIDTH) /* the upper half is reserved for dynamic procs */
-#define MPIR_CONTEXT_DYNAMIC_PROC_MASK (((1 << MPIR_CONTEXT_DYNAMIC_PROC_WIDTH) - 1) << MPIR_CONTEXT_DYNAMIC_PROC_SHIFT)
-
-/* should probably be (sizeof(int)*CHAR_BITS) once we make the code CHAR_BITS-clean */
-#define MPIR_CONTEXT_INT_BITS (32)
-#define MPIR_CONTEXT_ID_BITS (sizeof(MPIU_Context_id_t)*8) /* 8 --> CHAR_BITS eventually */
-#define MPIR_MAX_CONTEXT_MASK \
-    ((1 << (MPIR_CONTEXT_ID_BITS - (MPIR_CONTEXT_PREFIX_SHIFT + MPIR_CONTEXT_DYNAMIC_PROC_WIDTH))) / MPIR_CONTEXT_INT_BITS)
-
-/* Utility routines.  Where possible, these are kept in the source directory
-   with the other comm routines (src/mpi/comm, in mpicomm.h).  However,
-   to create a new communicator after a spawn or connect-accept operation, 
-   the device may need to create a new contextid */
-int MPIR_Get_contextid_sparse(MPIR_Comm *comm_ptr, MPIU_Context_id_t *context_id, int ignore_id);
-int MPIR_Get_contextid_sparse_group(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr, int tag, MPIU_Context_id_t *context_id, int ignore_id);
-void MPIR_Free_contextid( MPIU_Context_id_t );
-
-/* ------------------------------------------------------------------------- */
-
-/* Requests */
-/* This currently defines a single structure type for all requests.  
-   Eventually, we may want a union type, as used in MPICH-1 */
-/* NOTE-R1: MPIR_REQUEST_KIND__MPROBE signifies that this is a request created by
- * MPI_Mprobe or MPI_Improbe.  Since we use MPI_Request objects as our
- * MPI_Message objects, we use this separate kind in order to provide stronger
- * error checking.  Once a message (backed by a request) is promoted to a real
- * request by calling MPI_Mrecv/MPI_Imrecv, we actually modify the kind to be
- * MPIR_REQUEST_KIND__RECV in order to keep completion logic as simple as possible. */
-/*E
-  MPIR_Request_kind - Kinds of MPI Requests
-
-  Module:
-  Request-DS
-
-  E*/
-typedef enum MPIR_Request_kind_t {
-    MPIR_REQUEST_KIND__UNDEFINED,
-    MPIR_REQUEST_KIND__SEND,
-    MPIR_REQUEST_KIND__RECV,
-    MPIR_REQUEST_KIND__PREQUEST_SEND,
-    MPIR_REQUEST_KIND__PREQUEST_RECV,
-    MPIR_REQUEST_KIND__GREQUEST,
-    MPIR_REQUEST_KIND__COLL,
-    MPIR_REQUEST_KIND__MPROBE, /* see NOTE-R1 */
-    MPIR_REQUEST_KIND__RMA,
-    MPIR_REQUEST_KIND__LAST
-#ifdef MPID_REQUEST_KIND_DECL
-    , MPID_REQUEST_KIND_DECL
-#endif
-} MPIR_Request_kind_t;
-
-/* Typedefs for Fortran generalized requests */
-typedef void (MPIR_Grequest_f77_cancel_function)(void *, MPI_Fint*, MPI_Fint *); 
-typedef void (MPIR_Grequest_f77_free_function)(void *, MPI_Fint *); 
-typedef void (MPIR_Grequest_f77_query_function)(void *, MPI_Fint *, MPI_Fint *); 
-
-/* vtable-ish structure holding generalized request function pointers and other
- * state.  Saves ~48 bytes in pt2pt requests on many platforms. */
-struct MPIR_Grequest_fns {
-    MPI_Grequest_cancel_function *cancel_fn;
-    MPI_Grequest_free_function   *free_fn;
-    MPI_Grequest_query_function  *query_fn;
-    MPIX_Grequest_poll_function   *poll_fn;
-    MPIX_Grequest_wait_function   *wait_fn;
-    void             *grequest_extra_state;
-    MPIX_Grequest_class         greq_class;
-    MPIR_Lang_t                  greq_lang;         /* language that defined
-                                                       the generalize req */
-};
-
-#if defined (MPL_USE_DBG_LOGGING)
-extern MPL_dbg_class MPIR_DBG_INIT;
-extern MPL_dbg_class MPIR_DBG_PT2PT;
-extern MPL_dbg_class MPIR_DBG_THREAD;
-extern MPL_dbg_class MPIR_DBG_DATATYPE;
-extern MPL_dbg_class MPIR_DBG_COMM;
-extern MPL_dbg_class MPIR_DBG_BSEND;
-extern MPL_dbg_class MPIR_DBG_ERRHAND;
-extern MPL_dbg_class MPIR_DBG_OTHER;
-extern MPL_dbg_class MPIR_DBG_REQUEST;
-extern MPL_dbg_class MPIR_DBG_ASSERT;
-#endif /* MPL_USE_DBG_LOGGING */
-
-/* We use bits from the "count_lo" and "count_hi_and_cancelled" fields
- * to represent the 'count' and 'cancelled' objects.  The LSB of the
- * "count_hi_and_cancelled" field represents the 'cancelled' object.
- * The 'count' object is split between the "count_lo" and
- * "count_hi_and_cancelled" fields, with the lower order bits going
- * into the "count_lo" field, and the higher order bits goin into the
- * "count_hi_and_cancelled" field.  This gives us 2N-1 bits for the
- * 'count' object, where N is the size of int.  However, the value
- * returned to the user is bounded by the definition on MPI_Count. */
-/* NOTE: The below code assumes that the count value is never
- * negative.  For negative values, right-shifting can have weird
- * implementation specific consequences. */
-#define MPIR_STATUS_SET_COUNT(status_, count_)                          \
-    {                                                                   \
-        (status_).count_lo = ((int) count_);                            \
-        (status_).count_hi_and_cancelled &= 1;                          \
-        (status_).count_hi_and_cancelled |= (int) ((MPIR_Ucount) count_ >> (8 * SIZEOF_INT) << 1); \
-    }
-
-#define MPIR_STATUS_GET_COUNT(status_)                                  \
-    ((MPI_Count) ((((MPIR_Ucount) (((unsigned int) (status_).count_hi_and_cancelled) >> 1)) << (8 * SIZEOF_INT)) + (unsigned int) (status_).count_lo))
-
-#define MPIR_STATUS_SET_CANCEL_BIT(status_, cancelled_)	\
-    {                                                   \
-        (status_).count_hi_and_cancelled &= ~1;         \
-        (status_).count_hi_and_cancelled |= cancelled_; \
-    }
-
-#define MPIR_STATUS_GET_CANCEL_BIT(status_)	((status_).count_hi_and_cancelled & 1)
-
-/* Do not set MPI_ERROR (only set if ERR_IN_STATUS is returned */
-#define MPIR_Status_set_empty(status_)                          \
-    {                                                           \
-        if ((status_) != MPI_STATUS_IGNORE)                     \
-        {                                                       \
-            (status_)->MPI_SOURCE = MPI_ANY_SOURCE;             \
-            (status_)->MPI_TAG = MPI_ANY_TAG;                   \
-            MPIR_STATUS_SET_COUNT(*(status_), 0);               \
-            MPIR_STATUS_SET_CANCEL_BIT(*(status_), FALSE);      \
-        }                                                       \
-    }
-/* See MPI 1.1, section 3.11, Null Processes */
-/* Do not set MPI_ERROR (only set if ERR_IN_STATUS is returned */
-#define MPIR_Status_set_procnull(status_)                       \
-    {                                                           \
-        if ((status_) != MPI_STATUS_IGNORE)                     \
-        {                                                       \
-            (status_)->MPI_SOURCE = MPI_PROC_NULL;              \
-            (status_)->MPI_TAG = MPI_ANY_TAG;                   \
-            MPIR_STATUS_SET_COUNT(*(status_), 0);               \
-            MPIR_STATUS_SET_CANCEL_BIT(*(status_), FALSE);      \
-        }                                                       \
-    }
-
-#define MPIR_Request_extract_status(request_ptr_, status_)								\
-{															\
-    if ((status_) != MPI_STATUS_IGNORE)											\
-    {															\
-	int error__;													\
-															\
-	/* According to the MPI 1.1 standard page 22 lines 9-12, the MPI_ERROR field may not be modified except by the	\
-	   functions in section 3.7.5 which return MPI_ERR_IN_STATUSES (MPI_Wait{all,some} and MPI_Test{all,some}). */	\
-	error__ = (status_)->MPI_ERROR;											\
-	*(status_) = (request_ptr_)->status;										\
-	(status_)->MPI_ERROR = error__;											\
-    }															\
-}
-
-#define MPIR_Request_is_complete(req_) (MPIR_cc_is_complete((req_)->cc_ptr))
-
-/*S
-  MPIR_Request - Description of the Request data structure
-
-  Module:
-  Request-DS
-
-  Notes:
-  If it is necessary to remember the MPI datatype, this information is 
-  saved within the device-specific fields provided by 'MPID_DEV_REQUEST_DECL'.
-
-  Requests come in many flavors, as stored in the 'kind' field.  It is 
-  expected that each kind of request will have its own structure type 
-  (e.g., 'MPIR_Request_send_t') that extends the 'MPIR_Request'.
-  
-  S*/
-typedef struct MPIR_Request {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-
-    MPIR_Request_kind_t kind;
-
-    /* pointer to the completion counter.  This is necessary for the
-     * case when an operation is described by a list of requests */
-    MPIR_cc_t *cc_ptr;
-    /* the actual completion counter.  Ensure cc and status are in the
-     * same cache line, assuming the cache line size is a multiple of
-     * 32 bytes and 32-bit integers */
-    MPIR_cc_t cc;
-
-    /* A comm is needed to find the proper error handler */
-    MPIR_Comm *comm;
-    /* Status is needed for wait/test/recv */
-    MPI_Status status;
-
-    union {
-        struct {
-            struct MPIR_Grequest_fns *greq_fns;
-        } ureq; /* kind : MPIR_REQUEST_KIND__GREQUEST */
-        struct {
-            MPIR_Errflag_t errflag;
-        } nbc;  /* kind : MPIR_REQUEST_KIND__COLL */
-#if defined HAVE_DEBUGGER_SUPPORT
-        struct {
-            struct MPIR_Sendq *dbg_next;
-        } send; /* kind : MPID_REQUEST_SEND */
-#endif  /* HAVE_DEBUGGER_SUPPORT */
-        struct {
-            /* Persistent requests have their own "real" requests */
-            struct MPIR_Request *real_request;
-        } persist;  /* kind : MPID_PREQUEST_SEND or MPID_PREQUEST_RECV */
-    } u;
-
-    /* Notes about request_completed_cb:
-     *
-     *   1. The callback function is triggered when this requests
-     *      completion count reaches 0.
-     *
-     *   2. The callback function should be nonblocking.
-     *
-     *   3. The callback function should not poke the progress engine,
-     *      or call any function that pokes the progress engine.
-     *
-     *   4. The callback function can complete other requests, thus
-     *      calling those requests' callback functions.  However, the
-     *      recursion depth of request completion function is limited.
-     *      If we ever need deeper recurisve calls, we need to change
-     *      to an iterative design instead of a recursive design for
-     *      request completion.
-     *
-     *   5. In multithreaded programs, since the callback function is
-     *      nonblocking and never calls the progress engine, it would
-     *      never yield the lock to other threads.  So the recursion
-     *      should be multithreading-safe.
-     */
-    int (*request_completed_cb)(struct MPIR_Request *);
-
-    /* Other, device-specific information */
-#ifdef MPID_DEV_REQUEST_DECL
-    MPID_DEV_REQUEST_DECL
-#endif
-} MPIR_Request ATTRIBUTE((__aligned__(32)));
-
-#define MPIR_REQUEST_PREALLOC 8
-
-extern MPIU_Object_alloc_t MPIR_Request_mem;
-/* Preallocated request objects */
-extern MPIR_Request MPIR_Request_direct[];
-
-/*@
-  MPID_Request_init - Initialize device parts of request
-
-  Return value:
-  None
-  @*/
-void MPID_Request_init(MPIR_Request *);
-
-/*@
-  MPID_Request_finalize - Deallocate device parts of request
-
-  Input Parameter:
-. request - request to release
-
-  Module:
-  Request
-@*/
-void MPID_Request_finalize(MPIR_Request *);
-
-/*@
-  MPID_Request_complete - Complete a request
-
-  Input Parameter:
-. request - request to complete
-
-  Notes:
-  This routine is called to decrement the completion count of a
-  request object.  If the completion count of the request object has
-  reached zero, the reference count for the object will be
-  decremented.
-
-  Module:
-  Request
-@*/
-int MPID_Request_complete(MPIR_Request *);
-
-static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind)
-{
-    MPIR_Request *req;
-
-    req = MPIU_Handle_obj_alloc(&MPIR_Request_mem);
-    if (req != NULL) {
-	MPL_DBG_MSG_P(MPIR_DBG_REQUEST,VERBOSE,
-                      "allocated request, handle=0x%08x", req->handle);
-#ifdef MPICH_DBG_OUTPUT
-	/*MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPIR_REQUEST);*/
-	if (HANDLE_GET_MPI_KIND(req->handle) != MPIR_REQUEST)
-	{
-	    int mpi_errno;
-	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
-                                             FCNAME, __LINE__, MPI_ERR_OTHER,
-                                             "**invalid_handle", "**invalid_handle %d", req->handle);
-	    MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
-	}
-#endif
-	/* FIXME: This makes request creation expensive.  We need to
-         * trim this to the basics, with additional setup for
-         * special-purpose requests (think base class and
-         * inheritance).  For example, do we really* want to set the
-         * kind to UNDEFINED? And should the RMA values be set only
-         * for RMA requests? */
-	MPIU_Object_set_ref(req, 1);
-	req->kind = kind;
-        MPIR_cc_set(&req->cc, 1);
-	req->cc_ptr		   = &req->cc;
-
-	req->status.MPI_ERROR	   = MPI_SUCCESS;
-        MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE);
-
-	req->comm		   = NULL;
-        req->request_completed_cb  = NULL;
-
-        switch(kind) {
-        case MPIR_REQUEST_KIND__COLL:
-            req->u.nbc.errflag = MPIR_ERR_NONE;
-            break;
-        default:
-            break;
-        }
-
-        MPID_Request_init(req);
-    }
-    else
-    {
-	/* FIXME: This fails to fail if debugging is turned off */
-	MPL_DBG_MSG(MPIR_DBG_REQUEST,TYPICAL,"unable to allocate a request");
-    }
-
-    return req;
-}
-
-#define MPIR_Request_add_ref( _req ) \
-    do { MPIU_Object_add_ref( _req ); } while (0)
-
-#define MPIR_Request_release_ref( _req, _inuse ) \
-    do { MPIU_Object_release_ref( _req, _inuse ); } while (0)
-
-static inline void MPIR_Request_free(MPIR_Request *req)
-{
-    int inuse;
-
-    MPIR_Request_release_ref(req, &inuse);
-    if (inuse == 0) {
-        MPL_DBG_MSG_P(MPIR_DBG_REQUEST,VERBOSE,
-                       "freeing request, handle=0x%08x", req->handle);
-
-#ifdef MPICH_DBG_OUTPUT
-        if (HANDLE_GET_MPI_KIND(req->handle) != MPIR_REQUEST)
-        {
-            int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
-                                                 FCNAME, __LINE__, MPI_ERR_OTHER,
-                                                 "**invalid_handle", "**invalid_handle %d", req->handle);
-            MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
-        }
-
-        if (req->ref_count != 0)
-        {
-            int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
-                                                 FCNAME, __LINE__, MPI_ERR_OTHER,
-                                                 "**invalid_refcount", "**invalid_refcount %d", req->ref_count);
-            MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
-        }
-#endif
-
-        /* FIXME: We need a better way to handle these so that we do
-         * not always need to initialize these fields and check them
-         * when we destroy a request */
-        /* FIXME: We need a way to call these routines ONLY when the
-         * related ref count has become zero. */
-        if (req->comm != NULL) {
-            MPIR_Comm_release(req->comm);
-        }
-
-        if (req->kind == MPIR_REQUEST_KIND__GREQUEST && req->u.ureq.greq_fns != NULL) {
-            MPL_free(req->u.ureq.greq_fns);
-        }
-
-        MPID_Request_finalize(req);
-
-        MPIU_Handle_obj_free(&MPIR_Request_mem, req);
-    }
-}
-
-/* These macros allow us to implement a sendq when debugger support is
-   selected.  As there is extra overhead for this, we only do this
-   when specifically requested 
-*/
-#ifdef HAVE_DEBUGGER_SUPPORT
-void MPIR_WaitForDebugger( void );
-void MPIR_DebuggerSetAborting( const char * );
-void MPIR_Sendq_remember(MPIR_Request *, int, int, int );
-void MPIR_Sendq_forget(MPIR_Request *);
-void MPIR_CommL_remember( MPIR_Comm * );
-void MPIR_CommL_forget( MPIR_Comm * );
-
-#define MPIR_SENDQ_REMEMBER(_a,_b,_c,_d) MPIR_Sendq_remember(_a,_b,_c,_d)
-#define MPIR_SENDQ_FORGET(_a) MPIR_Sendq_forget(_a)
-#define MPIR_COMML_REMEMBER(_a) MPIR_CommL_remember( _a )
-#define MPIR_COMML_FORGET(_a) MPIR_CommL_forget( _a )
-#else
-#define MPIR_SENDQ_REMEMBER(a,b,c,d)
-#define MPIR_SENDQ_FORGET(a)
-#define MPIR_COMML_REMEMBER(_a) 
-#define MPIR_COMML_FORGET(_a) 
-#endif
-
-/* must come after MPIR_Comm is declared/defined */
-int MPIR_Get_contextid_nonblock(MPIR_Comm *comm_ptr, MPIR_Comm *newcommp, MPIR_Request **req);
-int MPIR_Get_intercomm_contextid_nonblock(MPIR_Comm *comm_ptr, MPIR_Comm *newcommp, MPIR_Request **req);
-
-/* ------------------------------------------------------------------------- */
-/* Prototypes and definitions for the node ID code.  This is used to support
-   hierarchical collectives in a (mostly) device-independent way. */
-#if defined(MPID_USE_NODE_IDS)
-/* MPID_Node_id_t is a signed integer type defined by the device in mpidpre.h. */
-int MPID_Get_node_id(MPIR_Comm *comm, int rank, MPID_Node_id_t *id_p);
-int MPID_Get_max_node_id(MPIR_Comm *comm, MPID_Node_id_t *max_id_p);
-#endif
-
-/* ------------------------------------------------------------------------- */
-
-/* ------------------------------------------------------------------------- */
-/* end of mpirma.h (in src/mpi/rma?) */
-/* ------------------------------------------------------------------------- */
-
-/*S
-  MPIR_Win - Description of the Window Object data structure.
-
-  Module:
-  Win-DS
-
-  Notes:
-  The following 3 keyvals are defined for attributes on all MPI 
-  Window objects\:
-.vb
- MPI_WIN_SIZE
- MPI_WIN_BASE
- MPI_WIN_DISP_UNIT
-.ve
-  These correspond to the values in 'length', 'start_address', and 
-  'disp_unit'.
-
-  The communicator in the window is the same communicator that the user
-  provided to 'MPI_Win_create' (not a dup).  However, each intracommunicator
-  has a special context id that may be used if MPI communication is used 
-  by the implementation to implement the RMA operations.
-
-  There is no separate window group; the group of the communicator should be
-  used.
-
-  Question:
-  Should a 'MPID_Win' be defined after 'MPID_Segment' in case the device 
-  wants to 
-  store a queue of pending put/get operations, described with 'MPID_Segment'
-  (or 'MPIR_Request')s?
-
-  S*/
-typedef struct MPIR_Win {
-    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-    MPID_Thread_mutex_t mutex;
-    MPIR_Errhandler *errhandler;  /* Pointer to the error handler structure */
-    void *base;
-    MPI_Aint    size;        
-    int          disp_unit;      /* Displacement unit of *local* window */
-    MPIR_Attribute *attributes;
-    MPIR_Comm *comm_ptr;         /* Pointer to comm of window (dup) */
-#ifdef USE_THREADED_WINDOW_CODE
-    /* These were causing compilation errors.  We need to figure out how to
-       integrate threads into MPICH before including these fields. */
-    /* FIXME: The test here should be within a test for threaded support */
-#ifdef HAVE_PTHREAD_H
-    pthread_t wait_thread_id; /* id of thread handling MPI_Win_wait */
-    pthread_t passive_target_thread_id; /* thread for passive target RMA */
-#elif defined(HAVE_WINTHREADS)
-    HANDLE wait_thread_id;
-    HANDLE passive_target_thread_id;
-#endif
-#endif
-    /* These are COPIES of the values so that addresses to them
-       can be returned as attributes.  They are initialized by the
-       MPI_Win_get_attr function.
-     
-       These values are constant for the lifetime of the window, so
-       this is thread-safe.
-     */
-    int  copyDispUnit;
-    MPI_Aint copySize;
-    
-    char          name[MPI_MAX_OBJECT_NAME];  
-
-    MPIR_Win_flavor_t create_flavor;
-    MPIR_Win_model_t  model;
-    MPIR_Win_flavor_t copyCreateFlavor;
-    MPIR_Win_model_t  copyModel;
-
-  /* Other, device-specific information */
-#ifdef MPID_DEV_WIN_DECL
-    MPID_DEV_WIN_DECL
-#endif
-} MPIR_Win;
-extern MPIU_Object_alloc_t MPIR_Win_mem;
-/* Preallocated win objects */
-extern MPIR_Win MPIR_Win_direct[];
-
-/* ------------------------------------------------------------------------- */
-/* also in mpirma.h ?*/
-/* ------------------------------------------------------------------------- */
-
-/*
- * Good Memory (may be required for passive target operations on MPI_Win)
- */
-
-/*@
-  MPID_Alloc_mem - Allocate memory suitable for passive target RMA operations
-
-  Input Parameter:
-+ size - Number of types to allocate.
-- info - Info object
-
-  Return value:
-  Pointer to the allocated memory.  If the memory is not available, 
-  returns null.
-
-  Notes:
-  This routine is used to implement 'MPI_Alloc_mem'.  It is for that reason
-  that there is no communicator argument.  
-
-  This memory may `only` be freed with 'MPID_Free_mem'.
-
-  This is a `local`, not a collective operation.  It functions more like a
-  good form of 'malloc' than collective shared-memory allocators such as
-  the 'shmalloc' found on SGI systems.
-
-  Implementations of this routine may wish to use 'MPID_Memory_register'.  
-  However, this routine has slighly different requirements, so a separate
-  entry point is provided.
-
-  Question:
-  Since this takes an info object, should there be an error routine in the 
-  case that the info object contains an error?
-
-  Module:
-  Win
-  @*/
-void *MPID_Alloc_mem( size_t size, MPIR_Info *info );
-
-/*@
-  MPID_Free_mem - Frees memory allocated with 'MPID_Alloc_mem'
-
-  Input Parameter:
-. ptr - Pointer to memory allocated by 'MPID_Alloc_mem'.
-
-  Return value:
-  'MPI_SUCCESS' if memory was successfully freed; an MPI error code otherwise.
-
-  Notes:
-  The return value is provided because it may not be easy to validate the
-  value of 'ptr' without attempting to free the memory.
-
-  Module:
-  Win
-  @*/
-int MPID_Free_mem( void *ptr );
-
-/* ------------------------------------------------------------------------- */
-/* end of also in mpirma.h ? */
-/* ------------------------------------------------------------------------- */
-
-/* ------------------------------------------------------------------------- */
-/* Reduction and accumulate operations */
-/*E
-  MPIR_Op_kind - Enumerates types of MPI_Op types
-
-  Notes:
-  These are needed for implementing 'MPI_Accumulate', since only predefined
-  operations are allowed for that operation.  
-
-  A gap in the enum values was made allow additional predefined operations
-  to be inserted.  This might include future additions to MPI or experimental
-  extensions (such as a Read-Modify-Write operation).
-
-  Module:
-  Collective-DS
-  E*/
-typedef enum MPIR_Op_kind {
-    MPIR_OP_KIND__NULL=0,
-    MPIR_OP_KIND__MAX=1,
-    MPIR_OP_KIND__MIN=2,
-    MPIR_OP_KIND__SUM=3,
-    MPIR_OP_KIND__PROD=4,
-    MPIR_OP_KIND__LAND=5,
-    MPIR_OP_KIND__BAND=6,
-    MPIR_OP_KIND__LOR=7,
-    MPIR_OP_KIND__BOR=8,
-    MPIR_OP_KIND__LXOR=9,
-    MPIR_OP_KIND__BXOR=10,
-    MPIR_OP_KIND__MAXLOC=11,
-    MPIR_OP_KIND__MINLOC=12,
-    MPIR_OP_KIND__REPLACE=13,
-    MPIR_OP_KIND__NO_OP=14,
-    MPIR_OP_KIND__USER_NONCOMMUTE=32,
-    MPIR_OP_KIND__USER=33
-} MPIR_Op_kind;
-
-/*S
-  MPIR_User_function - Definition of a user function for MPI_Op types.
-
-  Notes:
-  This includes a 'const' to make clear which is the 'in' argument and 
-  which the 'inout' argument, and to indicate that the 'count' and 'datatype'
-  arguments are unchanged (they are addresses in an attempt to allow 
-  interoperation with Fortran).  It includes 'restrict' to emphasize that 
-  no overlapping operations are allowed.
-
-  We need to include a Fortran version, since those arguments will
-  have type 'MPI_Fint *' instead.  We also need to add a test to the
-  test suite for this case; in fact, we need tests for each of the handle
-  types to ensure that the transfered handle works correctly.
-
-  This is part of the collective module because user-defined operations
-  are valid only for the collective computation routines and not for 
-  RMA accumulate.
-
-  Yes, the 'restrict' is in the correct location.  C compilers that 
-  support 'restrict' should be able to generate code that is as good as a
-  Fortran compiler would for these functions.
-
-  We should note on the manual pages for user-defined operations that
-  'restrict' should be used when available, and that a cast may be 
-  required when passing such a function to 'MPI_Op_create'.
-
-  Question:
-  Should each of these function types have an associated typedef?
-
-  Should there be a C++ function here?
-
-  Module:
-  Collective-DS
-  S*/
-typedef union MPIR_User_function {
-    void (*c_function) ( const void *, void *, 
-			 const int *, const MPI_Datatype * ); 
-    void (*f77_function) ( const void *, void *,
-			  const MPI_Fint *, const MPI_Fint * );
-} MPIR_User_function;
-/* FIXME: Should there be "restrict" in the definitions above, e.g., 
-   (*c_function)( const void restrict * , void restrict *, ... )? */
-
-/*S
-  MPIR_Op - MPI_Op structure
-
-  Notes:
-  All of the predefined functions are commutative.  Only user functions may 
-  be noncummutative, so there are two separate op types for commutative and
-  non-commutative user-defined operations.
-
-  Operations do not require reference counts because there are no nonblocking
-  operations that accept user-defined operations.  Thus, there is no way that
-  a valid program can free an 'MPI_Op' while it is in use.
-
-  Module:
-  Collective-DS
-  S*/
-typedef struct MPIR_Op {
-     MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-     MPIR_Op_kind       kind;
-     MPIR_Lang_t        language;
-     MPIR_User_function function;
-  } MPIR_Op;
-#define MPIR_OP_N_BUILTIN 15
-extern MPIR_Op MPIR_Op_builtin[MPIR_OP_N_BUILTIN];
-extern MPIR_Op MPIR_Op_direct[];
-extern MPIU_Object_alloc_t MPIR_Op_mem;
-
-#define MPIR_Op_add_ref(_op) \
-    do { MPIU_Object_add_ref(_op); } while (0)
-#define MPIR_Op_release_ref( _op, _inuse ) \
-    do { MPIU_Object_release_ref( _op, _inuse ); } while (0)
-
-/* release and free-if-not-in-use helper */
-#define MPIR_Op_release(op_p_)                           \
-    do {                                                 \
-        int in_use_;                                     \
-        MPIR_Op_release_ref((op_p_), &in_use_);          \
-        if (!in_use_) {                                  \
-            MPIU_Handle_obj_free(&MPIR_Op_mem, (op_p_)); \
-        }                                                \
-    } while (0)
+ *   6. Finally, we'll add the device "post" header that is allowed to
+ *      use anything from the MPIR layer.
+ *****************************************************************************/
 
-/* ------------------------------------------------------------------------- */
 
-/* ------------------------------------------------------------------------- */
-/* mpicoll.h (in src/mpi/coll?) */
-/* ------------------------------------------------------------------------- */
+/*****************************************************************************/
+/*********************** PART 1: INDEPENDENT HEADERS *************************/
+/*****************************************************************************/
 
-/* Collective operations */
-typedef struct MPIR_Collops {
-    int ref_count;   /* Supports lazy copies */
-    /* Contains pointers to the functions for the MPI collectives */
-    int (*Barrier) (MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Bcast) (void*, int, MPI_Datatype, int, MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Gather) (const void*, int, MPI_Datatype, void*, int, MPI_Datatype,
-                   int, MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Gatherv) (const void*, int, MPI_Datatype, void*, const int *, const int *,
-                    MPI_Datatype, int, MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Scatter) (const void*, int, MPI_Datatype, void*, int, MPI_Datatype,
-                    int, MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Scatterv) (const void*, const int *, const int *, MPI_Datatype,
-                     void*, int, MPI_Datatype, int, MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Allgather) (const void*, int, MPI_Datatype, void*, int,
-                      MPI_Datatype, MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Allgatherv) (const void*, int, MPI_Datatype, void*, const int *,
-                       const int *, MPI_Datatype, MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Alltoall) (const void*, int, MPI_Datatype, void*, int, MPI_Datatype,
-                               MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Alltoallv) (const void*, const int *, const int *, MPI_Datatype,
-                      void*, const int *, const int *, MPI_Datatype, MPIR_Comm *,
-                      MPIR_Errflag_t *);
-    int (*Alltoallw) (const void*, const int *, const int *, const MPI_Datatype *, void*,
-                      const int *, const int *, const MPI_Datatype *, MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Reduce) (const void*, void*, int, MPI_Datatype, MPI_Op, int,
-                   MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Allreduce) (const void*, void*, int, MPI_Datatype, MPI_Op,
-                      MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Reduce_scatter) (const void*, void*, const int *, MPI_Datatype, MPI_Op,
-                           MPIR_Comm *, MPIR_Errflag_t *);
-    int (*Scan) (const void*, void*, int, MPI_Datatype, MPI_Op, MPIR_Comm *, MPIR_Errflag_t * );
-    int (*Exscan) (const void*, void*, int, MPI_Datatype, MPI_Op, MPIR_Comm *, MPIR_Errflag_t * );
-    int (*Reduce_scatter_block) (const void*, void*, int, MPI_Datatype, MPI_Op,
-                           MPIR_Comm *, MPIR_Errflag_t *);
-
-    /* MPI-3 nonblocking collectives */
-    int (*Ibarrier_sched)(MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ibcast_sched)(void *buffer, int count, MPI_Datatype datatype, int root,
-                  MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Igather_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
-                   int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr,
-                   MPID_Sched_t s);
-    int (*Igatherv_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
-                    const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root,
-                    MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Iscatter_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
-                    int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr,
-                    MPID_Sched_t s);
-    int (*Iscatterv_sched)(const void *sendbuf, const int *sendcounts, const int *displs,
-                     MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                     int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Iallgather_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
-                      int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                      MPID_Sched_t s);
-    int (*Iallgatherv_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
-                       const int *recvcounts, const int *displs, MPI_Datatype recvtype,
-                       MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ialltoall_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
-                     int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                     MPID_Sched_t s);
-    int (*Ialltoallv_sched)(const void *sendbuf, const int *sendcounts, const int *sdispls,
-                      MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
-                      const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                      MPID_Sched_t s);
-    int (*Ialltoallw_sched)(const void *sendbuf, const int *sendcounts, const int *sdispls,
-                      const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
-                      const int *rdispls, const MPI_Datatype *recvtypes,
-                      MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ireduce_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
-                   int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Iallreduce_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                      MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ireduce_scatter_sched)(const void *sendbuf, void *recvbuf, const int *recvcounts,
-                           MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ireduce_scatter_block_sched)(const void *sendbuf, void *recvbuf, int recvcount,
-                                 MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
-                                 MPID_Sched_t s);
-    int (*Iscan_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
-                 MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Iexscan_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
-                   MPIR_Comm *comm_ptr, MPID_Sched_t s);
-
-    struct MPIR_Collops *prev_coll_fns; /* when overriding this table, set this to point to the old table */
-
-    /* MPI-3 neighborhood collectives (blocking & nonblocking) */
-    int (*Neighbor_allgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                              void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                              MPIR_Comm *comm_ptr);
-    int (*Neighbor_allgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                               void *recvbuf, const int recvcounts[], const int displs[],
-                               MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-    int (*Neighbor_alltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                             void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                             MPIR_Comm *comm_ptr);
-    int (*Neighbor_alltoallv)(const void *sendbuf, const int sendcounts[], const int sdispls[],
-                              MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
-                              const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-    int (*Neighbor_alltoallw)(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[],
-                              const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
-                              const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
-                              MPIR_Comm *comm_ptr);
-    int (*Ineighbor_allgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                               void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                               MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ineighbor_allgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                                void *recvbuf, const int recvcounts[], const int displs[],
-                                MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ineighbor_alltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                              void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                              MPIR_Comm *comm_ptr, MPID_Sched_t s);
-    int (*Ineighbor_alltoallv)(const void *sendbuf, const int sendcounts[], const int sdispls[],
-                               MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
-                               const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                               MPID_Sched_t s);
-    int (*Ineighbor_alltoallw)(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[],
-                               const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
-                               const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
-                               MPIR_Comm *comm_ptr, MPID_Sched_t s);
-} MPIR_Collops;
-
-/* ------------------------------------------------------------------------- */
-/* end of mpicoll.h (in src/mpi/coll? */
-/* ------------------------------------------------------------------------- */
-
-/* ------------------------------------------------------------------------- */
-/* mpitopo.h (in src/mpi/topo? */
-/*
- * The following struture allows the device detailed control over the 
- * functions that are used to implement the topology routines.  If either
- * the pointer to this structure is null or any individual entry is null,
- * the default function is used (this follows exactly the same rules as the
- * collective operations, provided in the MPIR_Collops structure).
- */
-/* ------------------------------------------------------------------------- */
-
-typedef struct MPIR_TopoOps {
-    int (*cartCreate)( const MPIR_Comm *, int, const int[], const int [],
-		       int, MPI_Comm * );
-    int (*cartMap)   ( const MPIR_Comm *, int, const int[], const int [],
-		       int * );
-    int (*graphCreate)( const MPIR_Comm *, int, const int[], const int [],
-			int, MPI_Comm * );
-    int (*graphMap)   ( const MPIR_Comm *, int, const int[], const int[],
-			int * );
-} MPIR_TopoOps;
-/* ------------------------------------------------------------------------- */
-/* end of mpitopo.h (in src/mpi/topo? */
-/* ------------------------------------------------------------------------- */
-
-
-typedef struct MPIR_Commops {
-    int (*split_type)(MPIR_Comm *, int, int, MPIR_Info *, MPIR_Comm **);
-} MPIR_Commops;
-extern struct MPIR_Commops  *MPIR_Comm_fns; /* Communicator creation functions */
-
-
-/* Per process data */
-typedef enum MPIR_MPI_State_t {
-    MPICH_MPI_STATE__PRE_INIT=0,
-    MPICH_MPI_STATE__IN_INIT,
-    MPICH_MPI_STATE__POST_INIT,
-    MPICH_MPI_STATE__POST_FINALIZED
-} MPIR_MPI_State_t;
-
-typedef struct PreDefined_attrs {
-    int appnum;          /* Application number provided by mpiexec (MPI-2) */
-    int host;            /* host */
-    int io;              /* standard io allowed */
-    int lastusedcode;    /* last used error code (MPI-2) */
-    int tag_ub;          /* Maximum message tag */
-    int universe;        /* Universe size from mpiexec (MPI-2) */
-    int wtime_is_global; /* Wtime is global over processes in COMM_WORLD */
-} PreDefined_attrs;
-
-struct MPID_Datatype;
-
-typedef struct MPICH_PerProcess_t {
-    OPA_int_t mpich_state; /* State of MPICH. Use OPA_int_t to make MPI_Initialized() etc.
-                              thread-safe per MPI-3.1.  See MPI-Forum ticket 357 */
-    int               do_error_checks;  /* runtime error check control */
-    struct MPIR_Comm  *comm_world;      /* Easy access to comm_world for
-                                           error handler */
-    struct MPIR_Comm  *comm_self;       /* Easy access to comm_self */
-    struct MPIR_Comm  *comm_parent;     /* Easy access to comm_parent */
-    struct MPIR_Comm  *icomm_world;     /* An internal version of comm_world
-					   that is separate from user's 
-					   versions */
-    PreDefined_attrs  attrs;            /* Predefined attribute values */
-    int               tagged_coll_mask; /* Tag space mask for tagged collectives */
-
-    /* The topology routines dimsCreate is independent of any communicator.
-       If this pointer is null, the default routine is used */
-    int (*dimsCreate)( int, int, int *);
-
-    /* Attribute dup functions.  Here for lazy initialization */
-    int (*attr_dup)( int, MPIR_Attribute *, MPIR_Attribute ** );
-    int (*attr_free)( int, MPIR_Attribute ** );
-    /* There is no win_attr_dup function because there can be no MPI_Win_dup
-       function */
-    /* Routine to get the messages corresponding to dynamically created
-       error messages */
-    const char *(*errcode_to_string)( int );
-#ifdef HAVE_CXX_BINDING
-    /* Routines to call C++ functions from the C implementation of the
-       MPI reduction and attribute routines */
-    void (*cxx_call_op_fn)(const void *, void *, int, MPI_Datatype,
-			    MPI_User_function * );
-    /* Error handling functions.  As for the attribute functions,
-       we pass the integer file/comm/win, the address of the error code, 
-       and the C function to call (itself a function defined by the
-       C++ interface and exported to C).  The first argument is used
-       to specify the kind (comm,file,win) */
-    void  (*cxx_call_errfn) ( int, int *, int *, void (*)(void) );
-#endif /* HAVE_CXX_BINDING */
-} MPICH_PerProcess_t;
-extern MPICH_PerProcess_t MPIR_Process;
-
-/* ------------------------------------------------------------------------- */
-/* In MPICH, each function has an "enter" and "exit" macro.  These can be 
- * used to add various features to each function at compile time, or they
- * can be set to empty to provide the fastest possible production version.
- *
- * There are at this time three choices of features (beyond the empty choice)
- * 1. timing (controlled by macros in mpitimerimpl.h)
- *    These collect data on when each function began and finished; the
- *    resulting data can be displayed using special programs
- * 2. Debug logging (selected with --enable-g=log)
- *    Invokes MPL_DBG_MSG at the entry and exit for each routine
- * 3. Additional memory validation of the memory arena (--enable-g=memarena)
- */
-/* ------------------------------------------------------------------------- */
-/* allow the timing module the opportunity to define the macros */
-#include "mpifunc.h"
-#if !defined(NEEDS_FUNC_ENTER_EXIT_DEFS)
-    /* If no timing choice is selected, this sets the entry/exit macros 
-       to empty */
-#   include "mpitimerimpl.h"
+/* if we are defining this, we must define it before including mpl.h */
+#if defined(MPICH_DEBUG_MEMINIT)
+#define MPL_VG_ENABLED 1
 #endif
 
-#ifdef NEEDS_FUNC_ENTER_EXIT_DEFS
-/* mpich layer definitions */
-#define MPID_MPI_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
-#define MPID_MPI_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)	MPIR_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)	MPIR_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)	MPIR_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)	MPIR_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)	MPIR_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)	MPIR_FUNC_EXIT(a)
-#define MPID_MPI_COLL_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_COLL_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPID_MPI_RMA_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_RMA_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPID_MPI_INIT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_INIT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPID_MPI_FINALIZE_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPID_MPI_FINALIZE_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-
-/* device layer definitions */
-#define MPIDI_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
-#define MPIDI_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPIDI_RMA_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
-#define MPIDI_RMA_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
+#include "mpl.h"
+#include "opa_primitives.h"
+#include "mpi.h"
 
-/* evaporate the timing macros since timing is not selected */
-#define MPIU_Timer_init(rank, size)
-#define MPIU_Timer_finalize()
-#endif /* NEEDS_FUNC_ENTER_EXIT_DEFS */
 
-/* Definitions for error handling and reporting */
-#include "mpierror.h"
-#include "mpierrs.h"
+/*****************************************************************************/
+/*********************** PART 2: FORWARD DECLARATION *************************/
+/*****************************************************************************/
 
-/* ------------------------------------------------------------------------- */
+struct MPIR_Request;
+typedef struct MPIR_Request MPIR_Request;
 
-/* FIXME: Move these to the communicator block; make sure that all 
-   objects have such hooks */
-#ifndef HAVE_DEV_COMM_HOOK
-#define MPID_Dev_comm_create_hook( a ) MPI_SUCCESS
-#define MPID_Dev_comm_destroy_hook( a ) MPI_SUCCESS
-#endif
+struct MPIR_Comm;
+typedef struct MPIR_Comm MPIR_Comm;
 
-/* ------------------------------------------------------------------------- */
-/* FIXME: What is the scope of these functions?  Can they be moved into
-   src/mpi/pt2pt? */
-/* ------------------------------------------------------------------------- */
+struct MPIR_Datatype;
+/* FIXME: waiting for Wes' patch revert to fix this */
+/* typedef struct MPIR_Datatype MPIR_Datatype; */
 
+struct MPIR_Win;
+typedef struct MPIR_Win MPIR_Win;
 
-/* ------------------------------------------------------------------------- */
+struct MPIR_Info;
+typedef struct MPIR_Info MPIR_Info;
 
-/* FIXME: The bindings should be divided into three groups:
-   1. ADI3 routines.  These should have structure comment documentation, e.g., 
-   the text from doc/adi3/adi3.c
-   2. General utility routines.  These should have a short description
-   3. Local utility routines, e.g., routines used within a single subdirectory.
-   These should be moved into an include file in that subdirectory 
-*/
-/* Bindings for internal routines */
-/*@ MPIR_Add_finalize - Add a routine to be called when MPI_Finalize is invoked
+struct MPIR_Group;
+typedef struct MPIR_Group MPIR_Group;
 
-+ routine - Routine to call
-. extra   - Void pointer to data to pass to the routine
-- priority - Indicates the priority of this callback and controls the order
-  in which callbacks are executed.  Use a priority of zero for most handlers;
-  higher priorities will be executed first.
+struct MPIR_Topology;
+typedef struct MPIR_Topology MPIR_Topology;
 
-Notes:
-  The routine 'MPID_Finalize' is executed with priority 
-  'MPIR_FINALIZE_CALLBACK_PRIO' (currently defined as 5).  Handlers with
-  a higher priority execute before 'MPID_Finalize' is called; those with
-  a lower priority after 'MPID_Finalize' is called.  
-@*/
-void MPIR_Add_finalize( int (*routine)( void * ), void *extra, int priority );
 
-#define MPIR_FINALIZE_CALLBACK_PRIO 5
-#define MPIR_FINALIZE_CALLBACK_HANDLE_CHECK_PRIO 1
-#define MPIR_FINALIZE_CALLBACK_DEFAULT_PRIO 0
-#define MPIR_FINALIZE_CALLBACK_MAX_PRIO 10
+/*****************************************************************************/
+/******************* PART 3: DEVICE INDEPENDENT HEADERS **********************/
+/*****************************************************************************/
 
-/*int MPIR_Comm_attr_dup(MPIR_Comm *, MPIR_Attribute **);
-  int MPIR_Comm_attr_delete(MPIR_Comm *, MPIR_Attribute *);*/
-int MPIR_Comm_copy( MPIR_Comm *, int, MPIR_Comm ** );
-int MPIR_Comm_copy_data(MPIR_Comm *comm_ptr, MPIR_Comm **outcomm_ptr);
+#include "mpir_misc.h"
+#include "mpir_dbg.h"
+#include "mpir_objects.h"
+#include "mpir_strerror.h"
+#include "mpir_type_defs.h"
+#include "mpir_assert.h"
+#include "mpir_pointers.h"
+#include "mpir_refcount.h"
+#include "mpir_mem.h"
+#include "mpir_info.h"
+#include "mpir_errhandler.h"
+#include "mpir_attr_generic.h"
+#include "mpir_contextid.h"
+#include "mpir_status.h"
+#include "mpir_debugger.h"
+#include "mpir_op.h"
+#include "mpir_topo.h"
+#include "mpir_tags.h"
+#include "mpir_pt2pt.h"
+#include "mpir_ext.h"
 
-/* Fortran keyvals are set with functions in mpi_f77interface.h */
 #ifdef HAVE_CXX_BINDING
-extern void MPIR_Keyval_set_cxx( int, void (*)(void), void (*)(void) );
-extern void MPIR_Op_set_cxx( MPI_Op, void (*)(void) );
-extern void MPIR_Errhandler_set_cxx( MPI_Errhandler, void (*)(void) );
+#include "mpir_cxxinterface.h"
 #endif
 
-int MPIR_Group_create( int, MPIR_Group ** );
-int MPIR_Group_release(MPIR_Group *group_ptr);
-
-/* marks a request as complete, extracting the status */
-int MPIR_Request_complete(MPI_Request *, MPIR_Request *, MPI_Status *, int *);
-
-int MPIR_Request_get_error(MPIR_Request *);
-/* run the progress engine until the given request is complete */
-int MPIR_Progress_wait_request(MPIR_Request *req);
-
-/* The following routines perform the callouts to the user routines registered
-   as part of a generalized request.  They handle any language binding issues
-   that are necessary. They are used when completing, freeing, cancelling or
-   extracting the status from a generalized request. */
-int MPIR_Grequest_cancel(MPIR_Request * request_ptr, int complete);
-int MPIR_Grequest_query(MPIR_Request * request_ptr);
-int MPIR_Grequest_free(MPIR_Request * request_ptr);
-
-/* this routine was added to support our extension relaxing the progress rules
- * for generalized requests */
-int MPIR_Grequest_progress_poke(int count, MPIR_Request **request_ptrs,
-		MPI_Status array_of_statuses[] );
-int MPIR_Grequest_waitall(int count, MPIR_Request * const *  request_ptrs);
-
-/* ------------------------------------------------------------------------- */
-/* Prototypes for language-specific routines, such as routines to set
-   Fortran keyval attributes */
 #ifdef HAVE_FORTRAN_BINDING
-#include "mpi_f77interface.h"
+#include "mpir_f77interface.h"
 #endif
 
-/* ADI Bindings */
-/*@
-  MPID_Init - Initialize the device
-
-  Input Parameters:
-+ argc_p - Pointer to the argument count
-. argv_p - Pointer to the argument list
-- requested - Requested level of thread support.  Values are the same as
-  for the 'required' argument to 'MPI_Init_thread', except that we define
-  an enum for these values.
-
-  Output Parameters:
-+ provided - Provided level of thread support.  May be less than the 
-  requested level of support.
-. has_args - Set to true if 'argc_p' and 'argv_p' contain the command
-  line arguments.  See below.
-- has_env  - Set to true if the environment of the process has been 
-  set as the user expects.  See below.
-
-  Return value:
-  Returns 'MPI_SUCCESS' on success and an MPI error code on failure.  Failure
-  can happen when, for example, the device is unable  to start or contact the
-  number of processes specified by the 'mpiexec' command.
-
-  Notes:
-  Null arguments for 'argc_p' and 'argv_p' `must` be valid (see MPI-2, section
-  4.2)
-
-  Multi-method devices should initialize each method within this call.
-  They can use environment variables and/or command-line arguments
-  to decide which methods to initialize (but note that they must not
-  `depend` on using command-line arguments).
-
-  This call also initializes all MPID data needed by the device.  This
-  includes the 'MPIR_Request's and any other data structures used by
-  the device.
-
-  The arguments 'has_args' and 'has_env' indicate whether the process was
-  started with command-line arguments or environment variables.  In some
-  cases, only the root process is started with these values; in others, 
-  the startup environment ensures that each process receives the 
-  command-line arguments and environment variables that the user expects. 
-  While the MPI standard makes no requirements that command line arguments or 
-  environment variables are provided to all processes, most users expect a
-  common environment.  These variables allow an MPI implementation (that is
-  based on ADI-3) to provide both of these by making use of MPI communication
-  after 'MPID_Init' is called but before 'MPI_Init' returns to the user, if
-  the process management environment does not provide this service.
-
-
-  This routine is used to implement both 'MPI_Init' and 'MPI_Init_thread'.
-
-  Setting the environment requires a 'setenv' function.  Some
-  systems may not have this.  In that case, the documentation must make 
-  clear that the environment may not be propagated to the generated processes.
-
-  Module:
-  MPID_CORE
-
-  Questions:
-
-  The values for 'has_args' and 'has_env' are boolean.  
-  They could be more specific.  For 
-  example, the value could indicate the rank in 'MPI_COMM_WORLD' of a 
-  process that has the values; the value 'MPI_ANY_SOURCE' (or a '-1') could
-  indicate that the value is available on all processes (including this one).
-  We may want this since otherwise the processes may need to determine whether
-  any process needs the command line.  Another option would be to use positive 
-  values in the same way that the 'color' argument is used in 'MPI_Comm_split';
-  a negative value indicates the member of the processes with that color that 
-  has the values of the command line arguments (or environment).  This allows
-  for non-SPMD programs.
-
-  Do we require that the startup environment (e.g., whatever 'mpiexec' is 
-  using to start processes) is responsible for delivering
-  the command line arguments and environment variables that the user expects?
-  That is, if the user is running an SPMD program, and expects each process
-  to get the same command line argument, who is responsible for this?  
-  The 'has_args' and 'has_env' values are intended to allow the ADI to 
-  handle this while taking advantage of any support that the process 
-  manager framework may provide.
-
-  Alternately, how do we find out from the process management environment
-  whether it took care of the environment or the command line arguments?  
-  Do we need a 'PMI_Env_query' function that can answer these questions
-  dynamically (in case a different process manager is used through the same
-  interface)?
-
-  Can we fix the Fortran command-line arguments?  That is, can we arrange for
-  'iargc' and 'getarg' (and the POSIX equivalents) to return the correct 
-  values?  See, for example, the Absoft implementations of 'getarg'.  
-  We could also contact PGI about the Portland Group compilers, and of 
-  course the 'g77' source code is available.
-  Does each process have the same values for the environment variables 
-  when this routine returns?
-
-  If we don''t require that all processes get the same argument list, 
-  we need to find out if they did anyway so that 'MPI_Init_thread' can
-  fixup the list for the user.  This argues for another return value that
-  flags how much of the environment the 'MPID_Init' routine set up
-  so that the 'MPI_Init_thread' call can provide the rest.  The reason
-  for this is that, even though the MPI standard does not require it, 
-  a user-friendly implementation should, in the SPMD mode, give each
-  process the same environment and argument lists unless the user 
-  explicitly directed otherwise.
-
-  How does this interface to PMI?  Do we need to know anything?  Should
-  this call have an info argument to support PMI?
-
-  The following questions involve how environment variables and command
-  line arguments are used to control the behavior of the implementation. 
-  Many of these values must be determined at the time that 'MPID_Init' 
-  is called.  These all should be considered in the context of the 
-  parameter routines described in the MPICH Design Document.
-
-  Are there recommended environment variable names?  For example, in ADI-2,
-  there are many debugging options that are part of the common device.
-  In MPI-2, we can''t require command line arguments, so any such options
-  must also have environment variables.  E.g., 'MPICH_ADI_DEBUG' or
-  'MPICH_ADI_DB'.
-
-  Names that are explicitly prohibited?  For example, do we want to 
-  reserve any names that 'MPI_Init_thread' (as opposed to 'MPID_Init')
-  might use?  
-
-  How does information on command-line arguments and environment variables
-  recognized by the device get added to the documentation?
-
-  What about control for other impact on the environment?  For example,
-  what signals should the device catch (e.g., 'SIGFPE'? 'SIGTRAP'?)?  
-  Which of these should be optional (e.g., ignore or leave signal alone) 
-  or selectable (e.g., port to listen on)?  For example, catching 'SIGTRAP'
-  causes problems for 'gdb', so we''d like to be able to leave 'SIGTRAP' 
-  unchanged in some cases.
-
-  Another environment variable should control whether fault-tolerance is 
-  desired.  If fault-tolerance is selected, then some collective operations 
-  will need to use different algorithms and most fatal errors detected by the 
-  MPI implementation should abort only the affected process, not all processes.
-  @*/
-int MPID_Init( int *argc_p, char ***argv_p, int requested, 
-	       int *provided, int *has_args, int *has_env );
-
-/* was: 
- int MPID_Init( int *argc_p, char ***argv_p, 
-	       int requested, int *provided,
-	       MPIR_Comm **parent_comm, int *has_args, int *has_env ); */
-
-/*@ 
-  MPID_InitCompleted - Notify the device that the MPI_Init or MPI_Initthread
-  call has completed setting up MPI
-
- Notes:
- This call allows the device to complete any setup that it wishes to perform
- and for which it needs to access any of the structures (such as 'MPIR_Process')
- that are initialized after 'MPID_Init' is called.  If the device does not need
- any extra operations, then it may provide either an empty function or even
- define this as a macro with the value 'MPI_SUCCESS'.
-  @*/
-int MPID_InitCompleted( void );
-
-/*@
-  MPID_Finalize - Perform the device-specific termination of an MPI job
-
-  Return Value:
-  'MPI_SUCCESS' or a valid MPI error code.  Normally, this routine will
-  return 'MPI_SUCCESS'.  Only in extrordinary circumstances can this
-  routine fail; for example, if some process stops responding during the
-  finalize step.  In this case, 'MPID_Finalize' should return an MPI 
-  error code indicating the reason that it failed.
-
-  Notes:
-
-  Module:
-  MPID_CORE
-
-  Questions:
-  Need to check the MPI-2 requirements on 'MPI_Finalize' with respect to
-  things like which process must remain after 'MPID_Finalize' is called.
-  @*/
-int MPID_Finalize(void);
-/*@
-  MPID_Abort - Abort at least the processes in the specified communicator.
-
-  Input Parameters:
-+ comm        - Communicator of processes to abort
-. mpi_errno   - MPI error code containing the reason for the abort
-. exit_code   - Exit code to return to the calling environment.  See notes.
-- error_msg   - Optional error message
-
-  Return value:
-  'MPI_SUCCESS' or an MPI error code.  Normally, this routine should not 
-  return, since the calling process must be a member of the communicator.  
-  However, under some circumstances, the 'MPID_Abort' might fail; in this 
-  case, returning an error indication is appropriate.
-
-  Notes:
-
-  In a fault-tolerant MPI implementation, this operation should abort `only` 
-  the processes in the specified communicator.  Any communicator that shares
-  processes with the aborted communicator becomes invalid.  For more 
-  details, see (paper not yet written on fault-tolerant MPI).
-
-  In particular, if the communicator is 'MPI_COMM_SELF', only the calling 
-  process should be aborted.
-
-  The 'exit_code' is the exit code that this particular process will 
-  attempt to provide to the 'mpiexec' or other program invocation 
-  environment.  See 'mpiexec' for a discussion of how exit codes from 
-  many processes may be combined.
-
-  If the error_msg field is non-NULL this string will be used as the message
-  with the abort output.  Otherwise, the output message will be base on the
-  error message associated with the mpi_errno.
-
-  An external agent that is aborting processes can invoke this with either
-  'MPI_COMM_WORLD' or 'MPI_COMM_SELF'.  For example, if the process manager
-  wishes to abort a group of processes, it should cause 'MPID_Abort' to 
-  be invoked with 'MPI_COMM_SELF' on each process in the group.
-
-  Question:
-  An alternative design is to provide an 'MPIR_Group' instead of a
-  communicator.  This would allow a process manager to ask the ADI 
-  to kill an entire group of processes without needing a communicator.
-  However, the implementation of 'MPID_Abort' will either do this by
-  communicating with other processes or by requesting the process manager
-  to kill the processes.  That brings up this question: should 
-  'MPID_Abort' use 'PMI' to kill processes?  Should it be required to
-  notify the process manager?  What about persistent resources (such 
-  as SYSV segments or forked processes)?  
-
-  This suggests that for any persistent resource, an exit handler be
-  defined.  These would be executed by 'MPID_Abort' or 'MPID_Finalize'.  
-  See the implementation of 'MPI_Finalize' for an example of exit callbacks.
-  In addition, code that registered persistent resources could use persistent
-  storage (i.e., a file) to record that information, allowing cleanup 
-  utilities (such as 'mpiexec') to remove any resources left after the 
-  process exits.
-
-  'MPI_Finalize' requires that attributes on 'MPI_COMM_SELF' be deleted 
-  before anything else happens; this allows libraries to attach end-of-job
-  actions to 'MPI_Finalize'.  It is valuable to have a similar 
-  capability on 'MPI_Abort', with the caveat that 'MPI_Abort' may not 
-  guarantee that the run-on-abort routines were called.  This provides a
-  consistent way for the MPICH implementation to handle freeing any 
-  persistent resources.  However, such callbacks must be limited since
-  communication may not be possible once 'MPI_Abort' is called.  Further,
-  any callbacks must guarantee that they have finite termination.  
-  
-  One possible extension would be to allow `users` to add actions to be 
-  run when 'MPI_Abort' is called, perhaps through a special attribute value
-  applied to 'MPI_COMM_SELF'.  Note that is is incorrect to call the delete 
-  functions for the normal attributes on 'MPI_COMM_SELF' because MPI
-  only specifies that those are run on 'MPI_Finalize' (i.e., normal 
-  termination). 
-
-  Module:
-  MPID_CORE
-  @*/
-
-int MPID_Abort( MPIR_Comm *comm, int mpi_errno, int exit_code, const char *error_msg );
-
-int MPID_Open_port(MPIR_Info *, char *);
-int MPID_Close_port(const char *);
-
-/*@
-   MPID_Comm_accept - MPID entry point for MPI_Comm_accept
-
-   Input Parameters:
-+  port_name - port name
-.  info - info
-.  root - root
--  comm - communicator
-
-   Output Parameters:
-.  MPI_Comm *newcomm - new communicator
-
-  Return Value:
-  'MPI_SUCCESS' or a valid MPI error code.
-@*/
-int MPID_Comm_accept(const char *, MPIR_Info *, int, MPIR_Comm *, MPIR_Comm **);
-
-/*@
-   MPID_Comm_connect - MPID entry point for MPI_Comm_connect
-
-   Input Parameters:
-+  port_name - port name
-.  info - info
-.  root - root
--  comm - communicator
-
-   Output Parameters:
-.  newcomm_ptr - new intercommunicator
-
-  Return Value:
-  'MPI_SUCCESS' or a valid MPI error code.
-@*/
-int MPID_Comm_connect(const char *, MPIR_Info *, int, MPIR_Comm *, MPIR_Comm **);
-
-int MPID_Comm_disconnect(MPIR_Comm *);
-
-int MPID_Comm_spawn_multiple(int, char *[], char **[], const int [], MPIR_Info* [],
-                             int, MPIR_Comm *, MPIR_Comm **, int []);
-
-/*@
-  MPID_Comm_failure_ack - MPID entry point for MPI_Comm_failure_ack
-
-  Input Parameters:
-. comm - communicator
-
-  Return Value:
-  'MPI_SUCCESS' or a valid MPI error code.
-@*/
-int MPID_Comm_failure_ack(MPIR_Comm *comm);
-
-/*@
-  MPID_Comm_failure_get_acked - MPID entry point for MPI_Comm_failure_get_acked
-
-  Input Parameters:
-. comm - communicator
-
-  Output Parameters
-. failed_group_ptr - group of failed processes
-
-  Return Value:
-  'MPI_SUCCESS' or a valid MPI error code.
-@*/
-int MPID_Comm_failure_get_acked(MPIR_Comm *comm, MPIR_Group **failed_group_ptr);
-
-/*@
-  MPID_Comm_get_all_failed_procs - Constructs a group of failed processes that it uniform over a communicator
-
-  Input Parameters:
-. comm - communicator
-. tag - Tag used to do communciation
-
-  Output Parameters:
-. failed_grp - group of all failed processes
-
-  Return Value:
-  'MPI_SUCCESS' or a valid MPI error code.
-@*/
-int MPID_Comm_get_all_failed_procs(MPIR_Comm *comm_ptr, MPIR_Group **failed_group, int tag);
-
-/*@
-  MPID_Comm_revoke - MPID entry point for MPI_Comm_revoke
-
-  Input Parameters:
-  comm - communicator
-  remote - True if we received the revoke message from a remote process
-
-  Return Value:
-  'MPI_SUCCESS' or a valid MPI error code.
-@*/
-int MPID_Comm_revoke(MPIR_Comm *comm, int is_remote);
-
-/*@
-  MPID_Send - MPID entry point for MPI_Send
-
-  Notes:
-  The only difference between this and 'MPI_Send' is that the basic
-  error checks (e.g., valid communicator, datatype, dest, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, a context id offset is provided in addition to the 
-  communicator, and a request may be returned.  The context offset is 
-  added to the context of the communicator
-  to get the context it used by the message.
-  A request is returned only if the ADI implementation was unable to 
-  complete the send of the message.  In that case, the usual 'MPI_Wait'
-  logic should be used to complete the request.  This approach is used to 
-  allow a simple implementation of the ADI.  The ADI is free to always 
-  complete the message and never return a request.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Send( const void *buf, MPI_Aint count, MPI_Datatype datatype,
-	       int dest, int tag, MPIR_Comm *comm, int context_offset,
-	       MPIR_Request **request );
-
-/*@
-  MPID_Rsend - MPID entry point for MPI_Rsend
-
-  Notes:
-  The only difference between this and 'MPI_Rsend' is that the basic
-  error checks (e.g., valid communicator, datatype, dest, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, a context id offset is provided in addition to the 
-  communicator, and a request may be returned.  The context offset is 
-  added to the context of the communicator
-  to get the context it used by the message.
-  A request is returned only if the ADI implementation was unable to 
-  complete the send of the message.  In that case, the usual 'MPI_Wait'
-  logic should be used to complete the request.  This approach is used to 
-  allow a simple implementation of the ADI.  The ADI is free to always 
-  complete the message and never return a request.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Rsend( const void *buf, int count, MPI_Datatype datatype,
-		int dest, int tag, MPIR_Comm *comm, int context_offset,
-		MPIR_Request **request );
-
-/*@
-  MPID_Ssend - MPID entry point for MPI_Ssend
-
-  Notes:
-  The only difference between this and 'MPI_Ssend' is that the basic
-  error checks (e.g., valid communicator, datatype, dest, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, a context id offset is provided in addition to the 
-  communicator, and a request may be returned.  The context offset is 
-  added to the context of the communicator
-  to get the context it used by the message.
-  A request is returned only if the ADI implementation was unable to 
-  complete the send of the message.  In that case, the usual 'MPI_Wait'
-  logic should be used to complete the request.  This approach is used to 
-  allow a simple implementation of the ADI.  The ADI is free to always 
-  complete the message and never return a request.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Ssend( const void *buf, MPI_Aint count, MPI_Datatype datatype,
-		int dest, int tag, MPIR_Comm *comm, int context_offset,
-		MPIR_Request **request );
-
-/*@
-  MPID_tBsend - Attempt a send and return if it would block
-
-  Notes:
-  This has the semantics of 'MPI_Bsend', except that it returns the internal
-  error code 'MPID_WOULD_BLOCK' if the message can''t be sent immediately
-  (t is for "try").  
- 
-  The reason that this interface is chosen over a query to check whether
-  a message `can` be sent is that the query approach is not
-  thread-safe.  Since the decision on whether a message can be sent
-  without blocking depends (among other things) on the state of flow
-  control managed by the device, this approach also gives the device
-  the greatest freedom in implementing flow control.  In particular,
-  if another MPI process can change the flow control parameters, then
-  even in a single-threaded implementation, it would not be safe to
-  return, for example, a message size that could be sent with 'MPI_Bsend'.
-
-  This routine allows an MPI implementation to optimize 'MPI_Bsend'
-  for the case when the message can be delivered without blocking the
-  calling process.  An ADI implementation is free to have this routine
-  always return 'MPID_WOULD_BLOCK', but is encouraged not to.
-
-  To allow the MPI implementation to avoid trying this routine when it
-  is not implemented by the ADI, the C preprocessor constant 'MPID_HAS_TBSEND'
-  should be defined if this routine has a nontrivial implementation.
-
-  This is an optional routine.  The MPI code for 'MPI_Bsend' will attempt
-  to call this routine only if the device defines 'MPID_HAS_TBSEND'.
-
-  Module:
-  Communication
-  @*/
-int MPID_tBsend( const void *buf, int count, MPI_Datatype datatype,
-		 int dest, int tag, MPIR_Comm *comm, int context_offset );
-
-/*@
-  MPID_Isend - MPID entry point for MPI_Isend
-
-  Notes:
-  The only difference between this and 'MPI_Isend' is that the basic
-  error checks (e.g., valid communicator, datatype, dest, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, and a context id offset is provided in addition to the 
-  communicator.  This offset is added to the context of the communicator
-  to get the context it used by the message.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Isend( const void *buf, MPI_Aint count, MPI_Datatype datatype,
-		int dest, int tag, MPIR_Comm *comm, int context_offset,
-		MPIR_Request **request );
-
-/*@
-  MPID_Irsend - MPID entry point for MPI_Irsend
-
-  Notes:
-  The only difference between this and 'MPI_Irsend' is that the basic
-  error checks (e.g., valid communicator, datatype, dest, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, and a context id offset is provided in addition to the 
-  communicator.  This offset is added to the context of the communicator
-  to get the context it used by the message.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Irsend( const void *buf, int count, MPI_Datatype datatype,
-		 int dest, int tag, MPIR_Comm *comm, int context_offset,
-		 MPIR_Request **request );
-
-/*@
-  MPID_Issend - MPID entry point for MPI_Issend
-
-  Notes:
-  The only difference between this and 'MPI_Issend' is that the basic
-  error checks (e.g., valid communicator, datatype, dest, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, and a context id offset is provided in addition to the 
-  communicator.  This offset is added to the context of the communicator
-  to get the context it used by the message.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Issend( const void *buf, int count, MPI_Datatype datatype,
-		 int dest, int tag, MPIR_Comm *comm, int context_offset,
-		 MPIR_Request **request );
-
-/*@
-  MPID_Recv - MPID entry point for MPI_Recv
-
-  Notes:
-  The only difference between this and 'MPI_Recv' is that the basic
-  error checks (e.g., valid communicator, datatype, source, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, a context id offset is provided in addition to the 
-  communicator, and a request may be returned.  The context offset is added 
-  to the context of the communicator to get the context it used by the message.
-  As in 'MPID_Send', the request is returned only if the operation did not
-  complete.  Conversely, the status object is populated with valid information
-  only if the operation completed.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Recv( void *buf, MPI_Aint count, MPI_Datatype datatype,
-	       int source, int tag, MPIR_Comm *comm, int context_offset,
-	       MPI_Status *status, MPIR_Request **request );
-
-
-/*@
-  MPID_Irecv - MPID entry point for MPI_Irecv
-
-  Notes:
-  The only difference between this and 'MPI_Irecv' is that the basic
-  error checks (e.g., valid communicator, datatype, source, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, and a context id offset is provided in addition to the 
-  communicator.  This offset is added to the context of the communicator
-  to get the context it used by the message.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Irecv( void *buf, MPI_Aint count, MPI_Datatype datatype,
-		int source, int tag, MPIR_Comm *comm, int context_offset,
-		MPIR_Request **request );
-
-/*@
-  MPID_Send_init - MPID entry point for MPI_Send_init
-
-  Notes:
-  The only difference between this and 'MPI_Send_init' is that the basic
-  error checks (e.g., valid communicator, datatype, dest, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, and a context id offset is provided in addition to the 
-  communicator.  This offset is added to the context of the communicator
-  to get the context it used by the message.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Send_init( const void *buf, int count, MPI_Datatype datatype,
-		    int dest, int tag, MPIR_Comm *comm, int context_offset,
-		    MPIR_Request **request );
-
-int MPID_Bsend_init(const void *, int, MPI_Datatype, int, int, MPIR_Comm *,
-		   int, MPIR_Request **);
-/*@
-  MPID_Rsend_init - MPID entry point for MPI_Rsend_init
-
-  Notes:
-  The only difference between this and 'MPI_Rsend_init' is that the basic
-  error checks (e.g., valid communicator, datatype, dest, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, and a context id offset is provided in addition to the 
-  communicator.  This offset is added to the context of the communicator
-  to get the context it used by the message.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Rsend_init( const void *buf, int count, MPI_Datatype datatype,
-		     int dest, int tag, MPIR_Comm *comm, int context_offset,
-		     MPIR_Request **request );
-/*@
-  MPID_Ssend_init - MPID entry point for MPI_Ssend_init
-
-  Notes:
-  The only difference between this and 'MPI_Ssend_init' is that the basic
-  error checks (e.g., valid communicator, datatype, dest, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, and a context id offset is provided in addition to the 
-  communicator.  This offset is added to the context of the communicator
-  to get the context it used by the message.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Ssend_init( const void *buf, int count, MPI_Datatype datatype,
-		     int dest, int tag, MPIR_Comm *comm, int context_offset,
-		     MPIR_Request **request );
-
-/*@
-  MPID_Recv_init - MPID entry point for MPI_Recv_init
-
-  Notes:
-  The only difference between this and 'MPI_Recv_init' is that the basic
-  error checks (e.g., valid communicator, datatype, source, and tag)
-  have been made, the MPI opaque objects have been replaced by
-  MPID objects, and a context id offset is provided in addition to the 
-  communicator.  This offset is added to the context of the communicator
-  to get the context it used by the message.
-
-  Module:
-  Communication
-
-  @*/
-int MPID_Recv_init( void *buf, int count, MPI_Datatype datatype,
-		    int source, int tag, MPIR_Comm *comm, int context_offset,
-		    MPIR_Request **request );
-
-/*@
-  MPID_Startall - MPID entry point for MPI_Startall
-
-  Notes:
-  The only difference between this and 'MPI_Startall' is that the basic
-  error checks (e.g., count) have been made, and the MPI opaque objects
-  have been replaced by pointers to MPID objects.  
-
-  Rationale:
-  This allows the device to schedule communication involving multiple requests,
-  whereas an implementation built on just 'MPID_Start' would force the
-  ADI to initiate the communication in the order encountered.
-  @*/
-int MPID_Startall(int count, MPIR_Request *requests[]);
 
-/*@
-   MPID_Probe - Block until a matching request is found and return information 
-   about it
+/*****************************************************************************/
+/********************** PART 4: DEVICE PRE DECLARATION ***********************/
+/*****************************************************************************/
 
-  Input Parameters:
-+ source - rank to match (or 'MPI_ANY_SOURCE')
-. tag - Tag to match (or 'MPI_ANY_TAG')
-. comm - communicator to match.
-- context_offset - context id offset of communicator to match
-
-  Output Parameter:
-. status - 'MPI_Status' set as defined by 'MPI_Probe'
-
-
-  Return Value:
-  Error code.
-  
-  Notes:
-  Note that the values returned in 'status' will be valid for a subsequent
-  MPI receive operation only if no other thread attempts to receive the same
-  message.  
-  (See the 
-  discussion of probe in Section 8.7.2 Clarifications of the MPI-2 standard.)
-
-  Providing the 'context_offset' is necessary at this level to support the 
-  way in which the MPICH implementation uses context ids in the implementation
-  of other operations.  The communicator is present to allow the device 
-  to use message-queues attached to particular communicators or connections
-  between processes.
-
-  Module:
-  Request
-
-  @*/
-int MPID_Probe(int, int, MPIR_Comm *, int, MPI_Status *);
-/*@
-   MPID_Iprobe - Look for a matching request in the receive queue 
-   but do not remove or return it
-
-  Input Parameters:
-+ source - rank to match (or 'MPI_ANY_SOURCE')
-. tag - Tag to match (or 'MPI_ANY_TAG')
-. comm - communicator to match.
-- context_offset - context id offset of communicator to match
-
-  Output Parameter:
-+ flag - true if a matching request was found, false otherwise.
-- status - 'MPI_Status' set as defined by 'MPI_Iprobe' (only valid when return 
-  flag is true).
-
-  Return Value:
-  Error Code.
-
-  Notes:
-  Note that the values returned in 'status' will be valid for a subsequent
-  MPI receive operation only if no other thread attempts to receive the same
-  message.  
-  (See the 
-  discussion of probe in Section 8.7.2 (Clarifications) of the MPI-2 standard.)
-
-  Providing the 'context_offset' is necessary at this level to support the 
-  way in which the MPICH implementation uses context ids in the implementation
-  of other operations.  The communicator is present to allow the device 
-  to use message-queues attached to particular communicators or connections
-  between processes.
-
-  Devices that rely solely on polling to make progress should call
-  MPID_Progress_poke() (or some equivalent function) if a matching request
-  could not be found.  This insures that progress continues to be made even if
-  the application is calling MPI_Iprobe() from within a loop not containing
-  calls to any other MPI functions.
-  
-  Module:
-  Request
-
-  @*/
-int MPID_Iprobe(int, int, MPIR_Comm *, int, int *, MPI_Status *);
-
-/*@
-   MPID_Mprobe - Block until a matching request is found and return information
-   about it, including a message handle for later reception.
-
-  Input Parameters:
-+ source - rank to match (or 'MPI_ANY_SOURCE')
-. tag - Tag to match (or 'MPI_ANY_TAG')
-. comm - communicator to match.
-- context_offset - context id offset of communicator to match
-
-  Output Parameter:
-+ message - 'MPIR_Request' (logically a message) set as defined by 'MPI_Mprobe'
-- status - 'MPI_Status' set as defined by 'MPI_Mprobe'
-
-  Return Value:
-  Error code.
-
-  Providing the 'context_offset' is necessary at this level to support the
-  way in which the MPICH implementation uses context ids in the implementation
-  of other operations.  The communicator is present to allow the device
-  to use message-queues attached to particular communicators or connections
-  between processes.
-
-  Module:
-  Request
-
-  @*/
-int MPID_Mprobe(int source, int tag, MPIR_Comm *comm, int context_offset,
-                MPIR_Request **message, MPI_Status *status);
-
-/*@
-   MPID_Improbe - Look for a matching request in the receive queue and return
-   information about it, including a message handle for later reception.
-
-  Input Parameters:
-+ source - rank to match (or 'MPI_ANY_SOURCE')
-. tag - Tag to match (or 'MPI_ANY_TAG')
-. comm - communicator to match.
-- context_offset - context id offset of communicator to match
-
-  Output Parameter:
-+ flag - 'flag' set as defined by 'MPI_Improbe'
-. message - 'MPIR_Request' (logically a message) set as defined by 'MPI_Improbe'
-- status - 'MPI_Status' set as defined by 'MPI_Improbe'
-
-  Return Value:
-  Error code.
-
-  Providing the 'context_offset' is necessary at this level to support the
-  way in which the MPICH implementation uses context ids in the implementation
-  of other operations.  The communicator is present to allow the device
-  to use message-queues attached to particular communicators or connections
-  between processes.
-
-  Module:
-  Request
-
-  @*/
-int MPID_Improbe(int source, int tag, MPIR_Comm *comm, int context_offset,
-                 int *flag, MPIR_Request **message, MPI_Status *status);
-
-/*@
-   MPID_Imrecv - Begin receiving the message indicated by the given message
-   handle and return a request object for later completion.
-
-  Input Parameters:
-+ count - number of elements to receive
-. datatype - datatype of each recv buffer element
-- message - 'MPIR_Request' (logically a message) set as defined by 'MPI_Mprobe'
-
-  Output Parameter:
-+ buf - receive buffer
-- request - request object for completing the recv
-
-  Return Value:
-  Error code.
-
-  Module:
-  Request
-
-  NOTE: under most implementations the request object returned will
-  probably be some modified version of the "message" object passed in.
-
-  @*/
-int MPID_Imrecv(void *buf, int count, MPI_Datatype datatype,
-                MPIR_Request *message, MPIR_Request **rreqp);
-
-/*@
-   MPID_Mrecv - Receive the message indicated by the given message handle.
-
-  Input Parameters:
-+ count - number of elements to receive
-. datatype - datatype of each recv buffer element
-- message - 'MPIR_Request' (logically a message) set as defined by 'MPI_Mprobe'
-
-  Output Parameter:
-+ buf - receive buffer
-- status - 'MPI_Status' set as defined by 'MPI_Mrecv'
-
-  Return Value:
-  Error code.
-
-  Module:
-  Request
-
-  NOTE: under most implementations the request object returned will
-  probably be some modified version of the "message" object passed in.
-
-  @*/
-int MPID_Mrecv(void *buf, int count, MPI_Datatype datatype,
-               MPIR_Request *message, MPI_Status *status);
-
-/*@
-  MPID_Cancel_send - Cancel the indicated send request
-
-  Input Parameter:
-. request - Send request to cancel
-
-  Return Value:
-  MPI error code.
-  
-  Notes:
-  Cancel is a tricky operation, particularly for sends.  Read the
-  discussion in the MPI-1 and MPI-2 documents carefully.  This call
-  only requests that the request be cancelled; a subsequent wait 
-  or test must first succeed (i.e., the request completion counter must be
-  zeroed).
-
-  Module:
-  Request
-
-  @*/
-int MPID_Cancel_send(MPIR_Request *);
-/*@
-  MPID_Cancel_recv - Cancel the indicated recv request
-
-  Input Parameter:
-. request - Receive request to cancel
-
-  Return Value:
-  MPI error code.
-  
-  Notes:
-  This cancels a pending receive request.  In many cases, this is implemented
-  by simply removing the request from a pending receive request queue.  
-  However, some ADI implementations may maintain these queues in special 
-  places, such as within a NIC (Network Interface Card).
-  This call only requests that the request be cancelled; a subsequent wait 
-  or test must first succeed (i.e., the request completion counter must be
-  zeroed).
-
-  Module:
-  Request
-
-  @*/
-int MPID_Cancel_recv(MPIR_Request *);
-
-/*@
-  MPID_Comm_AS_enabled - Query whether anysource operations are enabled for a communicator
-
-  Input Parameter:
-  communicator - Communicator being queried
-
-  Return Value:
-  0 - The communicator will not currently permit anysource operations
-  1 - The communicator will currently permit anysource operations
-  @*/
-int MPID_Comm_AS_enabled(MPIR_Comm *);
-
-/*@
-  MPID_Request_is_anysource - Query whether the request is an anysource receive
-
-  Input Parameter:
-  request - Receive request being queried
-
-  Return Value:
-  0 - The request is not anysource
-  1 - The request is anysource
-
-  @*/
-int MPID_Request_is_anysource(MPIR_Request *);
-
-/*@
-  MPID_Aint_add - Returns the sum of base and disp
-
-  Input Parameters:
-+ base - base address (integer)
-- disp - displacement (integer)
-
-  Return value:
-  Sum of the base and disp argument
-  @*/
-MPI_Aint MPID_Aint_add(MPI_Aint base, MPI_Aint disp);
-
-/*@
-  MPID_Aint_diff - Returns the difference between addr1 and addr2
-
-  Input Parameters:
-+ addr1 - minuend address (integer)
-- addr2 - subtrahend address (integer)
-
-  Return value:
-  Difference between addr1 and addr2
-  @*/
-MPI_Aint MPID_Aint_diff(MPI_Aint addr1, MPI_Aint addr2);
-
-/* MPI-2 RMA Routines */
-
-int MPID_Win_create(void *, MPI_Aint, int, MPIR_Info *, MPIR_Comm *,
-                    MPIR_Win **);
-int MPID_Win_free(MPIR_Win **);
-
-int MPID_Put(const void *, int, MPI_Datatype, int, MPI_Aint, int,
-             MPI_Datatype, MPIR_Win *);
-int MPID_Get(void *, int, MPI_Datatype, int, MPI_Aint, int,
-             MPI_Datatype, MPIR_Win *);
-int MPID_Accumulate(const void *, int, MPI_Datatype, int, MPI_Aint, int, 
-                    MPI_Datatype, MPI_Op, MPIR_Win *);
-
-int MPID_Win_fence(int, MPIR_Win *);
-int MPID_Win_post(MPIR_Group *group_ptr, int assert, MPIR_Win *win_ptr);
-int MPID_Win_start(MPIR_Group *group_ptr, int assert, MPIR_Win *win_ptr);
-int MPID_Win_test(MPIR_Win *win_ptr, int *flag);
-int MPID_Win_wait(MPIR_Win *win_ptr);
-int MPID_Win_complete(MPIR_Win *win_ptr);
-
-int MPID_Win_lock(int lock_type, int dest, int assert, MPIR_Win *win_ptr);
-int MPID_Win_unlock(int dest, MPIR_Win *win_ptr);
-
-/* MPI-3 RMA Routines */
-
-int MPID_Win_allocate(MPI_Aint size, int disp_unit, MPIR_Info *info,
-                      MPIR_Comm *comm, void *baseptr, MPIR_Win **win);
-int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPIR_Info *info_ptr, MPIR_Comm *comm_ptr,
-                             void *base_ptr, MPIR_Win **win_ptr);
-int MPID_Win_shared_query(MPIR_Win *win, int rank, MPI_Aint *size, int *disp_unit,
-                          void *baseptr);
-int MPID_Win_create_dynamic(MPIR_Info *info, MPIR_Comm *comm, MPIR_Win **win);
-int MPID_Win_attach(MPIR_Win *win, void *base, MPI_Aint size);
-int MPID_Win_detach(MPIR_Win *win, const void *base);
-int MPID_Win_get_info(MPIR_Win *win, MPIR_Info **info_used);
-int MPID_Win_set_info(MPIR_Win *win, MPIR_Info *info);
-
-int MPID_Get_accumulate(const void *origin_addr, int origin_count,
-                        MPI_Datatype origin_datatype, void *result_addr, int result_count,
-                        MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
-                        int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win *win);
-int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
-                      MPI_Datatype datatype, int target_rank, MPI_Aint target_disp,
-                      MPI_Op op, MPIR_Win *win);
-int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
-                          void *result_addr, MPI_Datatype datatype, int target_rank,
-                          MPI_Aint target_disp, MPIR_Win *win);
-int MPID_Rput(const void *origin_addr, int origin_count,
-              MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
-              int target_count, MPI_Datatype target_datatype, MPIR_Win *win,
-              MPIR_Request **request);
-int MPID_Rget(void *origin_addr, int origin_count,
-              MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
-              int target_count, MPI_Datatype target_datatype, MPIR_Win *win,
-              MPIR_Request **request);
-int MPID_Raccumulate(const void *origin_addr, int origin_count,
-                     MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
-                     int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win *win,
-                     MPIR_Request **request);
-int MPID_Rget_accumulate(const void *origin_addr, int origin_count,
-                         MPI_Datatype origin_datatype, void *result_addr, int result_count,
-                         MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
-                         int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win *win,
-                         MPIR_Request **request);
-
-int MPID_Win_lock_all(int assert, MPIR_Win *win);
-int MPID_Win_unlock_all(MPIR_Win *win);
-int MPID_Win_flush(int rank, MPIR_Win *win);
-int MPID_Win_flush_all(MPIR_Win *win);
-int MPID_Win_flush_local(int rank, MPIR_Win *win);
-int MPID_Win_flush_local_all(MPIR_Win *win);
-int MPID_Win_sync(MPIR_Win *win);
-
-
-/*@
-  MPID_Progress_start - Begin a block of operations that check the completion
-  counters in requests.
-
-  Input parameters:
-. state - pointer to a progress state variable
-    
-  Notes:
-  This routine is informs the progress engine that a block of code follows that
-  will examine the completion counter of some 'MPIR_Request' objects and then
-  call 'MPID_Progress_wait' zero or more times followed by a call to
-  'MPID_Progress_end'.
-  
-  The progress state variable must be specific to the thread calling it.  If at
-  all possible, the state should be declared as an auto variable and thus
-  allocated on the stack of the current thread.  Thread specific storage could
-  be used instead, but doing such would incur additional (and typically
-  unnecessary) overhead.
-  
-  This routine is needed to properly implement blocking tests when
-  multithreaded progress engines are used.  In a single-threaded implementation
-  of the ADI, this may be defined as an empty macro.
-
-  Module:
-  Communication
-  @*/
-void MPID_Progress_start(MPID_Progress_state * state);
-/*@
-  MPID_Progress_wait - Wait for some communication since 'MPID_Progress_start' 
-
-    Input parameters:
-.   state - pointer to the progress state initialized by MPID_Progress_start
-    
-    Return value:
-    An mpi error code.
-
-    Notes:
-    This instructs the progress engine to wait until some communication event
-    happens since 'MPID_Progress_start' was called.  This call blocks the 
-    calling thread (only, not the process).
-
-  Module:
-  Communication
- @*/
-int MPID_Progress_wait(MPID_Progress_state * state);
-/*@
-  MPID_Progress_end - End a block of operations begun with 'MPID_Progress_start'
-
-  Input parameters:
-  . state - pointer to the progress state variable passed to
-    'MPID_Progress_start'
-
-   Notes:
-   This routine instructs the progress engine to end the block begun with
-   'MPID_Progress_start'.  The progress engine is not required to check for any
-   pending communication.
-
-   The purpose of this call is to release any locks initiated by
-   'MPID_Progess_start' or 'MPID_Progess_wait'.  In a single threaded ADI
-   implementation, this may be defined as an empty macro.
-
-  Module:
-  Communication
-   @*/
-void MPID_Progress_end(MPID_Progress_state * stae);
-/*@
-  MPID_Progress_test - Check for communication
-
-  Return value:
-  An mpi error code.
-  
-  Notes:
-  Unlike 'MPID_Progress_wait', this routine is nonblocking.  Therefore, it
-  does not require the use of 'MPID_Progress_start' and 'MPID_Progress_end'.
-  
-  Module:
-  Communication
-  @*/
-int MPID_Progress_test(void);
-/*@
-  MPID_Progress_poke - Allow a progress engine to check for pending 
-  communication
-
-  Return value:
-  An mpi error code.
-  
-  Notes:
-  This routine provides a way to invoke the progress engine in a polling 
-  implementation of the ADI.  This routine must be nonblocking.
-
-  A multithreaded implementation is free to define this as an empty macro.
-
-  Module:
-  Communication
-  @*/
-int MPID_Progress_poke(void);
-
-typedef struct MPIR_Grequest_class {
-     MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
-     MPI_Grequest_query_function *query_fn;
-     MPI_Grequest_free_function *free_fn;
-     MPI_Grequest_cancel_function *cancel_fn;
-     MPIX_Grequest_poll_function *poll_fn;
-     MPIX_Grequest_wait_function *wait_fn;
-     struct MPIR_Grequest_class *next;
-} MPIR_Grequest_class;
-
-
-/* Interfaces exposed by MPI_T */
-#include "mpit.h"
-
-/*@
-  MPID_Get_processor_name - Return the name of the current processor
-
-  Input Parameter:
-. namelen - Length of name
-  
-  Output Parameters:
-+ name - A unique specifier for the actual (as opposed to virtual) node. This
-  must be an array of size at least 'MPI_MAX_PROCESSOR_NAME'.
-- resultlen - Length (in characters) of the name.  If this pointer is null,
-   this value is not set.
-
-  Notes:
-  The name returned should identify a particular piece of hardware; 
-  the exact format is implementation defined.  This name may or may not
-  be the same as might be returned by 'gethostname', 'uname', or 'sysinfo'.
-
-  This routine is essentially an MPID version of 'MPI_Get_processor_name' .  
-  It must be part of the device because not all environments support calls
-  to return the processor name.  The additional argument (input name 
-  length) is used to provide better error checking and to ensure that 
-  the input buffer is large enough (rather than assuming that it is
-  'MPI_MAX_PROCESSOR_NAME' long).
-  @*/
-int MPID_Get_processor_name( char *name, int namelen, int *resultlen);
-
-void MPIR_Errhandler_free(MPIR_Errhandler *errhan_ptr);
-
-/*@
-  MPID_Get_universe_size - Return the number of processes that the current
-  process management environment can handle
-
-  Output Parameters:
-. universe_size - the universe size; MPIR_UNIVERSE_SIZE_NOT_AVAILABLE if the
-  size cannot be determined
-  
-  Return value:
-  A MPI error code.
-@*/
-int MPID_Get_universe_size(int  * universe_size);
-
-#define MPIR_UNIVERSE_SIZE_NOT_SET -1
-#define MPIR_UNIVERSE_SIZE_NOT_AVAILABLE -2
-
-/*@
-   MPID_Comm_get_lpid - Get the local process id that corresponds to a
-   comm rank.
+#include "mpidpre.h"
 
-   Notes:
-   The local process ids are described elsewhere.  Basically, they are
-   a nonnegative number by which this process can refer to other processes 
-   to which it is connected.  These are local process ids because different
-   processes may use different ids to identify the same target process
-  @*/
-int MPID_Comm_get_lpid(MPIR_Comm *comm_ptr, int idx, int * lpid_ptr, MPIU_BOOL is_remote);
 
-/* prototypes and declarations for the MPID_Sched interface for nonblocking
- * collectives */
+/*****************************************************************************/
+/********************* PART 5: DEVICE DEPENDENT HEADERS **********************/
+/*****************************************************************************/
+
+#include "mpir_thread.h"
+#include "mpir_attr.h"
+#include "mpir_group.h"
+#include "mpir_comm.h"
+#include "mpir_request.h"
+#include "mpir_win.h"
+#include "mpir_coll.h"
+#include "mpir_func.h"
+#include "mpir_err.h"
+#include "mpir_datatype.h"
 #include "mpir_nbc.h"
+#include "mpir_process.h"
+#include "mpir_cvars.h"
+#include "mpir_misc_post.h"
+#include "mpit.h"
 
-/* tunable cvar values */
-#include "mpich_cvars.h"
-
-/* Tags for point to point operations which implement collective and other
-   internal operations */
-#define MPIR_BARRIER_TAG               1
-#define MPIR_BCAST_TAG                 2
-#define MPIR_GATHER_TAG                3
-#define MPIR_GATHERV_TAG               4
-#define MPIR_SCATTER_TAG               5
-#define MPIR_SCATTERV_TAG              6
-#define MPIR_ALLGATHER_TAG             7
-#define MPIR_ALLGATHERV_TAG            8
-#define MPIR_ALLTOALL_TAG              9
-#define MPIR_ALLTOALLV_TAG            10
-#define MPIR_REDUCE_TAG               11
-#define MPIR_USER_REDUCE_TAG          12
-#define MPIR_USER_REDUCEA_TAG         13
-#define MPIR_ALLREDUCE_TAG            14
-#define MPIR_USER_ALLREDUCE_TAG       15
-#define MPIR_USER_ALLREDUCEA_TAG      16
-#define MPIR_REDUCE_SCATTER_TAG       17
-#define MPIR_USER_REDUCE_SCATTER_TAG  18
-#define MPIR_USER_REDUCE_SCATTERA_TAG 19
-#define MPIR_SCAN_TAG                 20
-#define MPIR_USER_SCAN_TAG            21
-#define MPIR_USER_SCANA_TAG           22
-#define MPIR_LOCALCOPY_TAG            23
-#define MPIR_EXSCAN_TAG               24
-#define MPIR_ALLTOALLW_TAG            25
-#define MPIR_TOPO_A_TAG               26
-#define MPIR_TOPO_B_TAG               27
-#define MPIR_REDUCE_SCATTER_BLOCK_TAG 28
-#define MPIR_SHRINK_TAG               29
-#define MPIR_AGREE_TAG                30
-#define MPIR_FIRST_NBC_TAG            31
-
-/* These macros must be used carefully. These macros will not work with
- * negative tags. By definition, users are not to use negative tags and the
- * only negative tag in MPICH is MPI_ANY_TAG which is checked seperately, but
- * if there is a time where negative tags become more common, this setup won't
- * work anymore. */
-
-/* This bitmask can be used to manually mask the tag space wherever it might
- * be necessary to do so (for instance in the receive queue */
-#define MPIR_TAG_ERROR_BIT (1 << 30)
-
-/* This bitmask is used to differentiate between a process failure
- * (MPIX_ERR_PROC_FAILED) and any other kind of failure (MPI_ERR_OTHER). */
-#define MPIR_TAG_PROC_FAILURE_BIT (1 << 29)
-
-/* This macro checks the value of the error bit in the MPI tag and returns 1
- * if the tag is set and 0 if it is not. */
-#define MPIR_TAG_CHECK_ERROR_BIT(tag) ((MPIR_TAG_ERROR_BIT & (tag)) == MPIR_TAG_ERROR_BIT ? 1 : 0)
-
-/* This macro checks the value of the process failure bit in the MPI tag and
- * returns 1 if the tag is set and 0 if it is not. */
-#define MPIR_TAG_CHECK_PROC_FAILURE_BIT(tag) ((MPIR_TAG_PROC_FAILURE_BIT & (tag)) == MPIR_TAG_PROC_FAILURE_BIT ? 1 : 0)
-
-/* This macro sets the value of the error bit in the MPI tag to 1 */
-#define MPIR_TAG_SET_ERROR_BIT(tag) ((tag) |= MPIR_TAG_ERROR_BIT)
-
-/* This macro sets the value of the process failure bit in the MPI tag to 1 */
-#define MPIR_TAG_SET_PROC_FAILURE_BIT(tag) ((tag) |= (MPIR_TAG_ERROR_BIT | MPIR_TAG_PROC_FAILURE_BIT))
-
-/* This macro clears the value of the error bits in the MPI tag */
-#define MPIR_TAG_CLEAR_ERROR_BITS(tag) ((tag) &= ~(MPIR_TAG_ERROR_BIT ^ MPIR_TAG_PROC_FAILURE_BIT))
-
-/* This macro masks the value of the error bits in the MPI tag */
-#define MPIR_TAG_MASK_ERROR_BITS(tag) ((tag) & ~(MPIR_TAG_ERROR_BIT ^ MPIR_TAG_PROC_FAILURE_BIT))
-
-/* These functions are used in the implementation of collective and
-   other internal operations. They are wrappers around MPID send/recv
-   functions. They do sends/receives by setting the context offset to
-   MPIR_CONTEXT_INTRA(INTER)_COLL. */
-int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
-                   void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype);
-int MPIC_Wait(MPIR_Request * request_ptr, MPIR_Errflag_t *errflag);
-int MPIC_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status);
-
-/* FT versions of te MPIC_ functions */
-int MPIC_Send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag,
-                 MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIC_Recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int source, int tag,
-                 MPIR_Comm *comm_ptr, MPI_Status *status, MPIR_Errflag_t *errflag);
-int MPIC_Ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag,
-                  MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIC_Sendrecv(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
-                     int dest, int sendtag, void *recvbuf, MPI_Aint recvcount,
-                     MPI_Datatype recvtype, int source, int recvtag,
-                     MPIR_Comm *comm_ptr, MPI_Status *status, MPIR_Errflag_t *errflag);
-int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
-                             int dest, int sendtag,
-                             int source, int recvtag,
-                             MPIR_Comm *comm_ptr, MPI_Status *status, MPIR_Errflag_t *errflag);
-int MPIC_Isend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag,
-                  MPIR_Comm *comm_ptr, MPIR_Request **request, MPIR_Errflag_t *errflag);
-int MPIC_Issend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag,
-                  MPIR_Comm *comm_ptr, MPIR_Request **request, MPIR_Errflag_t *errflag);
-int MPIC_Irecv(void *buf, MPI_Aint count, MPI_Datatype datatype, int source,
-                  int tag, MPIR_Comm *comm_ptr, MPIR_Request **request);
-int MPIC_Waitall(int numreq, MPIR_Request *requests[], MPI_Status statuses[], MPIR_Errflag_t *errflag);
-
-
-void MPIR_MAXF  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_MINF  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_SUM  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_PROD  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_LAND  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_BAND  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_LOR  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_BOR  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_LXOR  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_BXOR  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_MAXLOC  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_MINLOC  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_REPLACE  ( void *, void *, int *, MPI_Datatype * ) ;
-void MPIR_NO_OP  ( void *, void *, int *, MPI_Datatype * ) ;
-
-int MPIR_MAXF_check_dtype  ( MPI_Datatype ) ;
-int MPIR_MINF_check_dtype ( MPI_Datatype ) ;
-int MPIR_SUM_check_dtype  ( MPI_Datatype ) ;
-int MPIR_PROD_check_dtype  ( MPI_Datatype ) ;
-int MPIR_LAND_check_dtype  ( MPI_Datatype ) ;
-int MPIR_BAND_check_dtype  ( MPI_Datatype ) ;
-int MPIR_LOR_check_dtype  ( MPI_Datatype ) ;
-int MPIR_BOR_check_dtype  ( MPI_Datatype ) ;
-int MPIR_LXOR_check_dtype ( MPI_Datatype ) ;
-int MPIR_BXOR_check_dtype  ( MPI_Datatype ) ;
-int MPIR_MAXLOC_check_dtype  ( MPI_Datatype ) ;
-int MPIR_MINLOC_check_dtype  ( MPI_Datatype ) ;
-int MPIR_REPLACE_check_dtype  ( MPI_Datatype ) ;
-int MPIR_NO_OP_check_dtype  ( MPI_Datatype ) ;
-
-#define MPIR_PREDEF_OP_COUNT 14
-extern MPI_User_function *MPIR_Op_table[];
-
-typedef int (MPIR_Op_check_dtype_fn) ( MPI_Datatype ); 
-extern MPIR_Op_check_dtype_fn *MPIR_Op_check_dtype_table[];
-
-#define MPIR_OP_HDL_TO_FN(op) MPIR_Op_table[((op)&0xf) - 1]
-#define MPIR_OP_HDL_TO_DTYPE_FN(op) MPIR_Op_check_dtype_table[((op)&0xf) - 1]
-
-int MPIR_Type_is_rma_atomic(MPI_Datatype type);
-int MPIR_Compare_equal(const void *a, const void *b, MPI_Datatype type);
-
-int MPIR_Allgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                        MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                   void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                   MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Allgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                         void *recvbuf, int recvcount, MPI_Datatype recvtype, 
-                         MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Allgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                         void *recvbuf, int recvcount, MPI_Datatype recvtype, 
-                         MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                         void *recvbuf, const int *recvcounts, const int *displs,
-                         MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                    void *recvbuf, const int *recvcounts, const int *displs,
-                    MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Allgatherv_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                          void *recvbuf, const int *recvcounts, const int *displs,
-                          MPI_Datatype recvtype, MPIR_Comm *comm_pt, MPIR_Errflag_t *errflag );
-int MPIR_Allgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                          void *recvbuf, const int *recvcounts, const int *displs,
-                          MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Allreduce_impl(const void *sendbuf, void *recvbuf, int count,
-                        MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Allreduce(const void *sendbuf, void *recvbuf, int count,
-                   MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Allreduce_intra(const void *sendbuf, void *recvbuf, int count,
-                         MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Allreduce_inter(const void *sendbuf, void *recvbuf, int count,
-                        MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                       void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                       MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                  MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Alltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                        MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Alltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                        MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Alltoallv_impl(const void *sendbuf, const int *sendcnts, const int *sdispls,
-                        MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
-                        const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                        MPIR_Errflag_t *errflag);
-int MPIR_Alltoallv(const void *sendbuf, const int *sendcnts, const int *sdispls,
-                   MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
-                   const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcnts, const int *sdispls,
-                         MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
-                         const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
-                         MPIR_Errflag_t *errflag);
-int MPIR_Alltoallv_inter(const void *sendbuf, const int *sendcnts, const int *sdispls,
-                         MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
-                         const int *rdispls, MPI_Datatype recvtype,
-                         MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Alltoallw_impl(const void *sendbuf, const int *sendcnts, const int *sdispls,
-                        const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcnts,
-                        const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr,
-                        MPIR_Errflag_t *errflag);
-int MPIR_Alltoallw(const void *sendbuf, const int *sendcnts, const int *sdispls,
-                   const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcnts,
-                   const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr,
-                   MPIR_Errflag_t *errflag);
-int MPIR_Alltoallw_intra(const void *sendbuf, const int *sendcnts, const int *sdispls,
-                         const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcnts,
-                         const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr,
-                         MPIR_Errflag_t *errflag);
-int MPIR_Alltoallw_inter(const void *sendbuf, const int *sendcnts, const int *sdispls,
-                         const MPI_Datatype *sendtypes, void *recvbuf,
-                         const int *recvcnts, const int *rdispls, const MPI_Datatype *recvtypes,
-                         MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Bcast_inter(void *buffer, int count, MPI_Datatype datatype,
-		     int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Bcast_intra (void *buffer, int count, MPI_Datatype datatype, int
-                      root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Bcast (void *buffer, int count, MPI_Datatype datatype, int
-                root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Bcast_impl (void *buffer, int count, MPI_Datatype datatype, int
-                root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Exscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                     MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Gather_impl (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                      void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                      int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Gather (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                 int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Gather_intra (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                       int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Gather_inter (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                       int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Gatherv (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                  void *recvbuf, const int *recvcnts, const int *displs,
-                  MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Gatherv_impl (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                       void *recvbuf, const int *recvcnts, const int *displs,
-                       MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Reduce_scatter_impl(const void *sendbuf, void *recvbuf, const int *recvcnts,
-                             MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Reduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcnts,
-                        MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int *recvcnts,
-                              MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Reduce_scatter_inter(const void *sendbuf, void *recvbuf, const int *recvcnts,
-                              MPI_Datatype datatype, MPI_Op op,
-                              MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Reduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recvcount,
-                                   MPI_Datatype datatype, MPI_Op op, MPIR_Comm
-                                   *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount,
-                              MPI_Datatype datatype, MPI_Op op, MPIR_Comm
-                              *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Reduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount,
-                                    MPI_Datatype datatype, MPI_Op op, MPIR_Comm
-                                    *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Reduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount,
-                                    MPI_Datatype datatype, MPI_Op op, MPIR_Comm
-                                    *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Reduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                     MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Reduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                      MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Reduce_inter (const void *sendbuf, void *recvbuf, int count, MPI_Datatype
-                       datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Scan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-                   MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
-              MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Scatter_impl(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                      void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                      int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Scatter(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                 int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Scatter_intra(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                       int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Scatter_inter(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
-                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
-                       int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
-int MPIR_Scatterv_impl (const void *sendbuf, const int *sendcnts, const int *displs,
-                        MPI_Datatype sendtype, void *recvbuf, int recvcnt,
-                        MPI_Datatype recvtype, int root, MPIR_Comm
-                        *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Scatterv (const void *sendbuf, const int *sendcnts, const int *displs,
-                   MPI_Datatype sendtype, void *recvbuf, int recvcnt,
-                   MPI_Datatype recvtype, int root, MPIR_Comm
-                   *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Barrier_impl( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Barrier( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Barrier_intra( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-int MPIR_Barrier_inter( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
-
-int MPIR_Reduce_local_impl(const void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op);
-
-int MPIR_Setup_intercomm_localcomm( MPIR_Comm * );
-
-int MPIR_Comm_create( MPIR_Comm ** );
-int MPIR_Comm_create_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag,
-                           MPIR_Comm ** newcomm);
-
-/* comm_create helper functions, used by both comm_create and comm_create_group */
-int MPIR_Comm_create_calculate_mapping(MPIR_Group  *group_ptr,
-                                       MPIR_Comm   *comm_ptr,
-                                       int        **mapping_out,
-                                       MPIR_Comm **mapping_comm);
-
-int MPIR_Comm_create_map(int local_n,
-                         int remote_n,
-                         int *local_mapping,
-                         int *remote_mapping,
-                         MPIR_Comm *mapping_comm,
-                         MPIR_Comm *newcomm);
-
-/* implements the logic for MPI_Comm_create for intracommunicators only */
-int MPIR_Comm_create_intra(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr,
-                           MPIR_Comm **newcomm_ptr);
-
-
-int MPIR_Comm_commit( MPIR_Comm * );
-
-int MPIR_Comm_is_node_aware( MPIR_Comm * );
-
-int MPIR_Comm_is_node_consecutive( MPIR_Comm *);
-
-int MPIR_Comm_idup_impl(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm, MPIR_Request **reqp);
-
-int MPIR_Comm_shrink(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr);
-int MPIR_Comm_agree(MPIR_Comm *comm_ptr, int *flag);
-
-int MPIR_Allreduce_group(void *sendbuf, void *recvbuf, int count,
-                         MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
-                         MPIR_Group *group_ptr, int tag, MPIR_Errflag_t *errflag);
-int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
-                               MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
-                               MPIR_Group *group_ptr, int tag, MPIR_Errflag_t *errflag);
-
-
-int MPIR_Barrier_group(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr, int tag, MPIR_Errflag_t *errflag);
-
-
-/* topology impl functions */
-int MPIR_Dist_graph_neighbors_count_impl(MPIR_Comm *comm_ptr, int *indegree, int *outdegree, int *weighted);
-int MPIR_Dist_graph_neighbors_impl(MPIR_Comm *comm_ptr,
-                                   int maxindegree, int sources[], int sourceweights[],
-                                   int maxoutdegree, int destinations[], int destweights[]);
-int MPIR_Graph_neighbors_count_impl(MPIR_Comm *comm_ptr, int rank, int *nneighbors);
-int MPIR_Graph_neighbors_impl(MPIR_Comm *comm_ptr, int rank, int maxneighbors, int *neighbors);
-int MPIR_Cart_shift_impl(MPIR_Comm *comm_ptr, int direction, int displ, int *source, int *dest);
-
-/* begin impl functions for NBC */
-int MPIR_Ibarrier_impl(MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ibcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Igather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Igatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Iscatterv_impl(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Iallgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Iallgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ialltoallv_impl(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ialltoallw_impl(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ireduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Iallreduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ireduce_scatter_impl(const void *sendbuf, void *recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Iscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Iexscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
-/* end impl functions for NBC */
-
-/* begin impl functions for neighborhood collectives */
-int MPIR_Ineighbor_allgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ineighbor_allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ineighbor_alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ineighbor_alltoallv_impl(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Ineighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Neighbor_allgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-int MPIR_Neighbor_allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-int MPIR_Neighbor_alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-int MPIR_Neighbor_alltoallv_impl(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-int MPIR_Neighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr);
-/* end impl functions for neighborhood collectives */
-
-/* neighborhood collective default algorithms */
-int MPIR_Neighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-int MPIR_Neighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-int MPIR_Neighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-int MPIR_Neighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
-int MPIR_Neighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr);
-int MPIR_Ineighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ineighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ineighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ineighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr, MPID_Sched_t s);
-
-/* nonblocking collective default algorithms */
-int MPIR_Ibcast_intra(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscatter_for_bcast(void *tmp_buf, int root, MPIR_Comm *comm_ptr, int nbytes, MPID_Sched_t s);
-int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibarrier_intra(MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ibarrier_inter(MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoallv_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoallv_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_naive(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Igather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscatterv(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_intra(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_inplace(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgather_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoallw_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-int MPIR_Ialltoallw_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPID_Sched_t s);
-
-/* group functionality */
-int MPIR_Group_check_subset(MPIR_Group * group_ptr, MPIR_Comm * comm_ptr);
-
-/* begin impl functions for MPI_T (MPI_T_ right now) */
-int MPIR_T_cvar_handle_alloc_impl(int cvar_index, void *obj_handle, MPI_T_cvar_handle *handle, int *count);
-int MPIR_T_cvar_read_impl(MPI_T_cvar_handle handle, void *buf);
-int MPIR_T_cvar_write_impl(MPI_T_cvar_handle handle, const void *buf);
-int MPIR_T_pvar_session_create_impl(MPI_T_pvar_session *session);
-int MPIR_T_pvar_session_free_impl(MPI_T_pvar_session *session);
-int MPIR_T_pvar_handle_alloc_impl(MPI_T_pvar_session session, int pvar_index, void *obj_handle, MPI_T_pvar_handle *handle, int *count);
-int MPIR_T_pvar_handle_free_impl(MPI_T_pvar_session session, MPI_T_pvar_handle *handle);
-int MPIR_T_pvar_start_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle);
-int MPIR_T_pvar_stop_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle);
-int MPIR_T_pvar_read_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle, void *buf);
-int MPIR_T_pvar_write_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle, const void *buf);
-int MPIR_T_pvar_reset_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle);
-int MPIR_T_pvar_readreset_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle, void *buf);
-int MPIR_T_category_get_cvars_impl(int cat_index, int len, int indices[]);
-int MPIR_T_category_get_pvars_impl(int cat_index, int len, int indices[]);
-int MPIR_T_category_get_categories_impl(int cat_index, int len, int indices[]);
-/* end impl functions for MPI_T (MPI_T_ right now) */
-
-/* MPI-3 "large count" impl routines */
-int MPIR_Get_elements_x_impl(const MPI_Status *status, MPI_Datatype datatype, MPI_Count *elements);
-int MPIR_Status_set_elements_x_impl(MPI_Status *status, MPI_Datatype datatype, MPI_Count count);
-void MPIR_Type_get_extent_x_impl(MPI_Datatype datatype, MPI_Count *lb, MPI_Count *extent);
-void MPIR_Type_get_true_extent_x_impl(MPI_Datatype datatype, MPI_Count *true_lb, MPI_Count *true_extent);
-int MPIR_Type_size_x_impl(MPI_Datatype datatype, MPI_Count *size);
-
-/* random initializers */
-int MPIR_Group_init(void);
-int MPIR_Comm_init(MPIR_Comm *);
-
-
-/* Communicator info hint functions */
-typedef int (*MPIR_Comm_hint_fn_t)(MPIR_Comm *, MPIR_Info *, void *);
-int MPIR_Comm_register_hint(const char *hint_key, MPIR_Comm_hint_fn_t fn, void *state);
-
-#if defined(HAVE_VSNPRINTF) && defined(NEEDS_VSNPRINTF_DECL) && !defined(vsnprintf)
-int vsnprintf(char *str, size_t size, const char *format, va_list ap);
-# endif
-
-/* Routines for determining local and remote processes */
-
-int MPIU_Find_local_and_external(struct MPIR_Comm *comm, int *local_size_p, int *local_rank_p, int **local_ranks_p,
-                                 int *external_size_p, int *external_rank_p, int **external_ranks_p,
-                                 int **intranode_table, int **internode_table_p);
-int MPIU_Get_internode_rank(MPIR_Comm *comm_ptr, int r);
-int MPIU_Get_intranode_rank(MPIR_Comm *comm_ptr, int r);
-
-/* Trivial accessor macros */
-
-#define MPIR_Comm_rank(comm_ptr) ((comm_ptr)->rank)
-#define MPIR_Comm_size(comm_ptr) ((comm_ptr)->local_size)
-#define MPIR_Type_extent_impl(datatype, extent_ptr) MPID_Datatype_get_extent_macro(datatype, *(extent_ptr))
-#define MPIR_Type_size_impl(datatype, size) MPID_Datatype_get_size_macro(datatype, *(size))
-#define MPIR_Test_cancelled_impl(status, flag) *(flag) = MPIR_STATUS_GET_CANCEL_BIT(*(status))
-
-/* MPIR_ functions.  These are versions of MPI_ functions appropriate for calling within MPI */
-int MPIR_Cancel_impl(MPIR_Request *request_ptr);
-struct MPIR_Topology;
-void MPIR_Cart_rank_impl(struct MPIR_Topology *cart_ptr, const int *coords, int *rank);
-int MPIR_Cart_create_impl(MPIR_Comm *comm_ptr, int ndims, const int dims[],
-                          const int periods[], int reorder, MPI_Comm *comm_cart);
-int MPIR_Cart_map_impl(const MPIR_Comm *comm_ptr, int ndims, const int dims[],
-                       const int periodic[], int *newrank);
-int MPIR_Close_port_impl(const char *port_name);
-int MPIR_Open_port_impl(MPIR_Info *info_ptr, char *port_name);
-int MPIR_Info_get_impl(MPIR_Info *info_ptr, const char *key, int valuelen, char *value, int *flag);
-void MPIR_Info_get_nkeys_impl(MPIR_Info *info_ptr, int *nkeys);
-int MPIR_Info_get_nthkey_impl(MPIR_Info *info, int n, char *key);
-void MPIR_Info_get_valuelen_impl(MPIR_Info *info_ptr, const char *key, int *valuelen, int *flag);
-int MPIR_Info_set_impl(MPIR_Info *info_ptr, const char *key, const char *value);
-int MPIR_Info_dup_impl(MPIR_Info *info_ptr, MPIR_Info **new_info_ptr);
-int MPIR_Comm_delete_attr_impl(MPIR_Comm *comm_ptr, MPIR_Keyval *keyval_ptr);
-int MPIR_Comm_create_keyval_impl(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
-                                 MPI_Comm_delete_attr_function *comm_delete_attr_fn,
-                                 int *comm_keyval, void *extra_state);
-int MPIR_Comm_accept_impl(const char * port_name, MPIR_Info * info_ptr, int root,
-                          MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr);
-int MPIR_Comm_connect_impl(const char * port_name, MPIR_Info * info_ptr, int root,
-                           MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr);
-int MPIR_Comm_create_errhandler_impl(MPI_Comm_errhandler_function *function,
-                                     MPI_Errhandler *errhandler);
-int MPIR_Comm_dup_impl(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr);
-int MPIR_Comm_dup_with_info_impl(MPIR_Comm *comm_ptr, MPIR_Info *info_ptr, MPIR_Comm **newcomm_ptr);
-int MPIR_Comm_get_info_impl(MPIR_Comm *comm_ptr, MPIR_Info **info_ptr);
-int MPIR_Comm_set_info_impl(MPIR_Comm *comm_ptr, MPIR_Info *info_ptr);
-int MPIR_Comm_free_impl(MPIR_Comm * comm_ptr);
-void MPIR_Comm_free_keyval_impl(int keyval);
-void MPIR_Comm_get_errhandler_impl(MPIR_Comm *comm_ptr, MPIR_Errhandler **errhandler_ptr);
-void MPIR_Comm_set_errhandler_impl(MPIR_Comm *comm_ptr, MPIR_Errhandler *errhandler_ptr);
-void MPIR_Comm_get_name_impl(MPIR_Comm *comm, char *comm_name, int *resultlen);
-int MPIR_Intercomm_merge_impl(MPIR_Comm *comm_ptr, int high, MPIR_Comm **new_intracomm_ptr);
-int MPIR_Intercomm_create_impl(MPIR_Comm *local_comm_ptr, int local_leader,
-                               MPIR_Comm *peer_comm_ptr, int remote_leader, int tag,
-                               MPIR_Comm **new_intercomm_ptr);
-int MPIR_Comm_group_impl(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr);
-int MPIR_Comm_remote_group_impl(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr);
-int MPIR_Comm_group_failed_impl(MPIR_Comm *comm, MPIR_Group **failed_group_ptr);
-int MPIR_Comm_remote_group_failed_impl(MPIR_Comm *comm, MPIR_Group **failed_group_ptr);
-int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **newcomm_ptr);
-int MPIR_Comm_split_type_impl(MPIR_Comm *comm_ptr, int split_type, int key, MPIR_Info *info_ptr,
-                              MPIR_Comm **newcomm_ptr);
-int MPIR_Group_compare_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, int *result);
-int MPIR_Group_difference_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_Group **new_group_ptr);
-int MPIR_Group_excl_impl(MPIR_Group *group_ptr, int n, const int *ranks, MPIR_Group **new_group_ptr);
-int MPIR_Group_free_impl(MPIR_Group *group_ptr);
-int MPIR_Group_incl_impl(MPIR_Group *group_ptr, int n, const int *ranks, MPIR_Group **new_group_ptr);
-int MPIR_Group_intersection_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_Group **new_group_ptr);
-int MPIR_Group_range_excl_impl(MPIR_Group *group_ptr, int n, int ranges[][3], MPIR_Group **new_group_ptr);
-int MPIR_Group_range_incl_impl(MPIR_Group *group_ptr, int n, int ranges[][3], MPIR_Group **new_group_ptr);
-int MPIR_Group_translate_ranks_impl(MPIR_Group *group_ptr1, int n, const int *ranks1,
-                                     MPIR_Group *group_ptr2, int *ranks2);
-int MPIR_Group_union_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_Group **new_group_ptr);
-void MPIR_Get_count_impl(const MPI_Status *status, MPI_Datatype datatype, int *count);
-void MPIR_Grequest_complete_impl(MPIR_Request *request_ptr);
-int MPIR_Grequest_start_impl(MPI_Grequest_query_function *query_fn,
-                             MPI_Grequest_free_function *free_fn,
-                             MPI_Grequest_cancel_function *cancel_fn,
-                             void *extra_state, MPIR_Request **request_ptr);
-int MPIX_Grequest_start_impl(MPI_Grequest_query_function *,
-                             MPI_Grequest_free_function *,
-                             MPI_Grequest_cancel_function *,
-                             MPIX_Grequest_poll_function *,
-                             MPIX_Grequest_wait_function *, void *,
-                             MPIR_Request **);
-int MPIR_Graph_map_impl(const MPIR_Comm *comm_ptr, int nnodes,
-                        const int indx[], const int edges[], int *newrank);
-int MPIR_Type_commit_impl(MPI_Datatype *datatype);
-int MPIR_Type_create_struct_impl(int count,
-                                 const int array_of_blocklengths[],
-                                 const MPI_Aint array_of_displacements[],
-                                 const MPI_Datatype array_of_types[],
-                                 MPI_Datatype *newtype);
-int MPIR_Type_create_indexed_block_impl(int count,
-                                        int blocklength,
-                                        const int array_of_displacements[],
-                                        MPI_Datatype oldtype,
-                                        MPI_Datatype *newtype);
-int MPIR_Type_create_hindexed_block_impl(int count, int blocklength,
-                                         const MPI_Aint array_of_displacements[],
-                                         MPI_Datatype oldtype, MPI_Datatype *newtype);
-int MPIR_Type_contiguous_impl(int count,
-                              MPI_Datatype old_type,
-                              MPI_Datatype *new_type_p);
-int MPIR_Type_contiguous_x_impl(MPI_Count count,
-                              MPI_Datatype old_type,
-                              MPI_Datatype *new_type_p);
-void MPIR_Type_get_extent_impl(MPI_Datatype datatype, MPI_Aint *lb, MPI_Aint *extent);
-void MPIR_Type_get_true_extent_impl(MPI_Datatype datatype, MPI_Aint *true_lb, MPI_Aint *true_extent);
-void MPIR_Type_get_envelope_impl(MPI_Datatype datatype, int *num_integers, int *num_addresses,
-                                 int *num_datatypes, int *combiner);
-int MPIR_Type_hvector_impl(int count, int blocklen, MPI_Aint stride, MPI_Datatype old_type, MPI_Datatype *newtype_p);
-int MPIR_Type_indexed_impl(int count, const int blocklens[], const int indices[],
-                           MPI_Datatype old_type, MPI_Datatype *newtype);
-void MPIR_Type_free_impl(MPI_Datatype *datatype);
-int MPIR_Type_vector_impl(int count, int blocklength, int stride, MPI_Datatype old_type, MPI_Datatype *newtype_p);
-int MPIR_Type_struct_impl(int count, const int blocklens[], const MPI_Aint indices[], const MPI_Datatype old_types[], MPI_Datatype *newtype);
-int MPIR_Pack_impl(const void *inbuf, MPI_Aint incount, MPI_Datatype datatype, void *outbuf, MPI_Aint outcount, MPI_Aint *position);
-void MPIR_Pack_size_impl(int incount, MPI_Datatype datatype, MPI_Aint *size);
-int MPIR_Unpack_impl(const void *inbuf, MPI_Aint insize, MPI_Aint *position,
-                     void *outbuf, int outcount, MPI_Datatype datatype);
-void MPIR_Type_lb_impl(MPI_Datatype datatype, MPI_Aint *displacement);
-int MPIR_Ibsend_impl(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
-                     MPIR_Comm *comm_ptr, MPI_Request *request);
-int MPIR_Test_impl(MPI_Request *request, int *flag, MPI_Status *status);
-int MPIR_Testall_impl(int count, MPI_Request array_of_requests[], int *flag,
-                      MPI_Status array_of_statuses[]);
-int MPIR_Wait_impl(MPI_Request *request, MPI_Status *status);
-int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
-                      MPI_Status array_of_statuses[]);
-int MPIR_Comm_set_attr_impl(MPIR_Comm *comm_ptr, int comm_keyval, void *attribute_val,
-                            MPIR_AttrType attrType);
-
-/* Pull the error status out of the tag space and put it into an errflag. */
-#undef FUNCNAME
-#define FUNCNAME MPIR_process_status
-#undef FCNAME
-#define FCNAME MPL_QUOTE(FUNCNAME)
-static inline void MPIR_Process_status(MPI_Status *status, MPIR_Errflag_t *errflag)
-{
-    if (MPI_PROC_NULL != status->MPI_SOURCE &&
-        (MPIX_ERR_REVOKED == MPIR_ERR_GET_CLASS(status->MPI_ERROR) ||
-        MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(status->MPI_ERROR) ||
-        MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) && !*errflag) {
-        /* If the receive was completed within the MPID_Recv, handle the
-        * errflag here. */
-        if (MPIR_TAG_CHECK_PROC_FAILURE_BIT(status->MPI_TAG) ||
-            MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(status->MPI_ERROR)) {
-            *errflag = MPIR_ERR_PROC_FAILED;
-            MPIR_TAG_CLEAR_ERROR_BITS(status->MPI_TAG);
-        } else {
-            *errflag = MPIR_ERR_OTHER;
-            MPIR_TAG_CLEAR_ERROR_BITS(status->MPI_TAG);
-        }
-    }
-}
-
-extern const char MPIR_Version_string[];
-extern const char MPIR_Version_date[];
-extern const char MPIR_Version_configure[];
-extern const char MPIR_Version_device[];
-extern const char MPIR_Version_CC[];
-extern const char MPIR_Version_CXX[];
-extern const char MPIR_Version_F77[];
-extern const char MPIR_Version_FC[];
+/*****************************************************************************/
+/******************** PART 6: DEVICE "POST" FUNCTIONALITY ********************/
+/*****************************************************************************/
 
-/* Include definitions from the device which require items defined by this
-   file (mpiimpl.h). */
 #include "mpidpost.h"
 
 /* avoid conflicts in source files with old-style "char FCNAME[]" vars */
diff --git a/src/include/mpiinfo.h b/src/include/mpiinfo.h
deleted file mode 100644
index ebdc6c4..0000000
--- a/src/include/mpiinfo.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-extern void MPIU_Info_free( MPIR_Info *info_ptr );
-extern int MPIU_Info_alloc(MPIR_Info **info_p_p);
diff --git a/src/util/assert/mpiassert.h b/src/include/mpir_assert.h
similarity index 97%
rename from src/util/assert/mpiassert.h
rename to src/include/mpir_assert.h
index f06170f..11887ce 100644
--- a/src/util/assert/mpiassert.h
+++ b/src/include/mpir_assert.h
@@ -3,10 +3,10 @@
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
-#if !defined(MPIASSERT_H_INCLUDED)
-#define MPIASSERT_H_INCLUDED
+#if !defined(MPIR_ASSERT_H_INCLUDED)
+#define MPIR_ASSERT_H_INCLUDED
 
-#include "mpiu_type_defs.h"
+#include "mpir_type_defs.h"
 
 /* modern versions of clang support lots of C11 features */
 #if defined(__has_extension)
@@ -127,4 +127,4 @@ int MPIR_Assert_fail_fmt(const char *cond, const char *file_name, int line_num,
 #  define MPIU_Static_assert(cond_,msg_) MPIU_Assert_fmt_msg((cond_), ("%s", (msg_)))
 #endif
 
-#endif /* !defined(MPIASSERT_H_INCLUDED) */
+#endif /* !defined(MPIR_ASSERT_H_INCLUDED) */
diff --git a/src/include/mpir_attr.h b/src/include/mpir_attr.h
new file mode 100644
index 0000000..12f295c
--- /dev/null
+++ b/src/include/mpir_attr.h
@@ -0,0 +1,205 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_ATTR_H_INCLUDED
+#define MPIR_ATTR_H_INCLUDED
+
+/* Because Comm, Datatype, and File handles are all ints, and because
+   attributes are otherwise identical between the three types, we
+   only store generic copy and delete functions.  This allows us to use
+   common code for the attribute set, delete, and dup functions */
+/*E
+  MPIR_Copy_function - MPID Structure to hold an attribute copy function
+
+  Notes:
+  The appropriate element of this structure is selected by using the language
+  field of the 'keyval'.
+
+  Because 'MPI_Comm', 'MPI_Win', and 'MPI_Datatype' are all 'int's in
+  MPICH, we use a single C copy function rather than have separate
+  ones for the Communicator, Window, and Datatype attributes.
+
+  There are no corresponding typedefs for the Fortran functions.  The
+  F77 function corresponds to the Fortran 77 binding used in MPI-1 and the
+  F90 function corresponds to the Fortran 90 binding used in MPI-2.
+
+  Module:
+  Attribute-DS
+
+  E*/
+int
+MPIR_Attr_copy_c_proxy(
+    MPI_Comm_copy_attr_function* user_function,
+    int handle,
+    int keyval,
+    void* extra_state,
+    MPIR_AttrType attrib_type,
+    void* attrib,
+    void** attrib_copy,
+    int* flag
+    );
+
+typedef struct MPIR_Copy_function {
+  int  (*C_CopyFunction)( int, int, void *, void *, void *, int * );
+  void (*F77_CopyFunction)  ( MPI_Fint *, MPI_Fint *, MPI_Fint *, MPI_Fint *,
+                              MPI_Fint *, MPI_Fint *, MPI_Fint * );
+  void (*F90_CopyFunction)  ( MPI_Fint *, MPI_Fint *, MPI_Aint *, MPI_Aint *,
+                              MPI_Aint *, MPI_Fint *, MPI_Fint * );
+  /* The generic lang-independent user_function and proxy will
+   * replace the lang dependent copy funcs above
+   * Currently the lang-independent funcs are used only for keyvals
+   */
+  MPI_Comm_copy_attr_function *user_function;
+  MPIR_Attr_copy_proxy *proxy;
+  /* The C++ function is the same as the C function */
+} MPIR_Copy_function;
+
+/*E
+  MPIR_Delete_function - MPID Structure to hold an attribute delete function
+
+  Notes:
+  The appropriate element of this structure is selected by using the language
+  field of the 'keyval'.
+
+  Because 'MPI_Comm', 'MPI_Win', and 'MPI_Datatype' are all 'int's in
+  MPICH, we use a single C delete function rather than have separate
+  ones for the Communicator, Window, and Datatype attributes.
+
+  There are no corresponding typedefs for the Fortran functions.  The
+  F77 function corresponds to the Fortran 77 binding used in MPI-1 and the
+  F90 function corresponds to the Fortran 90 binding used in MPI-2.
+
+  Module:
+  Attribute-DS
+
+  E*/
+int
+MPIR_Attr_delete_c_proxy(
+    MPI_Comm_delete_attr_function* user_function,
+    int handle,
+    int keyval,
+    MPIR_AttrType attrib_type,
+    void* attrib,
+    void* extra_state
+    );
+
+typedef struct MPIR_Delete_function {
+  int  (*C_DeleteFunction)  ( int, int, void *, void * );
+  void (*F77_DeleteFunction)( MPI_Fint *, MPI_Fint *, MPI_Fint *, MPI_Fint *,
+                              MPI_Fint * );
+  void (*F90_DeleteFunction)( MPI_Fint *, MPI_Fint *, MPI_Aint *, MPI_Aint *,
+                              MPI_Fint * );
+  /* The generic lang-independent user_function and proxy will
+   * replace the lang dependent delete funcs above
+   * Currently the lang-independent funcs are used only for keyvals
+   */
+  MPI_Comm_delete_attr_function *user_function;
+  MPIR_Attr_delete_proxy *proxy;
+} MPIR_Delete_function;
+
+/*S
+  MPIR_Keyval - Structure of an MPID keyval
+
+  Module:
+  Attribute-DS
+
+  S*/
+typedef struct MPIR_Keyval {
+    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_Object_kind     kind;
+    int                  was_freed;
+    void                 *extra_state;
+    MPIR_Copy_function   copyfn;
+    MPIR_Delete_function delfn;
+  /* other, device-specific information */
+#ifdef MPID_DEV_KEYVAL_DECL
+    MPID_DEV_KEYVAL_DECL
+#endif
+} MPIR_Keyval;
+
+#define MPIR_Keyval_add_ref( _keyval )                                  \
+    do {                                                                \
+        MPIU_Object_add_ref( _keyval );                                 \
+    } while(0)
+
+#define MPIR_Keyval_release_ref( _keyval, _inuse )                      \
+    do {                                                                \
+        MPIU_Object_release_ref( _keyval, _inuse );                     \
+    } while(0)
+
+
+/* Attribute values in C/C++ are void * and in Fortran are ADDRESS_SIZED
+   integers.  Normally, these are the same size, but in at least one
+   case, the address-sized integer was selected to be longer than void *
+   to work with the datatype code used in the I/O library.  This is
+   really a limitation in the current Datatype implementation. */
+#ifdef USE_AINT_FOR_ATTRVAL
+typedef MPI_Aint MPIR_AttrVal_t;
+#else
+typedef void * MPIR_AttrVal_t;
+#endif
+
+/* Attributes need no ref count or handle, but since we want to use the
+   common block allocator for them, we must provide those elements
+*/
+/*S
+  MPIR_Attribute - Structure of an MPID attribute
+
+  Notes:
+  Attributes don''t have 'ref_count's because they don''t have reference
+  count semantics.  That is, there are no shallow copies or duplicates
+  of an attribute.  An attribute is copied when the communicator that
+  it is attached to is duplicated.  Subsequent operations, such as
+  'MPI_Comm_delete_attr', can change the attribute list for one of the
+  communicators but not the other, making it impractical to keep the
+  same list.  (We could defer making the copy until the list is changed,
+  but even then, there would be no reference count on the individual
+  attributes.)
+
+  A pointer to the keyval, rather than the (integer) keyval itself is
+  used since there is no need within the attribute structure to make
+  it any harder to find the keyval structure.
+
+  The attribute value is a 'void *'.  If 'sizeof(MPI_Fint)' > 'sizeof(void*)',
+  then this must be changed (no such system has been encountered yet).
+  For the Fortran 77 routines in the case where 'sizeof(MPI_Fint)' <
+  'sizeof(void*)', the high end of the 'void *' value is used.  That is,
+  we cast it to 'MPI_Fint *' and use that value.
+
+  MPI defines three kinds of attributes (see MPI 2.1, Section 16.3, pages
+  487-488; the standard says two, but there are really three, as discussed
+  below).  These are pointer-valued attributes and two types of integer-valued
+  attributes.
+  Pointer-valued attributes are used in C.
+  Integer-valued attributes are used in Fortran.  These are of type either
+  INTEGER or INTEGER(KIND=MPI_ADDRESS_KIND).
+
+  The predefined attributes are a combination of INTEGER and pointers.
+
+  Module:
+  Attribute-DS
+
+ S*/
+typedef struct MPIR_Attribute {
+    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPIR_Keyval  *keyval;           /* Keyval structure for this attribute */
+
+    struct MPIR_Attribute *next;    /* Pointer to next in the list */
+    MPIR_AttrType attrType;         /* Type of the attribute */
+    long        pre_sentinal;       /* Used to detect user errors in accessing
+				       the value */
+    MPIR_AttrVal_t value;           /* Stored value. An Aint must be at least
+				       as large as an address - some builds
+				       may make an Aint larger than a void * */
+    long        post_sentinal;      /* Like pre_sentinal */
+    /* other, device-specific information */
+#ifdef MPID_DEV_ATTR_DECL
+    MPID_DEV_ATTR_DECL
+#endif
+} MPIR_Attribute;
+
+#endif /* MPIR_ATTR_H_INCLUDED */
diff --git a/src/include/mpir_attr_generic.h b/src/include/mpir_attr_generic.h
new file mode 100644
index 0000000..7bf6b48
--- /dev/null
+++ b/src/include/mpir_attr_generic.h
@@ -0,0 +1,188 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#ifndef MPIR_ATTR_GENERIC_H_INCLUDED
+#define MPIR_ATTR_GENERIC_H_INCLUDED
+
+/* ------------------------------------------------------------------------- */
+/* Keyvals and attributes */
+/*TKyOverview.tex
+
+  Keyvals are MPI objects that, unlike most MPI objects, are defined to be
+  integers rather than a handle (e.g., 'MPI_Comm').  However, they really
+  `are` MPI opaque objects and are handled by the MPICH implementation in
+  the same way as all other MPI opaque objects.  The only difference is that
+  there is no 'typedef int MPI_Keyval;' in 'mpi.h'.  In particular, keyvals
+  are encoded (for direct and indirect references) in the same way that
+  other MPI opaque objects are
+
+  Each keyval has a copy and a delete function associated with it.
+  Unfortunately, these have a slightly different calling sequence for
+  each language, particularly when the size of a pointer is
+  different from the size of a Fortran integer.  The unions
+  'MPIR_Copy_function' and 'MPIR_Delete_function' capture the differences
+  in a single union type.
+
+  The above comment is out of date but has never been updated as it should
+  have to match the introduction of a different interface.  Beware!
+
+  Notes:
+
+  In the original design, retrieving an attribute from a language other
+  than the one that set it was thought to be an error.  The MPI Forum
+  decided that this should be allowed, and after much discussion, the
+  behavior was defined.  Thus, we need to record what sort of
+  attribute was provided, and be able to properly return the correct
+  value in each case.  See MPI 2.2, Section 16.3.7 (Attributes) for
+  specific requirements.  One consequence of this is that the value
+  that is returned may have a different length than how it was set.
+  On little-endian platforms (e.g., x86), this doesn't cause much of a
+  problem, because the address is that of the least significant byte,
+  and the lower bytes have the data that is needed in the case that
+  the desired attribute type is shorter than the stored attribute.
+  However, on a big-endian platform (e.g., IBM POWER), since the most
+  significant bytes are stored first, depending on the length of the
+  result type, the address of the result may not be the beginning of
+  the memory area.  For example, assume that an MPI_Fint is 4 bytes
+  and a void * (and a Fortran INTEGER of kind MPI_ADDRESS_KIND) is 8
+  bytes, and let the attribute store the value in an 8 byte integer in
+  a field named "value".  On a little-endian platform, the address of
+  the value is always the beginning of the field "value".  On a
+  big-endian platform, the address of the value is the beginning of
+  the field if the return type is a pointer (e.g., from C) or Fortran
+  (KIND=MPI_ADDRESS_KIND), and the address of the beginning of the
+  field + 4 if the return type is a Fortran 77 integer (and, as
+  specified above, an MPI_Fint is 4 bytes shorter than a void *).
+
+  For the big-endian case, it is possible to manage these shifts (using
+  WORDS_LITTLEENDIAN to detect the big-endian case).  Alternatively,
+  at a small cost in space, copies in variables of the correct length
+  can be maintained.  At this writing, the code in src/mpi/attr makes
+  use of WORDS_LITTLEENDIAN to provide the appropriate code for the most
+  common cases.
+
+  T*/
+/*TAttrOverview.tex
+ *
+ * The MPI standard allows `attributes`, essentially an '(integer,pointer)'
+ * pair, to be attached to communicators, windows, and datatypes.
+ * The integer is a `keyval`, which is allocated by a call (at the MPI level)
+ * to 'MPI_Comm/Type/Win_create_keyval'.  The pointer is the value of
+ * the attribute.
+ * Attributes are primarily intended for use by the user, for example, to save
+ * information on a communicator, but can also be used to pass data to the
+ * MPI implementation.  For example, an attribute may be used to pass
+ * Quality of Service information to an implementation to be used with
+ * communication on a particular communicator.
+ * To provide the most general access by the ADI to all attributes, the
+ * ADI defines a collection of routines that are used by the implementation
+ * of the MPI attribute routines (such as 'MPI_Comm_get_attr').
+ * In addition, the MPI routines involving attributes will invoke the
+ * corresponding 'hook' functions (e.g., 'MPID_Dev_comm_attr_set_hook')
+ * should the device define them.
+ *
+ * Attributes on windows and datatypes are defined by MPI but not of
+ * interest (as yet) to the device.
+ *
+ * In addition, there are seven predefined attributes that the device must
+ * supply to the implementation.  This is accomplished through
+ * data values that are part of the 'MPIR_Process' data block.
+ *  The predefined keyvals on 'MPI_COMM_WORLD' are\:
+ *.vb
+ * Keyval                     Related Module
+ * MPI_APPNUM                 Dynamic
+ * MPI_HOST                   Core
+ * MPI_IO                     Core
+ * MPI_LASTUSEDCODE           Error
+ * MPI_TAG_UB                 Communication
+ * MPI_UNIVERSE_SIZE          Dynamic
+ * MPI_WTIME_IS_GLOBAL        Timer
+ *.ve
+ * The values stored in the 'MPIR_Process' block are the actual values.  For
+ * example, the value of 'MPI_TAG_UB' is the integer value of the largest tag.
+ * The
+ * value of 'MPI_WTIME_IS_GLOBAL' is a '1' for true and '0' for false.  Likely
+ * values for 'MPI_IO' and 'MPI_HOST' are 'MPI_ANY_SOURCE' and 'MPI_PROC_NULL'
+ * respectively.
+ *
+ T*/
+
+/* bit 0 distinguishes between pointers (0) and integers (1) */
+typedef enum {
+    MPIR_ATTR_PTR=0,
+    MPIR_ATTR_AINT=1,
+    MPIR_ATTR_INT=3
+} MPIR_AttrType;
+
+#define MPIR_ATTR_KIND(_a) (_a & 0x1)
+
+int MPIR_CommSetAttr( MPI_Comm, int, void *, MPIR_AttrType );
+int MPIR_TypeSetAttr( MPI_Datatype, int, void *, MPIR_AttrType );
+int MPIR_WinSetAttr( MPI_Win, int, void *, MPIR_AttrType );
+int MPIR_CommGetAttr( MPI_Comm, int, void *, int *, MPIR_AttrType );
+int MPIR_TypeGetAttr( MPI_Datatype, int, void *, int *, MPIR_AttrType );
+int MPIR_WinGetAttr( MPI_Win, int, void *, int *, MPIR_AttrType );
+
+int MPIR_CommGetAttr_fort( MPI_Comm, int, void *, int *, MPIR_AttrType );
+
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*E
+  Language bindings for MPI
+
+  A few operations in MPI need to know how to marshal the callback into the calling
+  language calling convention. The marshaling code is provided by a thunk layer which
+  implements the correct behavior.  Examples of these callback functions are the
+  keyval attribute copy and delete functions.
+
+  Module:
+  Attribute-DS
+  E*/
+
+/*
+ * Support bindings for Attribute copy/del callbacks
+ * Consolidate Comm/Type/Win attribute functions together as the handle type is the same
+ * use MPI_Comm for the prototypes
+ */
+typedef
+int
+(MPIR_Attr_copy_proxy)(
+    MPI_Comm_copy_attr_function* user_function,
+    int handle,
+    int keyval,
+    void* extra_state,
+    MPIR_AttrType attrib_type,
+    void* attrib,
+    void** attrib_copy,
+    int* flag
+    );
+
+typedef
+int
+(MPIR_Attr_delete_proxy)(
+    MPI_Comm_delete_attr_function* user_function,
+    int handle,
+    int keyval,
+    MPIR_AttrType attrib_type,
+    void* attrib,
+    void* extra_state
+    );
+
+void
+MPIR_Keyval_set_proxy(
+    int keyval,
+    MPIR_Attr_copy_proxy copy_proxy,
+    MPIR_Attr_delete_proxy delete_proxy
+    );
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* MPIR_ATTR_GENERIC_H_INCLUDED */
diff --git a/src/include/mpibsend.h b/src/include/mpir_bsend.h
similarity index 96%
rename from src/include/mpibsend.h
rename to src/include/mpir_bsend.h
index 8a3bf5d..227174d 100644
--- a/src/include/mpibsend.h
+++ b/src/include/mpir_bsend.h
@@ -5,8 +5,8 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-#ifndef MPIBSEND_H_INCLUDED
-#define MPIBSEND_H_INCLUDED
+#ifndef MPIR_BSEND_H_INCLUDED
+#define MPIR_BSEND_H_INCLUDED
 
 /* This file is separated out as it is used by the configure script to
  * find the Bsend overhead value. */
@@ -73,4 +73,4 @@ typedef struct MPIR_Bsend_data {
                                           shares double alignment */
 } MPIR_Bsend_data_t;
 
-#endif /* MPIBSEND_H_INCLUDED */
+#endif /* MPIR_BSEND_H_INCLUDED */
diff --git a/src/include/mpir_coll.h b/src/include/mpir_coll.h
new file mode 100644
index 0000000..ff6c1e1
--- /dev/null
+++ b/src/include/mpir_coll.h
@@ -0,0 +1,451 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_COLL_H_INCLUDED
+#define MPIR_COLL_H_INCLUDED
+
+/* Collective operations */
+typedef struct MPIR_Collops {
+    int ref_count;   /* Supports lazy copies */
+    /* Contains pointers to the functions for the MPI collectives */
+    int (*Barrier) (MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Bcast) (void*, int, MPI_Datatype, int, MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Gather) (const void*, int, MPI_Datatype, void*, int, MPI_Datatype,
+                   int, MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Gatherv) (const void*, int, MPI_Datatype, void*, const int *, const int *,
+                    MPI_Datatype, int, MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Scatter) (const void*, int, MPI_Datatype, void*, int, MPI_Datatype,
+                    int, MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Scatterv) (const void*, const int *, const int *, MPI_Datatype,
+                     void*, int, MPI_Datatype, int, MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Allgather) (const void*, int, MPI_Datatype, void*, int,
+                      MPI_Datatype, MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Allgatherv) (const void*, int, MPI_Datatype, void*, const int *,
+                       const int *, MPI_Datatype, MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Alltoall) (const void*, int, MPI_Datatype, void*, int, MPI_Datatype,
+                               MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Alltoallv) (const void*, const int *, const int *, MPI_Datatype,
+                      void*, const int *, const int *, MPI_Datatype, MPIR_Comm *,
+                      MPIR_Errflag_t *);
+    int (*Alltoallw) (const void*, const int *, const int *, const MPI_Datatype *, void*,
+                      const int *, const int *, const MPI_Datatype *, MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Reduce) (const void*, void*, int, MPI_Datatype, MPI_Op, int,
+                   MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Allreduce) (const void*, void*, int, MPI_Datatype, MPI_Op,
+                      MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Reduce_scatter) (const void*, void*, const int *, MPI_Datatype, MPI_Op,
+                           MPIR_Comm *, MPIR_Errflag_t *);
+    int (*Scan) (const void*, void*, int, MPI_Datatype, MPI_Op, MPIR_Comm *, MPIR_Errflag_t * );
+    int (*Exscan) (const void*, void*, int, MPI_Datatype, MPI_Op, MPIR_Comm *, MPIR_Errflag_t * );
+    int (*Reduce_scatter_block) (const void*, void*, int, MPI_Datatype, MPI_Op,
+                           MPIR_Comm *, MPIR_Errflag_t *);
+
+    /* MPI-3 nonblocking collectives */
+    int (*Ibarrier_sched)(MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ibcast_sched)(void *buffer, int count, MPI_Datatype datatype, int root,
+                  MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Igather_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                   int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr,
+                   MPID_Sched_t s);
+    int (*Igatherv_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                    const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root,
+                    MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Iscatter_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                    int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr,
+                    MPID_Sched_t s);
+    int (*Iscatterv_sched)(const void *sendbuf, const int *sendcounts, const int *displs,
+                     MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                     int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Iallgather_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                      int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
+                      MPID_Sched_t s);
+    int (*Iallgatherv_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                       const int *recvcounts, const int *displs, MPI_Datatype recvtype,
+                       MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ialltoall_sched)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+                     int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
+                     MPID_Sched_t s);
+    int (*Ialltoallv_sched)(const void *sendbuf, const int *sendcounts, const int *sdispls,
+                      MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
+                      const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
+                      MPID_Sched_t s);
+    int (*Ialltoallw_sched)(const void *sendbuf, const int *sendcounts, const int *sdispls,
+                      const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
+                      const int *rdispls, const MPI_Datatype *recvtypes,
+                      MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ireduce_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+                   int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Iallreduce_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                      MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ireduce_scatter_sched)(const void *sendbuf, void *recvbuf, const int *recvcounts,
+                           MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ireduce_scatter_block_sched)(const void *sendbuf, void *recvbuf, int recvcount,
+                                 MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
+                                 MPID_Sched_t s);
+    int (*Iscan_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+                 MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Iexscan_sched)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
+                   MPIR_Comm *comm_ptr, MPID_Sched_t s);
+
+    struct MPIR_Collops *prev_coll_fns; /* when overriding this table, set this to point to the old table */
+
+    /* MPI-3 neighborhood collectives (blocking & nonblocking) */
+    int (*Neighbor_allgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                              void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                              MPIR_Comm *comm_ptr);
+    int (*Neighbor_allgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                               void *recvbuf, const int recvcounts[], const int displs[],
+                               MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+    int (*Neighbor_alltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                             void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                             MPIR_Comm *comm_ptr);
+    int (*Neighbor_alltoallv)(const void *sendbuf, const int sendcounts[], const int sdispls[],
+                              MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
+                              const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+    int (*Neighbor_alltoallw)(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[],
+                              const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
+                              const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
+                              MPIR_Comm *comm_ptr);
+    int (*Ineighbor_allgather)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                               void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                               MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ineighbor_allgatherv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                                void *recvbuf, const int recvcounts[], const int displs[],
+                                MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ineighbor_alltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                              void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                              MPIR_Comm *comm_ptr, MPID_Sched_t s);
+    int (*Ineighbor_alltoallv)(const void *sendbuf, const int sendcounts[], const int sdispls[],
+                               MPI_Datatype sendtype, void *recvbuf, const int recvcounts[],
+                               const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
+                               MPID_Sched_t s);
+    int (*Ineighbor_alltoallw)(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[],
+                               const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
+                               const MPI_Aint rdispls[], const MPI_Datatype recvtypes[],
+                               MPIR_Comm *comm_ptr, MPID_Sched_t s);
+} MPIR_Collops;
+
+
+/* Internal point-to-point communication for collectives */
+/* These functions are used in the implementation of collective and
+   other internal operations. They are wrappers around MPID send/recv
+   functions. They do sends/receives by setting the context offset to
+   MPIR_CONTEXT_INTRA(INTER)_COLL. */
+int MPIC_Wait(MPIR_Request * request_ptr, MPIR_Errflag_t *errflag);
+int MPIC_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status);
+
+int MPIC_Send(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag,
+                 MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIC_Recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int source, int tag,
+                 MPIR_Comm *comm_ptr, MPI_Status *status, MPIR_Errflag_t *errflag);
+int MPIC_Ssend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag,
+                  MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIC_Sendrecv(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
+                     int dest, int sendtag, void *recvbuf, MPI_Aint recvcount,
+                     MPI_Datatype recvtype, int source, int recvtag,
+                     MPIR_Comm *comm_ptr, MPI_Status *status, MPIR_Errflag_t *errflag);
+int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
+                             int dest, int sendtag,
+                             int source, int recvtag,
+                             MPIR_Comm *comm_ptr, MPI_Status *status, MPIR_Errflag_t *errflag);
+int MPIC_Isend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag,
+                  MPIR_Comm *comm_ptr, MPIR_Request **request, MPIR_Errflag_t *errflag);
+int MPIC_Issend(const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest, int tag,
+                  MPIR_Comm *comm_ptr, MPIR_Request **request, MPIR_Errflag_t *errflag);
+int MPIC_Irecv(void *buf, MPI_Aint count, MPI_Datatype datatype, int source,
+                  int tag, MPIR_Comm *comm_ptr, MPIR_Request **request);
+int MPIC_Waitall(int numreq, MPIR_Request *requests[], MPI_Status statuses[], MPIR_Errflag_t *errflag);
+
+
+/* Collective fallback implementations */
+int MPIR_Allgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                        MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                   void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                   MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Allgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                         MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Allgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                         MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                         void *recvbuf, const int *recvcounts, const int *displs,
+                         MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                    void *recvbuf, const int *recvcounts, const int *displs,
+                    MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Allgatherv_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                          void *recvbuf, const int *recvcounts, const int *displs,
+                          MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Allgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                          void *recvbuf, const int *recvcounts, const int *displs,
+                          MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Allreduce_impl(const void *sendbuf, void *recvbuf, int count,
+                        MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Allreduce(const void *sendbuf, void *recvbuf, int count,
+                   MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Allreduce_intra(const void *sendbuf, void *recvbuf, int count,
+                         MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Allreduce_inter(const void *sendbuf, void *recvbuf, int count,
+                        MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                       void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                       MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                  MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Alltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                        MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Alltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                        MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Alltoallv_impl(const void *sendbuf, const int *sendcnts, const int *sdispls,
+                        MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
+                        const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
+                        MPIR_Errflag_t *errflag);
+int MPIR_Alltoallv(const void *sendbuf, const int *sendcnts, const int *sdispls,
+                   MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
+                   const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcnts, const int *sdispls,
+                         MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
+                         const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr,
+                         MPIR_Errflag_t *errflag);
+int MPIR_Alltoallv_inter(const void *sendbuf, const int *sendcnts, const int *sdispls,
+                         MPI_Datatype sendtype, void *recvbuf, const int *recvcnts,
+                         const int *rdispls, MPI_Datatype recvtype,
+                         MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Alltoallw_impl(const void *sendbuf, const int *sendcnts, const int *sdispls,
+                        const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcnts,
+                        const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr,
+                        MPIR_Errflag_t *errflag);
+int MPIR_Alltoallw(const void *sendbuf, const int *sendcnts, const int *sdispls,
+                   const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcnts,
+                   const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr,
+                   MPIR_Errflag_t *errflag);
+int MPIR_Alltoallw_intra(const void *sendbuf, const int *sendcnts, const int *sdispls,
+                         const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcnts,
+                         const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr,
+                         MPIR_Errflag_t *errflag);
+int MPIR_Alltoallw_inter(const void *sendbuf, const int *sendcnts, const int *sdispls,
+                         const MPI_Datatype *sendtypes, void *recvbuf,
+                         const int *recvcnts, const int *rdispls, const MPI_Datatype *recvtypes,
+                         MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Bcast_inter(void *buffer, int count, MPI_Datatype datatype,
+		     int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Bcast_intra (void *buffer, int count, MPI_Datatype datatype, int
+                      root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Bcast (void *buffer, int count, MPI_Datatype datatype, int
+                root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Bcast_impl (void *buffer, int count, MPI_Datatype datatype, int
+                root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Exscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                     MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Gather_impl (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                      void *recvbuf, int recvcnt, MPI_Datatype recvtype,
+                      int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Gather (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
+                 int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Gather_intra (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
+                       int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Gather_inter (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
+                       int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Gatherv (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                  void *recvbuf, const int *recvcnts, const int *displs,
+                  MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Gatherv_impl (const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                       void *recvbuf, const int *recvcnts, const int *displs,
+                       MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Reduce_scatter_impl(const void *sendbuf, void *recvbuf, const int *recvcnts,
+                             MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Reduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcnts,
+                        MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int *recvcnts,
+                              MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Reduce_scatter_inter(const void *sendbuf, void *recvbuf, const int *recvcnts,
+                              MPI_Datatype datatype, MPI_Op op,
+                              MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Reduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recvcount,
+                                   MPI_Datatype datatype, MPI_Op op, MPIR_Comm
+                                   *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount,
+                              MPI_Datatype datatype, MPI_Op op, MPIR_Comm
+                              *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Reduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount,
+                                    MPI_Datatype datatype, MPI_Op op, MPIR_Comm
+                                    *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Reduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount,
+                                    MPI_Datatype datatype, MPI_Op op, MPIR_Comm
+                                    *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Reduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                     MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Reduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                      MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Reduce_inter (const void *sendbuf, void *recvbuf, int count, MPI_Datatype
+                       datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Scan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+                   MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
+              MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Scatter_impl(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                      void *recvbuf, int recvcnt, MPI_Datatype recvtype,
+                      int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Scatter(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
+                 int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Scatter_intra(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
+                       int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Scatter_inter(const void *sendbuf, int sendcnt, MPI_Datatype sendtype,
+                       void *recvbuf, int recvcnt, MPI_Datatype recvtype,
+                       int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag );
+int MPIR_Scatterv_impl (const void *sendbuf, const int *sendcnts, const int *displs,
+                        MPI_Datatype sendtype, void *recvbuf, int recvcnt,
+                        MPI_Datatype recvtype, int root, MPIR_Comm
+                        *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Scatterv (const void *sendbuf, const int *sendcnts, const int *displs,
+                   MPI_Datatype sendtype, void *recvbuf, int recvcnt,
+                   MPI_Datatype recvtype, int root, MPIR_Comm
+                   *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Barrier_impl( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Barrier( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Barrier_intra( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Barrier_inter( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag);
+int MPIR_Reduce_local_impl(const void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op);
+
+
+/* group collectives */
+int MPIR_Allreduce_group(void *sendbuf, void *recvbuf, int count,
+                         MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
+                         MPIR_Group *group_ptr, int tag, MPIR_Errflag_t *errflag);
+int MPIR_Allreduce_group_intra(void *sendbuf, void *recvbuf, int count,
+                               MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
+                               MPIR_Group *group_ptr, int tag, MPIR_Errflag_t *errflag);
+
+
+int MPIR_Barrier_group(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr, int tag, MPIR_Errflag_t *errflag);
+
+
+/* impl functions for NBC */
+int MPIR_Ibarrier_impl(MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ibcast_impl(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Igather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Igatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Iscatter_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Iscatterv_impl(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Iallgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Iallgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ialltoallv_impl(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ialltoallw_impl(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ireduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Iallreduce_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ireduce_scatter_impl(const void *sendbuf, void *recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ireduce_scatter_block_impl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Iscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Iexscan_impl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPI_Request *request);
+
+
+/* impl functions for neighborhood collectives */
+int MPIR_Ineighbor_allgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ineighbor_allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ineighbor_alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ineighbor_alltoallv_impl(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Ineighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Neighbor_allgather_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+int MPIR_Neighbor_allgatherv_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+int MPIR_Neighbor_alltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+int MPIR_Neighbor_alltoallv_impl(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+int MPIR_Neighbor_alltoallw_impl(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr);
+
+
+/* neighborhood collective default algorithms */
+int MPIR_Neighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+int MPIR_Neighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+int MPIR_Neighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+int MPIR_Neighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr);
+int MPIR_Neighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr);
+int MPIR_Ineighbor_allgather_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ineighbor_allgatherv_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ineighbor_alltoall_default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ineighbor_alltoallv_default(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ineighbor_alltoallw_default(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPIR_Comm *comm_ptr, MPID_Sched_t s);
+
+
+/* nonblocking collective default algorithms */
+int MPIR_Ibcast_intra(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ibcast_binomial(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ibcast_SMP(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iscatter_for_bcast(void *tmp_buf, int root, MPIR_Comm *comm_ptr, int nbytes, MPID_Sched_t s);
+int MPIR_Ibcast_scatter_rec_dbl_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ibcast_scatter_ring_allgather(void *buffer, int count, MPI_Datatype datatype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ibarrier_intra(MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ibarrier_inter(MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_redscat_gather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoallv_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoallv_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallreduce_intra(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallreduce_inter(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallreduce_naive(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallreduce_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallreduce_redscat_allgather(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallreduce_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Igather_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Igather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Igather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iscatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iscatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iscatterv(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_intra(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_inter(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_rec_dbl(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_rec_hlv(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_pairwise(const void *sendbuf, void *recvbuf, const int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_block_intra(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_block_inter(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_block_rec_hlv(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_block_pairwise(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_block_rec_dbl(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ireduce_scatter_block_noncomm(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoall_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoall_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoall_inplace(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgather_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgather_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgather_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgatherv_rec_dbl(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgatherv_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgatherv_ring(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgatherv_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iallgatherv_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iscan_rec_dbl(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iscan_SMP(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoallw_intra(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+int MPIR_Ialltoallw_inter(const void *sendbuf, const int *sendcounts, const int *sdispls, const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Datatype *recvtypes, MPIR_Comm *comm_ptr, MPID_Sched_t s);
+
+#endif /* MPIR_COLL_H_INCLUDED */
diff --git a/src/include/mpir_comm.h b/src/include/mpir_comm.h
new file mode 100644
index 0000000..57bd1ed
--- /dev/null
+++ b/src/include/mpir_comm.h
@@ -0,0 +1,365 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_COMM_H_INCLUDED
+#define MPIR_COMM_H_INCLUDED
+
+#if defined HAVE_LIBHCOLL
+#include "../mpid/common/hcoll/hcollpre.h"
+#endif
+
+/*E
+  MPIR_Comm_kind_t - Name the two types of communicators
+  E*/
+typedef enum MPIR_Comm_kind_t {
+    MPIR_COMM_KIND__INTRACOMM = 0,
+    MPIR_COMM_KIND__INTERCOMM = 1
+} MPIR_Comm_kind_t;
+
+/* ideally we could add these to MPIR_Comm_kind_t, but there's too much existing
+ * code that assumes that the only valid values are INTRACOMM or INTERCOMM */
+typedef enum MPIR_Comm_hierarchy_kind_t {
+    MPIR_COMM_HIERARCHY_KIND__FLAT = 0,        /* no hierarchy */
+    MPIR_COMM_HIERARCHY_KIND__PARENT = 1,      /* has subcommunicators */
+    MPIR_COMM_HIERARCHY_KIND__NODE_ROOTS = 2,  /* is the subcomm for node roots */
+    MPIR_COMM_HIERARCHY_KIND__NODE = 3,        /* is the subcomm for a node */
+    MPIR_COMM_HIERARCHY_KIND__SIZE             /* cardinality of this enum */
+} MPIR_Comm_hierarchy_kind_t;
+
+typedef enum {
+    MPIR_COMM_MAP_TYPE__DUP,        /* presumably maps src_comm's ranks one-to-one -- TODO confirm */
+    MPIR_COMM_MAP_TYPE__IRREGULAR   /* the only type for which MPIR_Comm_map's src_mapping fields are valid */
+} MPIR_Comm_map_type_t;
+
+/* direction of mapping: local to local, local to remote, remote to
+ * local, remote to remote */
+typedef enum {
+    MPIR_COMM_MAP_DIR__L2L,
+    MPIR_COMM_MAP_DIR__L2R,
+    MPIR_COMM_MAP_DIR__R2L,
+    MPIR_COMM_MAP_DIR__R2R
+} MPIR_Comm_map_dir_t;
+
+typedef struct MPIR_Comm_map {
+    MPIR_Comm_map_type_t type;
+
+    struct MPIR_Comm *src_comm;
+
+    /* mapping direction for intercomms, which contain local and
+     * remote groups */
+    MPIR_Comm_map_dir_t dir;
+
+    /* only valid for irregular map type */
+    int src_mapping_size;
+    int *src_mapping;
+    int free_mapping;       /* we allocated the mapping */
+
+    struct MPIR_Comm_map *next;
+} MPIR_Comm_map_t;
+
+int MPIR_Comm_map_irregular(struct MPIR_Comm *newcomm, struct MPIR_Comm *src_comm,
+                            int *src_mapping, int src_mapping_size,
+                            MPIR_Comm_map_dir_t dir,
+                            MPIR_Comm_map_t **map);
+int MPIR_Comm_map_dup(struct MPIR_Comm *newcomm, struct MPIR_Comm *src_comm,
+                      MPIR_Comm_map_dir_t dir);
+int MPIR_Comm_map_free(struct MPIR_Comm *comm);
+
+/*S
+  MPIR_Comm - Description of the Communicator data structure
+
+  Notes:
+  Note that the size and rank duplicate data in the groups that
+  make up this communicator.  These are used often enough that this
+  optimization is valuable.
+
+  This definition provides only a 16-bit integer for context ids.
+  This should be sufficient for most applications.  However, extending
+  this to a 32-bit (or longer) integer should be easy.
+
+  There are two context ids.  One is used for sending and one for
+  receiving.  In the case of an Intracommunicator, they are the same
+  context id.  They differ in the case of intercommunicators, where
+  they may come from processes in different comm worlds (in the
+  case of MPI-2 dynamic process intercomms).
+
+  The virtual connection table is an explicit member of this structure.
+  This contains the information used to contact a particular process,
+  indexed by the rank relative to this communicator.
+
+  Groups are allocated lazily.  That is, the group pointers may be
+  null, created only when needed by a routine such as 'MPI_Comm_group'.
+  The local process ids needed to form the group are available within
+  the virtual connection table.
+  For intercommunicators, we may want to always have the groups.  If not,
+  we either need the 'local_group' or we need a virtual connection table
+  corresponding to the 'local_group' (we may want this anyway to simplify
+  the implementation of the intercommunicator collective routines).
+
+  The pointer to the structure 'MPIR_Collops' containing pointers to the
+  collective
+  routines allows an implementation to replace each routine on a
+  routine-by-routine basis.  By default, this pointer is null, as are the
+  pointers within the structure.  If either pointer is null, the implementation
+  uses the generic provided implementation.  This choice, rather than
+  initializing the table with pointers to all of the collective routines,
+  is made to reduce the space used in the communicators and to eliminate the
+  need to include the implementation of all collective routines in all MPI
+  executables, even if the routines are not used.
+
+  The macro 'MPID_HAS_HETERO' may be defined by a device to indicate that
+  the device supports MPI programs that must communicate between processes with
+  different data representations (e.g., different sized integers or different
+  byte orderings).  If the device does need to define this value, it should
+  be defined in the file 'mpidpre.h'.
+
+  Please note that the local_size and remote_size fields can be confusing.  For
+  intracommunicators both fields are always equal to the size of the
+  communicator.  For intercommunicators local_size is equal to the size of
+  local_group while remote_size is equal to the size of remote_group.
+
+  Module:
+  Communicator-DS
+
+  Question:
+  For fault tolerance, do we want to have a standard field for communicator
+  health?  For example, ok, failure detected, all (live) members of failed
+  communicator have acked.
+  S*/
+struct MPIR_Comm {
+    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPID_Thread_mutex_t mutex;
+    MPIU_Context_id_t context_id; /* Send context id.  See notes */
+    MPIU_Context_id_t recvcontext_id; /* Receive context id.  See notes */
+    int           remote_size;   /* Value of MPI_Comm_(remote)_size */
+    int           rank;          /* Value of MPI_Comm_rank */
+    MPIR_Attribute *attributes;  /* List of attributes */
+    int           local_size;    /* Value of MPI_Comm_size for local group */
+    MPIR_Group   *local_group,   /* Groups in communicator. */
+                 *remote_group;  /* The local and remote groups are the
+                                    same for intra communicators */
+    MPIR_Comm_kind_t comm_kind;  /* MPIR_COMM_KIND__INTRACOMM or MPIR_COMM_KIND__INTERCOMM */
+    char          name[MPI_MAX_OBJECT_NAME];  /* Required for MPI-2 */
+    MPIR_Errhandler *errhandler; /* Pointer to the error handler structure */
+    struct MPIR_Comm    *local_comm; /* Defined only for intercomms, holds
+				        an intracomm for the local group */
+
+    MPIR_Comm_hierarchy_kind_t hierarchy_kind; /* flat, parent, node, or node_roots */
+    struct MPIR_Comm *node_comm; /* Comm of processes in this comm that are on
+                                    the same node as this process. */
+    struct MPIR_Comm *node_roots_comm; /* Comm of root processes for other nodes. */
+    int *intranode_table;        /* intranode_table[i] gives the rank in
+                                    node_comm of rank i in this comm or -1 if i
+                                    is not in this process' node_comm.
+                                    It is of size 'local_size'. */
+    int *internode_table;        /* internode_table[i] gives the rank in
+                                    node_roots_comm of rank i in this comm.
+                                    It is of size 'local_size'. */
+
+    int           is_low_group;  /* For intercomms only, this boolean is
+				    set for all members of one of the
+				    two groups of processes and clear for
+				    the other.  It enables certain
+				    intercommunicator collective operations
+				    that wish to use half-duplex operations
+				    to implement a full-duplex operation */
+    struct MPIR_Comm     *comm_next;/* Provides a chain through all active
+				       communicators */
+    struct MPIR_Collops  *coll_fns; /* Pointer to a table of functions
+                                              implementing the collective
+                                              routines */
+    struct MPIR_TopoOps  *topo_fns; /* Pointer to a table of functions
+				       implementing the topology routines */
+    int next_sched_tag;             /* used by the NBC schedule code to allocate tags */
+
+    int revoked;                    /* Flag to track whether the communicator
+                                     * has been revoked */
+    MPIR_Info *info;                /* Hints to the communicator */
+
+#ifdef MPID_HAS_HETERO
+    int is_hetero;
+#endif
+
+#if defined HAVE_LIBHCOLL
+    hcoll_comm_priv_t hcoll_priv;
+#endif /* HAVE_LIBHCOLL */
+
+    /* the mapper is temporarily filled out in order to allow the
+     * device to setup its network addresses.  it will be freed after
+     * the device has initialized the comm. */
+    MPIR_Comm_map_t *mapper_head;
+    MPIR_Comm_map_t *mapper_tail;
+
+  /* Other, device-specific information */
+#ifdef MPID_DEV_COMM_DECL
+    MPID_DEV_COMM_DECL
+#endif
+};
+extern MPIU_Object_alloc_t MPIR_Comm_mem;
+
+typedef struct MPIR_Gpid {
+#ifdef MPID_DEV_GPID_DECL
+    MPID_DEV_GPID_DECL
+#else
+    int dummy;   /* don't create an empty structure */
+#endif
+}MPIR_Gpid;
+
+/* this function should not be called by normal code! */
+int MPIR_Comm_delete_internal(MPIR_Comm * comm_ptr);
+
+#define MPIR_Comm_add_ref(_comm) \
+    do { MPIU_Object_add_ref((_comm)); } while (0)
+#define MPIR_Comm_release_ref( _comm, _inuse ) \
+    do { MPIU_Object_release_ref( _comm, _inuse ); } while (0)
+
+
+/* Release a reference to a communicator.  If there are no pending
+   references, delete the communicator and recover all storage and
+   context ids.  Returns MPI_SUCCESS when references remain; otherwise
+   returns whatever MPIR_Comm_delete_internal reports for the deletion.
+
+   This routine has been inlined because keeping it as a separate routine
+   results in a >5% performance hit for the SQMR benchmark. */
+#undef FUNCNAME
+#define FUNCNAME MPIR_Comm_release
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static inline int MPIR_Comm_release(MPIR_Comm * comm_ptr)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int in_use;
+
+    MPIR_Comm_release_ref(comm_ptr, &in_use);
+    if (unlikely(!in_use)) {
+        /* the following routine should only be called by this function and its
+         * "_always" variant. */
+        mpi_errno = MPIR_Comm_delete_internal(comm_ptr);
+        /* not ERR_POPing here to permit simpler inlining.  Our caller will
+         * still report the error from the comm_delete level. */
+    }
+
+    return mpi_errno;
+}
+#undef FUNCNAME
+#undef FCNAME
+
+/* MPIR_Comm_release_always is the same as MPIR_Comm_release except it uses
+   MPIR_Comm_release_ref_always instead.
+*/
+int MPIR_Comm_release_always(MPIR_Comm *comm_ptr);
+
+/* applies the specified info chain to the specified communicator */
+int MPIR_Comm_apply_hints(MPIR_Comm *comm_ptr, MPIR_Info *info_ptr);
+
+int MPIR_Comm_copy( MPIR_Comm *, int, MPIR_Comm ** );
+int MPIR_Comm_copy_data(MPIR_Comm *comm_ptr, MPIR_Comm **outcomm_ptr);
+
+int MPIR_Setup_intercomm_localcomm( MPIR_Comm * );
+
+int MPIR_Comm_create( MPIR_Comm ** );
+int MPIR_Comm_create_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag,
+                           MPIR_Comm ** newcomm);
+
+/* comm_create helper functions, used by both comm_create and comm_create_group */
+int MPIR_Comm_create_calculate_mapping(MPIR_Group  *group_ptr,
+                                       MPIR_Comm   *comm_ptr,
+                                       int        **mapping_out,
+                                       MPIR_Comm **mapping_comm);
+
+int MPIR_Comm_create_map(int local_n,
+                         int remote_n,
+                         int *local_mapping,
+                         int *remote_mapping,
+                         MPIR_Comm *mapping_comm,
+                         MPIR_Comm *newcomm);
+
+/* implements the logic for MPI_Comm_create for intracommunicators only */
+int MPIR_Comm_create_intra(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr,
+                           MPIR_Comm **newcomm_ptr);
+
+
+int MPIR_Comm_commit( MPIR_Comm * );
+
+int MPIR_Comm_is_node_aware( MPIR_Comm * );
+
+int MPIR_Comm_is_node_consecutive( MPIR_Comm *);
+
+int MPIR_Comm_idup_impl(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm, MPIR_Request **reqp);
+
+int MPIR_Comm_shrink(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr);
+int MPIR_Comm_agree(MPIR_Comm *comm_ptr, int *flag);
+
+#if defined(HAVE_ROMIO)
+int MPIR_Comm_split_filesystem(MPI_Comm comm, int key, const char *dirname, MPI_Comm *newcomm);
+#endif
+
+int MPIR_Comm_init(MPIR_Comm *);
+
+#define MPIR_Comm_rank(comm_ptr) ((comm_ptr)->rank)
+#define MPIR_Comm_size(comm_ptr) ((comm_ptr)->local_size)
+
+/* Communicator info hint functions */
+typedef int (*MPIR_Comm_hint_fn_t)(MPIR_Comm *, MPIR_Info *, void *);
+int MPIR_Comm_register_hint(const char *hint_key, MPIR_Comm_hint_fn_t fn, void *state);
+
+int MPIR_Comm_delete_attr_impl(MPIR_Comm *comm_ptr, MPIR_Keyval *keyval_ptr);
+int MPIR_Comm_create_keyval_impl(MPI_Comm_copy_attr_function *comm_copy_attr_fn,
+                                 MPI_Comm_delete_attr_function *comm_delete_attr_fn,
+                                 int *comm_keyval, void *extra_state);
+int MPIR_Comm_accept_impl(const char * port_name, MPIR_Info * info_ptr, int root,
+                          MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr);
+int MPIR_Comm_connect_impl(const char * port_name, MPIR_Info * info_ptr, int root,
+                           MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr);
+int MPIR_Comm_create_errhandler_impl(MPI_Comm_errhandler_function *function,
+                                     MPI_Errhandler *errhandler);
+int MPIR_Comm_dup_impl(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr);
+int MPIR_Comm_dup_with_info_impl(MPIR_Comm *comm_ptr, MPIR_Info *info_ptr, MPIR_Comm **newcomm_ptr);
+int MPIR_Comm_get_info_impl(MPIR_Comm *comm_ptr, MPIR_Info **info_ptr);
+int MPIR_Comm_set_info_impl(MPIR_Comm *comm_ptr, MPIR_Info *info_ptr);
+int MPIR_Comm_free_impl(MPIR_Comm * comm_ptr);
+void MPIR_Comm_free_keyval_impl(int keyval);
+void MPIR_Comm_get_errhandler_impl(MPIR_Comm *comm_ptr, MPIR_Errhandler **errhandler_ptr);
+void MPIR_Comm_set_errhandler_impl(MPIR_Comm *comm_ptr, MPIR_Errhandler *errhandler_ptr);
+void MPIR_Comm_get_name_impl(MPIR_Comm *comm, char *comm_name, int *resultlen);
+int MPIR_Intercomm_merge_impl(MPIR_Comm *comm_ptr, int high, MPIR_Comm **new_intracomm_ptr);
+int MPIR_Intercomm_create_impl(MPIR_Comm *local_comm_ptr, int local_leader,
+                               MPIR_Comm *peer_comm_ptr, int remote_leader, int tag,
+                               MPIR_Comm **new_intercomm_ptr);
+int MPIR_Comm_group_impl(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr);
+int MPIR_Comm_remote_group_impl(MPIR_Comm *comm_ptr, MPIR_Group **group_ptr);
+int MPIR_Comm_group_failed_impl(MPIR_Comm *comm, MPIR_Group **failed_group_ptr);
+int MPIR_Comm_remote_group_failed_impl(MPIR_Comm *comm, MPIR_Group **failed_group_ptr);
+int MPIR_Comm_split_impl(MPIR_Comm *comm_ptr, int color, int key, MPIR_Comm **newcomm_ptr);
+int MPIR_Comm_split_type_impl(MPIR_Comm *comm_ptr, int split_type, int key, MPIR_Info *info_ptr,
+                              MPIR_Comm **newcomm_ptr);
+int MPIR_Comm_set_attr_impl(MPIR_Comm *comm_ptr, int comm_keyval, void *attribute_val,
+                            MPIR_AttrType attrType);
+
+
+/* Preallocated comm objects.  There are 3: comm_world, comm_self, and
+   a private (non-user accessible) dup of comm world that is provided
+   if needed in MPI_Finalize.  Having a separate version of comm_world
+   avoids possible interference with User code */
+#define MPIR_COMM_N_BUILTIN 3
+extern MPIR_Comm MPIR_Comm_builtin[MPIR_COMM_N_BUILTIN];
+extern MPIR_Comm MPIR_Comm_direct[];
+/* This is the handle for the internal MPI_COMM_WORLD.  The "2" at the end
+   of the handle is 3-1 (i.e., its index in the builtin array) */
+#define MPIR_ICOMM_WORLD  ((MPI_Comm)0x44000002)
+
+#ifndef HAVE_DEV_COMM_HOOK
+#define MPID_Dev_comm_create_hook( a ) MPI_SUCCESS
+#define MPID_Dev_comm_destroy_hook( a ) MPI_SUCCESS
+#endif
+
+typedef struct MPIR_Commops {
+    int (*split_type)(MPIR_Comm *, int, int, MPIR_Info *, MPIR_Comm **);
+} MPIR_Commops;
+extern struct MPIR_Commops  *MPIR_Comm_fns; /* Communicator creation functions */
+
+#endif /* MPIR_COMM_H_INCLUDED */
diff --git a/src/include/mpir_contextid.h b/src/include/mpir_contextid.h
new file mode 100644
index 0000000..571cd88
--- /dev/null
+++ b/src/include/mpir_contextid.h
@@ -0,0 +1,105 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_CONTEXTID_H_INCLUDED
+#define MPIR_CONTEXTID_H_INCLUDED
+
+#define MPIU_CONTEXT_ID_T_DATATYPE MPI_UINT16_T
+typedef uint16_t MPIU_Context_id_t;
+#define MPIU_INVALID_CONTEXT_ID ((MPIU_Context_id_t)0xffff)
+
+/* The following preprocessor macros provide bitfield access information for
+ * context ID values.  They follow a uniform naming pattern:
+ *
+ * MPIR_CONTEXT_foo_WIDTH - the width in bits of the field
+ * MPIR_CONTEXT_foo_MASK  - A valid bit mask for bit-wise AND and OR operations
+ *                          with exactly all of the bits in the field set.
+ * MPIR_CONTEXT_foo_SHIFT - The number of bits that the field should be shifted
+ *                          rightwards to place it in the least significant bits
+ *                          of the ID.  There may still be higher order bits
+ *                          from other fields, so the _MASK should be used first
+ *                          if you want to reliably retrieve the exact value of
+ *                          the field.
+ */
+
+/* yields an rvalue that is the value of the field_name_ in the least significant bits */
+#define MPIR_CONTEXT_READ_FIELD(field_name_,id_) \
+    (((id_) & MPIR_CONTEXT_##field_name_##_MASK) >> MPIR_CONTEXT_##field_name_##_SHIFT)
+/* yields an rvalue that is the old_id_ with field_name_ set to field_val_; field_val_ is shifted but not masked, so it must fit the field */
+#define MPIR_CONTEXT_SET_FIELD(field_name_,old_id_,field_val_) \
+    (((old_id_) & ~MPIR_CONTEXT_##field_name_##_MASK) | ((field_val_) << MPIR_CONTEXT_##field_name_##_SHIFT))
+
+/* Context suffixes for separating pt2pt and collective communication */
+#define MPIR_CONTEXT_SUFFIX_WIDTH (1)
+#define MPIR_CONTEXT_SUFFIX_SHIFT (0)
+#define MPIR_CONTEXT_SUFFIX_MASK ((1 << MPIR_CONTEXT_SUFFIX_WIDTH) - 1)
+#define MPIR_CONTEXT_INTRA_PT2PT (0)
+#define MPIR_CONTEXT_INTRA_COLL  (1)
+#define MPIR_CONTEXT_INTER_PT2PT (0)
+#define MPIR_CONTEXT_INTER_COLL  (1)
+
+/* Used to derive context IDs for sub-communicators from a parent communicator's
+   context ID value.  This field comes after the one-bit suffix, so its
+   values are shifted left by 1. */
+#define MPIR_CONTEXT_SUBCOMM_WIDTH (2)
+#define MPIR_CONTEXT_SUBCOMM_SHIFT (MPIR_CONTEXT_SUFFIX_WIDTH + MPIR_CONTEXT_SUFFIX_SHIFT)
+#define MPIR_CONTEXT_SUBCOMM_MASK      (((1 << MPIR_CONTEXT_SUBCOMM_WIDTH) - 1) << MPIR_CONTEXT_SUBCOMM_SHIFT)
+
+/* these values may be added/subtracted directly to/from an existing context ID
+ * in order to determine the context ID of the child/parent */
+#define MPIR_CONTEXT_PARENT_OFFSET    (0 << MPIR_CONTEXT_SUBCOMM_SHIFT)
+#define MPIR_CONTEXT_INTRANODE_OFFSET (1 << MPIR_CONTEXT_SUBCOMM_SHIFT)
+#define MPIR_CONTEXT_INTERNODE_OFFSET (2 << MPIR_CONTEXT_SUBCOMM_SHIFT)
+
+/* this field (IS_LOCALCOMM) is used to derive a context ID for local
+ * communicators of intercommunicators without communication */
+#define MPIR_CONTEXT_IS_LOCALCOMM_WIDTH (1)
+#define MPIR_CONTEXT_IS_LOCALCOMM_SHIFT (MPIR_CONTEXT_SUBCOMM_SHIFT + MPIR_CONTEXT_SUBCOMM_WIDTH)
+#define MPIR_CONTEXT_IS_LOCALCOMM_MASK (((1 << MPIR_CONTEXT_IS_LOCALCOMM_WIDTH) - 1) << MPIR_CONTEXT_IS_LOCALCOMM_SHIFT)
+
+/* MPIR_MAX_CONTEXT_MASK is the number of ints that make up the bit vector that
+ * describes the context ID prefix space.
+ *
+ * The following must hold:
+ * (num_bits_in_vector) <= (maximum_context_id_prefix)
+ *   which is the following in concrete terms:
+ * MPIR_MAX_CONTEXT_MASK*MPIR_CONTEXT_INT_BITS <= 2**(MPIR_CONTEXT_ID_BITS - (MPIR_CONTEXT_PREFIX_SHIFT + MPIR_CONTEXT_DYNAMIC_PROC_WIDTH))
+ *
+ * We currently always assume MPIR_CONTEXT_INT_BITS is 32, regardless of the
+ * value of sizeof(int)*CHAR_BIT.  We also make the assumption that CHAR_BIT==8.
+ *
+ * For a 16-bit context id field, PREFIX_SHIFT==4 and DYNAMIC_PROC_WIDTH==1, this implies MPIR_MAX_CONTEXT_MASK <= 64
+ */
+
+/* number of bits to shift right by in order to obtain the context ID prefix */
+#define MPIR_CONTEXT_PREFIX_SHIFT (MPIR_CONTEXT_IS_LOCALCOMM_SHIFT + MPIR_CONTEXT_IS_LOCALCOMM_WIDTH)
+#define MPIR_CONTEXT_PREFIX_WIDTH (MPIR_CONTEXT_ID_BITS - (MPIR_CONTEXT_PREFIX_SHIFT + MPIR_CONTEXT_DYNAMIC_PROC_WIDTH))
+#define MPIR_CONTEXT_PREFIX_MASK (((1 << MPIR_CONTEXT_PREFIX_WIDTH) - 1) << MPIR_CONTEXT_PREFIX_SHIFT)
+
+#define MPIR_CONTEXT_DYNAMIC_PROC_WIDTH (1) /* the upper half is reserved for dynamic procs */
+#define MPIR_CONTEXT_DYNAMIC_PROC_SHIFT (MPIR_CONTEXT_ID_BITS - MPIR_CONTEXT_DYNAMIC_PROC_WIDTH) /* the upper half is reserved for dynamic procs */
+#define MPIR_CONTEXT_DYNAMIC_PROC_MASK (((1 << MPIR_CONTEXT_DYNAMIC_PROC_WIDTH) - 1) << MPIR_CONTEXT_DYNAMIC_PROC_SHIFT)
+
+/* should probably be (sizeof(int)*CHAR_BIT) once we make the code CHAR_BIT-clean */
+#define MPIR_CONTEXT_INT_BITS (32)
+#define MPIR_CONTEXT_ID_BITS (sizeof(MPIU_Context_id_t)*8) /* 8 --> CHAR_BITS eventually */
+#define MPIR_MAX_CONTEXT_MASK \
+    ((1 << (MPIR_CONTEXT_ID_BITS - (MPIR_CONTEXT_PREFIX_SHIFT + MPIR_CONTEXT_DYNAMIC_PROC_WIDTH))) / MPIR_CONTEXT_INT_BITS)
+
+/* Utility routines.  Where possible, these are kept in the source directory
+   with the other comm routines (src/mpi/comm, in mpicomm.h).  However,
+   to create a new communicator after a spawn or connect-accept operation,
+   the device may need to create a new contextid */
+int MPIR_Get_contextid_sparse(MPIR_Comm *comm_ptr, MPIU_Context_id_t *context_id, int ignore_id);
+int MPIR_Get_contextid_sparse_group(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr, int tag, MPIU_Context_id_t *context_id, int ignore_id);
+
+int MPIR_Get_contextid_nonblock(MPIR_Comm *comm_ptr, MPIR_Comm *newcommp, MPIR_Request **req);
+int MPIR_Get_intercomm_contextid_nonblock(MPIR_Comm *comm_ptr, MPIR_Comm *newcommp, MPIR_Request **req);
+
+void MPIR_Free_contextid( MPIU_Context_id_t );
+
+#endif /* MPIR_CONTEXTID_H_INCLUDED */
diff --git a/src/include/mpir_cxxinterface.h b/src/include/mpir_cxxinterface.h
new file mode 100644
index 0000000..4bb3fdf
--- /dev/null
+++ b/src/include/mpir_cxxinterface.h
@@ -0,0 +1,15 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_CXX_INTERFACE_H_INCLUDED
+#define MPIR_CXX_INTERFACE_H_INCLUDED
+
+extern void MPIR_Keyval_set_cxx( int, void (*)(void), void (*)(void) );
+extern void MPIR_Op_set_cxx( MPI_Op, void (*)(void) );
+extern void MPIR_Errhandler_set_cxx( MPI_Errhandler, void (*)(void) );
+
+#endif /* MPIR_CXX_INTERFACE_H_INCLUDED */
diff --git a/src/include/mpir_datatype.h b/src/include/mpir_datatype.h
new file mode 100644
index 0000000..6910c7c
--- /dev/null
+++ b/src/include/mpir_datatype.h
@@ -0,0 +1,68 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_DATATYPE_H_INCLUDED
+#define MPIR_DATATYPE_H_INCLUDED
+
+/* This routine is used to install an attribute free routine for datatypes
+   at finalize-time */
+void MPIR_DatatypeAttrFinalize( void );
+
+#define MPIR_DATATYPE_IS_PREDEFINED(type) \
+    ((HANDLE_GET_KIND(type) == HANDLE_KIND_BUILTIN) || \
+     (type == MPI_FLOAT_INT) || (type == MPI_DOUBLE_INT) || \
+     (type == MPI_LONG_INT) || (type == MPI_SHORT_INT) || \
+     (type == MPI_LONG_DOUBLE_INT))
+
+int MPIR_Get_elements_x_impl(const MPI_Status *status, MPI_Datatype datatype, MPI_Count *elements);
+int MPIR_Status_set_elements_x_impl(MPI_Status *status, MPI_Datatype datatype, MPI_Count count);
+void MPIR_Type_get_extent_x_impl(MPI_Datatype datatype, MPI_Count *lb, MPI_Count *extent);
+void MPIR_Type_get_true_extent_x_impl(MPI_Datatype datatype, MPI_Count *true_lb, MPI_Count *true_extent);
+int MPIR_Type_size_x_impl(MPI_Datatype datatype, MPI_Count *size);
+
+#define MPIR_Type_extent_impl(datatype, extent_ptr) MPID_Datatype_get_extent_macro(datatype, *(extent_ptr))
+#define MPIR_Type_size_impl(datatype, size) MPID_Datatype_get_size_macro(datatype, *(size))
+#define MPIR_Test_cancelled_impl(status, flag) *(flag) = MPIR_STATUS_GET_CANCEL_BIT(*(status))
+
+void MPIR_Get_count_impl(const MPI_Status *status, MPI_Datatype datatype, int *count);
+int MPIR_Type_commit_impl(MPI_Datatype *datatype);
+int MPIR_Type_create_struct_impl(int count,
+                                 const int array_of_blocklengths[],
+                                 const MPI_Aint array_of_displacements[],
+                                 const MPI_Datatype array_of_types[],
+                                 MPI_Datatype *newtype);
+int MPIR_Type_create_indexed_block_impl(int count,
+                                        int blocklength,
+                                        const int array_of_displacements[],
+                                        MPI_Datatype oldtype,
+                                        MPI_Datatype *newtype);
+int MPIR_Type_create_hindexed_block_impl(int count, int blocklength,
+                                         const MPI_Aint array_of_displacements[],
+                                         MPI_Datatype oldtype, MPI_Datatype *newtype);
+int MPIR_Type_contiguous_impl(int count,
+                              MPI_Datatype old_type,
+                              MPI_Datatype *new_type_p);
+int MPIR_Type_contiguous_x_impl(MPI_Count count,
+                              MPI_Datatype old_type,
+                              MPI_Datatype *new_type_p);
+void MPIR_Type_get_extent_impl(MPI_Datatype datatype, MPI_Aint *lb, MPI_Aint *extent);
+void MPIR_Type_get_true_extent_impl(MPI_Datatype datatype, MPI_Aint *true_lb, MPI_Aint *true_extent);
+void MPIR_Type_get_envelope_impl(MPI_Datatype datatype, int *num_integers, int *num_addresses,
+                                 int *num_datatypes, int *combiner);
+int MPIR_Type_hvector_impl(int count, int blocklen, MPI_Aint stride, MPI_Datatype old_type, MPI_Datatype *newtype_p);
+int MPIR_Type_indexed_impl(int count, const int blocklens[], const int indices[],
+                           MPI_Datatype old_type, MPI_Datatype *newtype);
+void MPIR_Type_free_impl(MPI_Datatype *datatype);
+int MPIR_Type_vector_impl(int count, int blocklength, int stride, MPI_Datatype old_type, MPI_Datatype *newtype_p);
+int MPIR_Type_struct_impl(int count, const int blocklens[], const MPI_Aint indices[], const MPI_Datatype old_types[], MPI_Datatype *newtype);
+int MPIR_Pack_impl(const void *inbuf, MPI_Aint incount, MPI_Datatype datatype, void *outbuf, MPI_Aint outcount, MPI_Aint *position);
+void MPIR_Pack_size_impl(int incount, MPI_Datatype datatype, MPI_Aint *size);
+int MPIR_Unpack_impl(const void *inbuf, MPI_Aint insize, MPI_Aint *position,
+                     void *outbuf, int outcount, MPI_Datatype datatype);
+void MPIR_Type_lb_impl(MPI_Datatype datatype, MPI_Aint *displacement);
+
+#endif /* MPIR_DATATYPE_H_INCLUDED */
diff --git a/src/include/mpir_dbg.h b/src/include/mpir_dbg.h
new file mode 100644
index 0000000..3cd1d28
--- /dev/null
+++ b/src/include/mpir_dbg.h
@@ -0,0 +1,24 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_DBG_H_INCLUDED
+#define MPIR_DBG_H_INCLUDED
+
+#if defined (MPL_USE_DBG_LOGGING)
+extern MPL_dbg_class MPIR_DBG_INIT;
+extern MPL_dbg_class MPIR_DBG_PT2PT;
+extern MPL_dbg_class MPIR_DBG_THREAD;
+extern MPL_dbg_class MPIR_DBG_DATATYPE;
+extern MPL_dbg_class MPIR_DBG_COMM;
+extern MPL_dbg_class MPIR_DBG_BSEND;
+extern MPL_dbg_class MPIR_DBG_ERRHAND;
+extern MPL_dbg_class MPIR_DBG_OTHER;
+extern MPL_dbg_class MPIR_DBG_REQUEST;
+extern MPL_dbg_class MPIR_DBG_ASSERT;
+#endif /* MPL_USE_DBG_LOGGING */
+
+#endif /* MPIR_DBG_H_INCLUDED */
diff --git a/src/include/mpir_debugger.h b/src/include/mpir_debugger.h
new file mode 100644
index 0000000..708791b
--- /dev/null
+++ b/src/include/mpir_debugger.h
@@ -0,0 +1,34 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_DEBUGGER_H_INCLUDED
+#define MPIR_DEBUGGER_H_INCLUDED
+
+/* These macros allow us to implement a sendq when debugger support is
+   selected.  As there is extra overhead for this, we only do this
+   when specifically requested
+*/
+#ifdef HAVE_DEBUGGER_SUPPORT
+void MPIR_WaitForDebugger( void );
+void MPIR_DebuggerSetAborting( const char * );
+void MPIR_Sendq_remember(MPIR_Request *, int, int, int );
+void MPIR_Sendq_forget(MPIR_Request *);
+void MPIR_CommL_remember( MPIR_Comm * );
+void MPIR_CommL_forget( MPIR_Comm * );
+
+#define MPIR_SENDQ_REMEMBER(_a,_b,_c,_d) MPIR_Sendq_remember(_a,_b,_c,_d)
+#define MPIR_SENDQ_FORGET(_a) MPIR_Sendq_forget(_a)
+#define MPIR_COMML_REMEMBER(_a) MPIR_CommL_remember( _a )
+#define MPIR_COMML_FORGET(_a) MPIR_CommL_forget( _a )
+#else
+#define MPIR_SENDQ_REMEMBER(a,b,c,d)
+#define MPIR_SENDQ_FORGET(a)
+#define MPIR_COMML_REMEMBER(_a)
+#define MPIR_COMML_FORGET(_a)
+#endif
+
+#endif /* MPIR_DEBUGGER_H_INCLUDED */
diff --git a/src/include/mpierrs.h b/src/include/mpir_err.h
similarity index 89%
rename from src/include/mpierrs.h
rename to src/include/mpir_err.h
index efc2915..3ba9808 100644
--- a/src/include/mpierrs.h
+++ b/src/include/mpir_err.h
@@ -3,10 +3,154 @@
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
-#ifndef MPIERRS_H_INCLUDED
-#define MPIERRS_H_INCLUDED
+
+#ifndef MPIR_ERR_H_INCLUDED
+#define MPIR_ERR_H_INCLUDED
+
+/* Error severity */
+#define MPIR_ERR_FATAL 1
+#define MPIR_ERR_RECOVERABLE 0
+
+struct MPIR_Comm;
+struct MPIR_Win;
+
+/* Bindings for internal routines */
+int MPIR_Err_return_comm( struct MPIR_Comm *, const char [], int );
+int MPIR_Err_return_win( struct MPIR_Win *, const char [], int );
+#ifdef MPI__FILE_DEFINED
+/* Only define if we have MPI_File */
+int MPIR_Err_return_file( MPI_File, const char [], int ); /* Romio version */
+#endif
+/* FIXME:
+ * Update this description to match the current version of the routine,
+ * in particular, the pseudo-format types (even better, fix it so that
+ * the pseudo format types can work with the format attribute check).
+ */
+/*@
+  MPIR_Err_create_code - Create an error code and associated message
+  to report an error
+
+  Input Parameters:
++ lastcode - Previous error code (see notes)
+. severity  - Indicates severity of error
+. fcname - Name of the function in which the error has occurred.
+. line  - Line number (usually '__LINE__')
+. class - Error class
+. generic_msg - A generic message to be used if no instance-specific
+ message is available
+. instance_msg - A message containing printf-style formatting commands
+  that, when combined with the instance_parameters, specify an error
+  message containing instance-specific data.
+- instance_parameters - The remaining parameters.  These must match
+ the formatting commands in 'instance_msg'.
+
+ Notes:
+ A typical use is\:
+.vb
+   mpi_errno = MPIR_Err_create_code( mpi_errno, MPIR_ERR_RECOVERABLE,
+               FCNAME, __LINE__, MPI_ERR_RANK,
+               "Invalid Rank", "Invalid rank %d", rank );
+.ve
+
+  Predefined message may also be used.  Any message that uses the
+  prefix '"**"' will be looked up in a table.  This allows standardized
+  messages to be used for a message that is used in several different locations
+  in the code.  For example, the name '"**rank"' might be used instead of
+  '"Invalid Rank"'; this would also allow the message to be made more
+  specific and useful, such as
+.vb
+   Invalid rank provided.  The rank must be between 0 and 1 less than
+   the size of the communicator in this call.
+.ve
+  This interface is compatible with the 'gettext' interface for
+  internationalization, in the sense that the 'generic_msg' and 'instance_msg'
+  may be used as arguments to 'gettext' to return a string in the appropriate
+  language; the implementation of 'MPIR_Err_create_code' can then convert
+  this text into the appropriate code value.
+
+  The current set of formatting commands is undocumented and will change.
+  You may safely use '%d' and '%s' (though only use '%s' for names of
+  objects, not text messages, as using '%s' for a message breaks support for
+  internationalization).
+
+  This interface allows error messages to be chained together.  The first
+  argument is the last error code; if there is no previous error code,
+  use 'MPI_SUCCESS'.
+
+  Extended Format Specifiers:
+  In addition to the standard format specifiers (e.g., %d for an int value),
+  MPIR_Err_create_code accepts some additional values that correspond to
+  various MPI types:
++ i - an MPI rank; recognizes 'MPI_ANY_SOURCE', 'MPI_PROC_NULL', and
+      'MPI_ROOT'
+. t - an MPI tag; recognizes 'MPI_ANY_TAG'.
+. A - an MPI assert value.
+. C - an MPI communicator.
+. D - an MPI datatype.
+. E - an MPI Errhandler.
+. F - an MPI File object.
+. G - an MPI Group.
+. I - an MPI info object.
+. O - an MPI Op.
+. R - an MPI Request.
+- W - an MPI Window object.
+
+
+  Module:
+  Error
+
+  @*/
+int MPIR_Err_create_code( int, int, const char [], int, int, const char [], const char [], ... );
+
+#ifdef USE_ERR_CODE_VALIST
+int MPIR_Err_create_code_valist( int, int, const char [], int, int, const char [], const char [], va_list );
+#endif
+
+/*@
+  MPIR_Err_combine_codes - Combine two error codes, or more importantly
+  two lists of error messages.  The list associated with the second error
+  code is appended to the list associated with the first error code.  If
+  the list associated with the first error code has a dangling tail, which
+  is possible if the ring has wrapped and overwritten entries that were
+  once part of the list, then the append operation is not performed and
+  the error code for the first list is returned.
+
+  Input Parameters:
++ errorcode1 - the error code associated with the first list
+- errorcode2 - the error code associated with the second list
+
+  Return value:
+  An error code which resolves to the combined list of error messages
+
+  Notes:
+  If errorcode1 is equal to MPI_SUCCESS, then errorcode2 is returned.
+  Likewise, if errorcode2 is equal to MPI_SUCCESS, then errorcode1 is
+  returned.
+
+  Module:
+  Error
+  @*/
+int MPIR_Err_combine_codes(int, int);
+
+int MPIR_Err_is_fatal(int);
+void MPIR_Err_init(void);
+void MPIR_Err_preOrPostInit( void );
+
+int MPIR_Err_set_msg( int code, const char *msg_string );
+
+/* This routine is called when there is a fatal error. Now public because file
+ * error handling is defined in a separate file from comm and win, but all
+ * three need to call it */
+void MPIR_Handle_fatal_error(struct MPIR_Comm *comm_ptr,
+	const char fcname[], int errcode);
+
+#define MPIR_ERR_CLASS_MASK 0x0000007f
+#define MPIR_ERR_CLASS_SIZE 128
+#define MPIR_ERR_GET_CLASS(mpi_errno_) (mpi_errno_ & MPIR_ERR_CLASS_MASK)
+
+
 /* ------------------------------------------------------------------------- */
-/* mpierrs.h */
+/* mpir_err.h */
 /* ------------------------------------------------------------------------- */
 
 /*
@@ -850,7 +994,7 @@ do {								\
 #endif
 
 /* ------------------------------------------------------------------------- */
-/* end of mpierrs.h */
+/* end of mpir_err.h */
 /* ------------------------------------------------------------------------- */
 
-#endif
+#endif  /* MPIR_ERR_H_INCLUDED */
diff --git a/src/include/mpir_errhandler.h b/src/include/mpir_errhandler.h
new file mode 100644
index 0000000..53a06db
--- /dev/null
+++ b/src/include/mpir_errhandler.h
@@ -0,0 +1,96 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_ERRHANDLER_H_INCLUDED
+#define MPIR_ERRHANDLER_H_INCLUDED
+
+/*E
+  MPIR_Errhandler_fn - MPID Structure to hold an error handler function
+
+  Notes:
+  The MPI-1 Standard declared only the C version of this, implicitly
+  assuming that 'int' and 'MPI_Fint' were the same.
+
+  Since Fortran does not have a C-style variable number of arguments
+  interface, the Fortran interface simply accepts two arguments.  Some
+  calling conventions for Fortran (particularly under Windows) require
+  this.
+
+  Module:
+  ErrHand-DS
+
+  Questions:
+  What do we want to do about C++?  Do we want a hook for a routine that can
+  be called to throw an exception in C++, particularly if we give C++ access
+  to this structure?  Does the C++ handler need to be different (not part
+  of the union)?
+
+  E*/
+typedef union MPIR_Errhandler_fn {
+   void (*C_Comm_Handler_function) ( MPI_Comm *, int *, ... );
+   void (*F77_Handler_function) ( MPI_Fint *, MPI_Fint * );
+   void (*C_Win_Handler_function) ( MPI_Win *, int *, ... );
+   void (*C_File_Handler_function) ( MPI_File *, int *, ... );
+} MPIR_Errhandler_fn;
+
+/*S
+  MPIR_Errhandler - Description of the error handler structure
+
+  Notes:
+  Device-specific information may indicate whether the error handler is active;
+  this can help prevent infinite recursion in error handlers caused by
+  user-error without requiring the user to be as careful.  We might want to
+  make this part of the interface so that the 'MPI_xxx_call_errhandler'
+  routines would check.
+
+  It is useful to have a way to indicate that the errhandler is no longer
+  valid, to help catch the case where the user has freed the errhandler but
+  is still using a copy of the 'MPI_Errhandler' value.  We may want to
+  define the 'id' value for deleted errhandlers.
+
+  Module:
+  ErrHand-DS
+  S*/
+typedef struct MPIR_Errhandler {
+  MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+  MPIR_Lang_t        language;
+  MPIR_Object_kind   kind;
+  MPIR_Errhandler_fn errfn;
+  /* Other, device-specific information */
+#ifdef MPID_DEV_ERRHANDLER_DECL
+    MPID_DEV_ERRHANDLER_DECL
+#endif
+} MPIR_Errhandler;
+extern MPIU_Object_alloc_t MPIR_Errhandler_mem;
+/* Preallocated errhandler objects */
+extern MPIR_Errhandler MPIR_Errhandler_builtin[];
+extern MPIR_Errhandler MPIR_Errhandler_direct[];
+
+/* We never reference count the builtin error handler objects, regardless of how
+ * we decide to reference count the other predefined objects.  If we get to the
+ * point where we never reference count *any* of the builtin objects then we
+ * should probably remove these checks and let them fall through to the checks
+ * for BUILTIN down in the MPIU_Object_* routines. */
+#define MPIR_Errhandler_add_ref( _errhand )                               \
+    do {                                                                  \
+        if (HANDLE_GET_KIND((_errhand)->handle) != HANDLE_KIND_BUILTIN) { \
+            MPIU_Object_add_ref( _errhand );                              \
+        }                                                                 \
+    } while (0)
+#define MPIR_Errhandler_release_ref( _errhand, _inuse )                   \
+    do {                                                                  \
+        if (HANDLE_GET_KIND((_errhand)->handle) != HANDLE_KIND_BUILTIN) { \
+            MPIU_Object_release_ref( (_errhand), (_inuse) );              \
+        }                                                                 \
+        else {                                                            \
+            *(_inuse) = 1;                                                \
+        }                                                                 \
+    } while (0)
+
+void MPIR_Errhandler_free(MPIR_Errhandler *errhan_ptr);
+
+#endif /* MPIR_ERRHANDLER_H_INCLUDED */
diff --git a/src/include/mpir_ext.h.in b/src/include/mpir_ext.h.in
new file mode 100644
index 0000000..c9805fe
--- /dev/null
+++ b/src/include/mpir_ext.h.in
@@ -0,0 +1,97 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+/* This file contains the prototypes for routines that are used with
+   "external" modules such as ROMIO.  These allow the different packages to
+   hide their internal datatypes from one another */
+
+#ifndef MPIEXT_H_INCLUDED
+#define MPIEXT_H_INCLUDED
+
+#include <stdarg.h>
+
+/* This routine, given an MPI_Errhandler (from a file), returns
+   a pointer to the user-supplied error function.  The last argument
+   is set to an integer indicating that the function is MPI_ERRORS_RETURN
+   (value == 1), MPI_ERRORS_ARE_FATAL (value == 0), a valid user-function
+   (value == 2), or a valid user-function that is a C++ routine (value == 3)
+
+   This routine is implemented in mpich/src/mpi/errhan/file_set_errhandler.c
+*/
+void MPIR_Get_file_error_routine( MPI_Errhandler,
+				  void (**)(MPI_File *, int *, ...),
+				  int * );
+
+/* Invoke the C++ error handler (this invokes a special C++ routine that
+ in turn calls the provided function).  That special routine also
+ resets the errorcode to MPI_SUCCESS to prevent the MPICH C++ error handling
+ code from throwing an exception when the user routine returns.
+*/
+int MPIR_File_call_cxx_errhandler( MPI_File *, int *,
+				   void (*)(MPI_File *, int *, ... ) );
+/*
+   These routines provide access to the MPI_Errhandler field within the
+   ROMIO MPI_File structure
+ */
+int MPIR_ROMIO_Get_file_errhand( MPI_File, MPI_Errhandler * );
+int MPIR_ROMIO_Set_file_errhand( MPI_File, MPI_Errhandler );
+
+/* FIXME: This routine is also defined in adio.h */
+int MPIO_Err_return_file( MPI_File, int );
+
+int MPIR_Err_create_code_valist(int, int, const char [], int, int,
+                                const char [], const char [], va_list );
+int MPIR_Err_is_fatal(int);
+
+void MPIR_Get_file_error_routine( MPI_Errhandler,
+                                  void (**)(MPI_File *, int *, ...),
+                                  int * );
+int MPIR_File_call_cxx_errhandler( MPI_File *, int *,
+                                   void (*)(MPI_File *, int *, ... ) );
+
+typedef int (* MPIR_Err_get_class_string_func_t)(int error, char *str, int length);
+void MPIR_Err_get_string( int, char *, int, MPIR_Err_get_class_string_func_t );
+
+struct MPIR_Comm;
+int MPID_Abort(struct MPIR_Comm *comm, int mpi_errno, int exit_code, const char *error_msg);
+
+int MPIR_Ext_assert_fail(const char *cond, const char *file_name, int line_num);
+
+#if (!defined(NDEBUG) && (@HAVE_ERROR_CHECKING@))
+#define MPIR_Ext_assert(a_)                                \
+    do {                                                   \
+        if (!(a_)) {                                       \
+            MPIR_Ext_assert_fail(#a_, __FILE__, __LINE__); \
+        }                                                  \
+    } while (0)
+#else
+#define MPIR_Ext_assert(a_) do {} while(0)
+#endif
+
+extern int MPIR_Ext_dbg_romio_terse_enabled;
+extern int MPIR_Ext_dbg_romio_typical_enabled;
+extern int MPIR_Ext_dbg_romio_verbose_enabled;
+
+/* a copy of MPIU_Ensure_Aint_fits_in_pointer for external use, slightly
+ * modified to use ROMIO's version of the pointer-casting macro */
+#define MPIR_Ext_ensure_Aint_fits_in_pointer(aint) \
+  MPIR_Ext_assert((aint) == (MPI_Aint)(uintptr_t) ADIOI_AINT_CAST_TO_VOID_PTR(aint));
+
+/* to be called early by ROMIO's initialization process in order to setup init-time
+ * glue code that cannot be initialized statically */
+int MPIR_Ext_init(void);
+
+void MPIR_Ext_cs_enter(void);
+void MPIR_Ext_cs_exit(void);
+void MPIR_Ext_cs_yield(void);
+
+/* to facilitate error checking */
+int MPIR_Ext_datatype_iscommitted(MPI_Datatype datatype);
+
+/* make comm split based on access to a common file system easier */
+int MPIR_Get_node_id(MPI_Comm comm, int rank, int *id);
+
+#endif
diff --git a/src/include/mpi_f77interface.h b/src/include/mpir_f77interface.h
similarity index 87%
rename from src/include/mpi_f77interface.h
rename to src/include/mpir_f77interface.h
index fd09a47..775f98e 100644
--- a/src/include/mpi_f77interface.h
+++ b/src/include/mpir_f77interface.h
@@ -4,6 +4,9 @@
  *      See COPYRIGHT in top-level directory.
  */
 
+#ifndef MPIR_F77INTERFACE_H_INCLUDED
+#define MPIR_F77INTERFACE_H_INCLUDED
+
 /* These functions are provided by the MPICH code for the Fortran interface,
    and provide the interfaces needed to keep track of which MPI internal
    objects need to have Fortran or Fortran 90 characteristics */
@@ -17,3 +20,5 @@ void MPIR_Errhandler_set_fc( MPI_Errhandler );
 #endif
 
 #define MPIR_ATTR_C_TO_FORTRAN(ATTR) ((ATTR)+1)
+
+#endif /* MPIR_F77INTERFACE_H_INCLUDED */
diff --git a/src/include/mpi_fortlogical.h b/src/include/mpir_fortlogical.h
similarity index 100%
rename from src/include/mpi_fortlogical.h
rename to src/include/mpir_fortlogical.h
diff --git a/src/include/mpir_func.h b/src/include/mpir_func.h
new file mode 100644
index 0000000..d4a37f3
--- /dev/null
+++ b/src/include/mpir_func.h
@@ -0,0 +1,211 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2008 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#ifndef MPIR_FUNC_H_INCLUDED
+#define MPIR_FUNC_H_INCLUDED
+
+/* In MPICH, each function has an "enter" and "exit" macro.  These can be
+ * used to add various features to each function at compile time, or they
+ * can be set to empty to provide the fastest possible production version.
+ *
+ * There are at this time three choices of features (beyond the empty choice)
+ * 1. timing
+ *    These collect data on when each function began and finished; the
+ *    resulting data can be displayed using special programs
+ * 2. Debug logging (selected with --enable-g=log)
+ *    Invokes MPL_DBG_MSG at the entry and exit for each routine
+ * 3. Additional memory validation of the memory arena (--enable-g=memarena)
+ */
+
+/* state declaration macros */
+#if defined(MPL_USE_DBG_LOGGING) || defined(MPICH_DEBUG_MEMARENA)
+#define MPID_MPI_STATE_DECL(a)
+#define MPID_MPI_INIT_STATE_DECL(a)
+#define MPID_MPI_FINALIZE_STATE_DECL(a)
+#define MPIDI_STATE_DECL(a)
+
+/* Tell the package to define the rest of the enter/exit macros in
+   terms of these */
+#define NEEDS_FUNC_ENTER_EXIT_DEFS 1
+#endif /* MPL_USE_DBG_LOGGING || MPICH_DEBUG_MEMARENA */
+
+/* function enter and exit macros */
+#if defined(MPL_USE_DBG_LOGGING)
+#define MPIR_FUNC_ENTER(a) MPL_DBG_MSG(MPL_DBG_ROUTINE_ENTER,TYPICAL,"Entering "#a)
+#elif defined(MPICH_DEBUG_MEMARENA)
+#define MPIR_FUNC_ENTER(a) MPL_trvalid("Entering " #a)
+#endif
+
+#if defined(MPL_USE_DBG_LOGGING)
+#define MPIR_FUNC_EXIT(a) MPL_DBG_MSG(MPL_DBG_ROUTINE_EXIT,TYPICAL,"Leaving "#a)
+#elif defined(MPICH_DEBUG_MEMARENA)
+#define MPIR_FUNC_EXIT(a) MPL_trvalid("Leaving " #a)
+#endif
+
+
+#if defined(NEEDS_FUNC_ENTER_EXIT_DEFS)
+
+#define MPID_MPI_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
+#define MPID_MPI_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)	MPIR_FUNC_ENTER(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)	MPIR_FUNC_EXIT(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)	MPIR_FUNC_ENTER(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)	MPIR_FUNC_ENTER(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)	MPIR_FUNC_EXIT(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)	MPIR_FUNC_EXIT(a)
+#define MPID_MPI_COLL_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPID_MPI_COLL_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPID_MPI_RMA_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPID_MPI_RMA_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPID_MPI_INIT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPID_MPI_INIT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPID_MPI_FINALIZE_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPID_MPI_FINALIZE_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
+
+/* device layer definitions */
+#define MPIDI_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
+#define MPIDI_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
+#define MPIDI_PT2PT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
+#define MPIDI_PT2PT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
+#define MPIDI_RMA_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
+#define MPIDI_RMA_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
+
+/* evaporate the timing macros since timing is not selected */
+#define MPIU_Timer_init(rank, size)
+#define MPIU_Timer_finalize()
+
+#else   /* ! NEEDS_FUNC_ENTER_EXIT_DEFS */
+
+/* Possible values for timing */
+#define MPID_TIMING_KIND_NONE 0
+#define MPID_TIMING_KIND_TIME 1
+#define MPID_TIMING_KIND_LOG 2
+#define MPID_TIMING_KIND_LOG_DETAILED 3
+#define MPID_TIMING_KIND_ALL 4
+#define MPID_TIMING_KIND_RUNTIME 5
+
+/* Routine tracing (see --enable-timing for control of this) */
+#if defined(HAVE_TIMING) && (HAVE_TIMING == MPID_TIMING_KIND_LOG || \
+    HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || \
+    HAVE_TIMING == MPID_TIMING_KIND_ALL || \
+    HAVE_TIMING == MPID_TIMING_KIND_RUNTIME)
+
+/* This include file contains the static state definitions */
+#include "mpiallstates.h"
+
+/* Possible values for USE_LOGGING */
+#define MPID_LOGGING_NONE 0
+#define MPID_LOGGING_RLOG 1
+#define MPID_LOGGING_EXTERNAL 4
+
+/* Include the macros specific to the selected logging library */
+#if (USE_LOGGING == MPID_LOGGING_RLOG)
+#include "rlog_macros.h"
+#elif (USE_LOGGING == MPID_LOGGING_EXTERNAL)
+#include "mpilogging.h"
+#else
+#error You must select a logging library if timing is enabled
+#endif
+
+/* MPI layer definitions */
+#define MPID_MPI_STATE_DECL(a)                MPIDU_STATE_DECL(a)
+#define MPID_MPI_INIT_STATE_DECL(a)           MPIDU_INIT_STATE_DECL(a)
+#define MPID_MPI_FINALIZE_STATE_DECL(a)       MPIDU_FINALIZE_STATE_DECL(a)
+
+#define MPID_MPI_FUNC_ENTER(a)                MPIDU_FUNC_ENTER(a)
+#define MPID_MPI_FUNC_EXIT(a)                 MPIDU_FUNC_EXIT(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER(a)          MPIDU_PT2PT_FUNC_ENTER(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT(a)           MPIDU_PT2PT_FUNC_EXIT(a)
+#define MPID_MPI_COLL_FUNC_ENTER(a)           MPIDU_COLL_FUNC_ENTER(a)
+#define MPID_MPI_COLL_FUNC_EXIT(a)            MPIDU_COLL_FUNC_EXIT(a)
+#define MPID_MPI_RMA_FUNC_ENTER(a)            MPIDU_RMA_FUNC_ENTER(a)
+#define MPID_MPI_RMA_FUNC_EXIT(a)             MPIDU_RMA_FUNC_EXIT(a)
+#define MPID_MPI_INIT_FUNC_ENTER(a)           MPIDU_INIT_FUNC_ENTER(a)
+#define MPID_MPI_INIT_FUNC_EXIT(a)            MPIDU_INIT_FUNC_EXIT(a)
+#define MPID_MPI_FINALIZE_FUNC_ENTER(a)       MPIDU_FINALIZE_FUNC_ENTER(a)
+#define MPID_MPI_FINALIZE_FUNC_EXIT(a)        MPIDU_FINALIZE_FUNC_EXIT(a)
+
+#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)    MPIDU_PT2PT_FUNC_ENTER_FRONT(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)     MPIDU_PT2PT_FUNC_EXIT(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)     MPIDU_PT2PT_FUNC_ENTER(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)      MPIDU_PT2PT_FUNC_EXIT_BACK(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)      MPIDU_PT2PT_FUNC_EXIT_BOTH(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)     MPIDU_PT2PT_FUNC_ENTER_BOTH(a)
+
+#if defined(HAVE_TIMING) && (HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || HAVE_TIMING == MPID_TIMING_KIND_ALL)
+
+/* device layer definitions */
+#define MPIDI_STATE_DECL(a)                MPIDU_STATE_DECL(a)
+#define MPIDI_FUNC_ENTER(a)                MPIDU_FUNC_ENTER(a)
+#define MPIDI_FUNC_EXIT(a)                 MPIDU_FUNC_EXIT(a)
+#define MPIDI_PT2PT_FUNC_ENTER(a)          MPIDU_PT2PT_FUNC_ENTER(a)
+#define MPIDI_PT2PT_FUNC_EXIT(a)           MPIDU_PT2PT_FUNC_EXIT(a)
+#define MPIDI_RMA_FUNC_ENTER(a)            MPIDU_RMA_FUNC_ENTER(a)
+#define MPIDI_RMA_FUNC_EXIT(a)             MPIDU_RMA_FUNC_EXIT(a)
+
+#else
+
+#define MPIDI_STATE_DECL(a)
+#define MPIDI_FUNC_ENTER(a)
+#define MPIDI_FUNC_EXIT(a)
+#define MPIDI_PT2PT_FUNC_ENTER(a)
+#define MPIDI_PT2PT_FUNC_EXIT(a)
+#define MPIDI_RMA_FUNC_ENTER(a)
+#define MPIDI_RMA_FUNC_EXIT(a)
+
+#endif /* (HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || HAVE_TIMING == MPID_TIMING_KIND_ALL) */
+
+/* prototype the initialization/finalization functions */
+int MPIU_Timer_init(int rank, int size);
+int MPIU_Timer_finalize(void);
+int MPIR_Describe_timer_states(void);
+
+/* The original statistics macros (see the design documentation)
+   have been superseded by the MPIR_T_PVAR_* macros (see mpit.h) */
+
+#else /* HAVE_TIMING and doing logging */
+
+/* evaporate all the timing macros if timing is not selected */
+#define MPIU_Timer_init(rank, size)
+#define MPIU_Timer_finalize()
+/* MPI layer */
+#define MPID_MPI_STATE_DECL(a)
+#define MPID_MPI_INIT_STATE_DECL(a)
+#define MPID_MPI_FINALIZE_STATE_DECL(a)
+#define MPID_MPI_FUNC_EXIT(a)
+#define MPID_MPI_FUNC_ENTER(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)
+#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)
+#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)
+#define MPID_MPI_COLL_FUNC_ENTER(a)
+#define MPID_MPI_COLL_FUNC_EXIT(a)
+#define MPID_MPI_RMA_FUNC_ENTER(a)
+#define MPID_MPI_RMA_FUNC_EXIT(a)
+#define MPID_MPI_INIT_FUNC_ENTER(a)
+#define MPID_MPI_INIT_FUNC_EXIT(a)
+#define MPID_MPI_FINALIZE_FUNC_ENTER(a)
+#define MPID_MPI_FINALIZE_FUNC_EXIT(a)
+/* device layer */
+#define MPIDI_STATE_DECL(a)
+#define MPIDI_FUNC_ENTER(a)
+#define MPIDI_FUNC_EXIT(a)
+#define MPIDI_PT2PT_FUNC_ENTER(a)
+#define MPIDI_PT2PT_FUNC_EXIT(a)
+#define MPIDI_RMA_FUNC_ENTER(a)
+#define MPIDI_RMA_FUNC_EXIT(a)
+
+#endif /* HAVE_TIMING */
+
+#endif /* NEEDS_FUNC_ENTER_EXIT_DEFS */
+
+#endif /* MPIR_FUNC_H_INCLUDED */
diff --git a/src/include/mpir_group.h b/src/include/mpir_group.h
new file mode 100644
index 0000000..0744212
--- /dev/null
+++ b/src/include/mpir_group.h
@@ -0,0 +1,119 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_GROUP_H_INCLUDED
+#define MPIR_GROUP_H_INCLUDED
+
+/*---------------------------------------------------------------------------
+ * Groups are *not* a major data structure in MPICH-2.  They are provided
+ * only because they are required for the group operations (e.g.,
+ * MPI_Group_intersection) and for the scalable RMA synchronization
+ *---------------------------------------------------------------------------*/
+/* This structure is used to implement the group operations such as
+   MPI_Group_translate_ranks */
+typedef struct MPIR_Group_pmap_t {
+    int          lpid;      /* local process id, from VCONN */
+    int          next_lpid; /* Index of next lpid (in lpid order) */
+    int          flag;      /* marker, used to implement group operations */
+} MPIR_Group_pmap_t;
+
+/* Any changes in the MPIR_Group structure must be made to the
+   predefined value in MPIR_Group_builtin for MPI_GROUP_EMPTY in
+   src/mpi/group/grouputil.c */
+/*S
+ MPIR_Group - Description of the Group data structure
+
+ The processes in the group of 'MPI_COMM_WORLD' have lpid values 0 to 'size'-1,
+ where 'size' is the size of 'MPI_COMM_WORLD'.  Processes created by
+ 'MPI_Comm_spawn' or 'MPI_Comm_spawn_multiple' or added by 'MPI_Comm_attach'
+ or
+ 'MPI_Comm_connect'
+ are numbered greater than 'size - 1' (on the calling process). See the
+ discussion of LocalPID values.
+
+ Note that when dynamic process creation is used, the pids are `not` unique
+ across the universe of connected MPI processes.  This is ok, as long as
+ pids are interpreted `only` on the process that owns them.
+
+ Only for MPI-1 are the lpid''s equal to the `global` pids.  The local pids
+ can be thought of as a reference not to the remote process itself, but
+ how the remote process can be reached from this process.  We may want to
+ have a structure 'MPID_Lpid_t' that contains information on the remote
+ process, such as (for TCP) the hostname, ip address (it may be different if
+ multiple interfaces are supported; we may even want plural ip addresses for
+ striping communication), and port (or ports).  For shared memory connected
+ processes, it might have the address of a remote queue.  The lpid number
+ is an index into a table of 'MPID_Lpid_t'''s that contain this (device- and
+ method-specific) information.
+
+ Module:
+ Group-DS
+
+ S*/
+struct MPIR_Group {
+    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    int          size;           /* Size of a group */
+    int          rank;           /* rank of this process relative to this
+				    group */
+    int          idx_of_first_lpid;
+    MPIR_Group_pmap_t *lrank_to_lpid; /* Array mapping a local rank to local
+					 process number */
+    int          is_local_dense_monotonic; /* see NOTE-G1 */
+
+    /* We may want some additional data for the RMA synchronization calls */
+  /* Other, device-specific information */
+#ifdef MPID_DEV_GROUP_DECL
+    MPID_DEV_GROUP_DECL
+#endif
+};
+
+/* NOTE-G1: is_local_dense_monotonic will be true iff the group meets the
+ * following criteria:
+ * 1) the lpids are all in the range [0,size-1], i.e. a subset of comm world
+ * 2) the pids are sequentially numbered in increasing order, without any gaps,
+ *    stride, or repetitions
+ *
+ * This additional information allows us to handle the common case (insofar as
+ * group ops are common) for MPI_Group_translate_ranks where group2 is
+ * group_of(MPI_COMM_WORLD), or some simple subset.  This is an important use
+ * case for many MPI tool libraries, such as Scalasca.
+ */
+
+extern MPIU_Object_alloc_t MPIR_Group_mem;
+/* Preallocated group objects */
+#define MPIR_GROUP_N_BUILTIN 1
+extern MPIR_Group MPIR_Group_builtin[MPIR_GROUP_N_BUILTIN];
+extern MPIR_Group MPIR_Group_direct[];
+
+/* Object for empty group */
+extern MPIR_Group * const MPIR_Group_empty;
+
+#define MPIR_Group_add_ref( _group ) \
+    do { MPIU_Object_add_ref( _group ); } while (0)
+
+#define MPIR_Group_release_ref( _group, _inuse ) \
+     do { MPIU_Object_release_ref( _group, _inuse ); } while (0)
+
+void MPIR_Group_setup_lpid_list( MPIR_Group * );
+int MPIR_Group_create( int, MPIR_Group ** );
+int MPIR_Group_release(MPIR_Group *group_ptr);
+
+int MPIR_Group_compare_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, int *result);
+int MPIR_Group_difference_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_Group **new_group_ptr);
+int MPIR_Group_excl_impl(MPIR_Group *group_ptr, int n, const int *ranks, MPIR_Group **new_group_ptr);
+int MPIR_Group_free_impl(MPIR_Group *group_ptr);
+int MPIR_Group_incl_impl(MPIR_Group *group_ptr, int n, const int *ranks, MPIR_Group **new_group_ptr);
+int MPIR_Group_intersection_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_Group **new_group_ptr);
+int MPIR_Group_range_excl_impl(MPIR_Group *group_ptr, int n, int ranges[][3], MPIR_Group **new_group_ptr);
+int MPIR_Group_range_incl_impl(MPIR_Group *group_ptr, int n, int ranges[][3], MPIR_Group **new_group_ptr);
+int MPIR_Group_translate_ranks_impl(MPIR_Group *group_ptr1, int n, const int *ranks1,
+                                     MPIR_Group *group_ptr2, int *ranks2);
+int MPIR_Group_union_impl(MPIR_Group *group_ptr1, MPIR_Group *group_ptr2, MPIR_Group **new_group_ptr);
+int MPIR_Group_check_subset(MPIR_Group * group_ptr, MPIR_Comm * comm_ptr);
+int MPIR_Group_init(void);
+
+#endif /* MPIR_GROUP_H_INCLUDED */
diff --git a/src/include/mpir_info.h b/src/include/mpir_info.h
new file mode 100644
index 0000000..e43aa83
--- /dev/null
+++ b/src/include/mpir_info.h
@@ -0,0 +1,105 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_INFO_H_INCLUDED
+#define MPIR_INFO_H_INCLUDED
+
+/* ------------------------------------------------------------------------- */
+/* Info */
+/*TInfoOverview.tex
+
+  'MPI_Info' provides a way to create a list of '(key,value)' pairs
+  where the 'key' and 'value' are both strings.  Because many routines, both
+  in the MPI implementation and in related APIs such as the PMI process
+  management interface, require 'MPI_Info' arguments, we define a simple
+  structure for each 'MPI_Info' element.  Elements are allocated by the
+  generic object allocator; the head element is always empty (no 'key'
+  or 'value' is defined on the head element).
+
+  For simplicity, we have not abstracted the info data structures;
+  routines that want to work with the linked list may do so directly.
+  Because the 'MPI_Info' type is a handle and not a pointer, an MPIU
+  (utility) routine is provided to handle the
+  deallocation of 'MPIR_Info' elements.  See the implementation of
+  'MPI_Info_create' for how an Info type is allocated.
+
+  Thread Safety:
+
+  The info interface itself is not thread-robust.  In particular, the routines
+  'MPI_INFO_GET_NKEYS' and 'MPI_INFO_GET_NTHKEY' assume that no other
+  thread modifies the info key.  (If the info routines had the concept
+  of a next value, they would not be thread safe.  As it stands, a user
+  must be careful if several threads have access to the same info object.)
+  Further, 'MPI_INFO_DUP', while not
+  explicitly advising implementers to be careful of one thread modifying the
+  'MPI_Info' structure while 'MPI_INFO_DUP' is copying it, requires that the
+  operation take place in a thread-safe manner.
+  There isn''t much that we can do about these cases.  There are other cases
+  that must be handled.  In particular, multiple threads are allowed to
+  update the same info value.  Thus, all of the update routines must be thread
+  safe; the simple implementation used in the MPICH implementation uses locks.
+  Note that the 'MPI_Info_delete' call does not need a lock; the definition of
+  thread-safety means that any order of the calls functions correctly; since
+  it is invalid either to delete the same 'MPI_Info' twice or to modify an
+  'MPI_Info' that has been deleted, only one thread at a time can call
+  'MPI_Info_free' on any particular 'MPI_Info' value.
+
+  T*/
+/*S
+  MPIR_Info - Structure of an MPID info
+
+  Notes:
+  There is no reference count because 'MPI_Info' values, unlike other MPI
+  objects, may be changed after they are passed to a routine without
+  changing the routine''s behavior.  In other words, any routine that uses
+  an 'MPI_Info' object must make a copy or otherwise act on any info value
+  that it needs.
+
+  A linked list is used because the typical 'MPI_Info' list will be short
+  and a simple linked list is easy to implement and to maintain.  Similarly,
+  a single structure rather than separate header and element structures are
+  defined for simplicity.  No separate thread lock is provided because
+  info routines are not performance critical; they may use the single
+  critical section lock in the 'MPIR_Process' structure when they need a
+  thread lock.
+
+  This particular form of linked list (in particular, with this particular
+  choice of the first two members) is used because it allows us to use
+  the same routines to manage this list as are used to manage the
+  list of free objects (in the file 'src/util/mem/handlemem.c').  In
+  particular, if lock-free routines for updating a linked list are
+  provided, they can be used for managing the 'MPIR_Info' structure as well.
+
+  The MPI standard requires that keys can be no less than 32 characters and
+  no more than 255 characters.  There is no mandated limit on the size
+  of values.
+
+  Module:
+  Info-DS
+  S*/
+struct MPIR_Info {
+    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    struct MPIR_Info   *next;
+    char               *key;
+    char               *value;
+};
+extern MPIU_Object_alloc_t MPIR_Info_mem;
+/* Preallocated info objects */
+#define MPIR_INFO_N_BUILTIN 2
+extern MPIR_Info MPIR_Info_builtin[MPIR_INFO_N_BUILTIN];
+extern MPIR_Info MPIR_Info_direct[];
+
+int MPIR_Info_get_impl(MPIR_Info *info_ptr, const char *key, int valuelen, char *value, int *flag);
+void MPIR_Info_get_nkeys_impl(MPIR_Info *info_ptr, int *nkeys);
+int MPIR_Info_get_nthkey_impl(MPIR_Info *info, int n, char *key);
+void MPIR_Info_get_valuelen_impl(MPIR_Info *info_ptr, const char *key, int *valuelen, int *flag);
+int MPIR_Info_set_impl(MPIR_Info *info_ptr, const char *key, const char *value);
+int MPIR_Info_dup_impl(MPIR_Info *info_ptr, MPIR_Info **new_info_ptr);
+void MPIU_Info_free( MPIR_Info *info_ptr );
+int MPIU_Info_alloc(MPIR_Info **info_p_p);
+
+#endif /* MPIR_INFO_H_INCLUDED */
diff --git a/src/include/mpimem.h b/src/include/mpir_mem.h
similarity index 96%
rename from src/include/mpimem.h
rename to src/include/mpir_mem.h
index 97bbd2d..58381fe 100644
--- a/src/include/mpimem.h
+++ b/src/include/mpir_mem.h
@@ -3,12 +3,10 @@
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
-#ifndef MPIMEM_H_INCLUDED
-#define MPIMEM_H_INCLUDED
+#ifndef MPIR_MEM_H_INCLUDED
+#define MPIR_MEM_H_INCLUDED
 
-#ifndef MPICHCONF_H_INCLUDED
-#error 'mpimem.h requires that mpichconf.h be included first'
-#endif
+#include "mpichconf.h"
 
 /* Make sure that we have the definitions for the malloc routines and size_t */
 #include <stdio.h>
@@ -21,7 +19,6 @@
 extern "C" {
 #endif
 
-#include "mpichconf.h"
 #include "mpl.h"
 
 /* Define attribute as empty if it has no definition */
@@ -34,7 +31,7 @@ extern MPL_dbg_class MPIR_DBG_STRING;
 #endif /* MPL_USE_DBG_LOGGING */
 
 /* ------------------------------------------------------------------------- */
-/* mpimem.h */
+/* mpir_mem.h */
 /* ------------------------------------------------------------------------- */
 /* Memory allocation */
 /* style: allow:malloc:2 sig:0 */
@@ -79,12 +76,14 @@ extern MPL_dbg_class MPIR_DBG_STRING;
   D*/
 
 /* Define the string copy and duplication functions */
-/* ---------------------------------------------------------------------- */
-/* FIXME: Global types like this need to be discussed and agreed to */
-typedef int MPIU_BOOL;
-
 /* ------------------------------------------------------------------------- */
 
+#define MPIU_Memcpy(dst, src, len)                \
+    do {                                          \
+        MPIU_MEM_CHECK_MEMCPY((dst),(src),(len)); \
+        memcpy((dst), (src), (len));              \
+    } while (0)
+
 #ifdef USE_MEMORY_TRACING
 
 /* Define these as invalid C to catch their use in the code */
@@ -261,10 +260,11 @@ if (pointer_) { \
 /* valgrind macros are now provided by MPL (via mpl.h included in mpiimpl.h) */
 
 /* ------------------------------------------------------------------------- */
-/* end of mpimem.h */
+/* end of mpir_mem.h */
 /* ------------------------------------------------------------------------- */
 
 #if defined(__cplusplus)
 }
 #endif
-#endif
+
+#endif  /* MPIR_MEM_H_INCLUDED */
diff --git a/src/include/mpir_misc.h b/src/include/mpir_misc.h
new file mode 100644
index 0000000..ca551c7
--- /dev/null
+++ b/src/include/mpir_misc.h
@@ -0,0 +1,99 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_MISC_H_INCLUDED
+#define MPIR_MISC_H_INCLUDED
+
+#define MPIR_UNIVERSE_SIZE_NOT_SET -1
+#define MPIR_UNIVERSE_SIZE_NOT_AVAILABLE -2
+
+#define MPIR_FINALIZE_CALLBACK_PRIO 5
+#define MPIR_FINALIZE_CALLBACK_HANDLE_CHECK_PRIO 1
+#define MPIR_FINALIZE_CALLBACK_DEFAULT_PRIO 0
+#define MPIR_FINALIZE_CALLBACK_MAX_PRIO 10
+
+typedef int MPIU_BOOL;
+
+/* Define a typedef for the errflag value used by many internal
+ * functions.  If an error needs to be returned, these values can be
+ * used to signal such.  More details can be found further down in the
+ * code with the bitmasking logic */
+typedef enum {
+    MPIR_ERR_NONE = MPI_SUCCESS,
+    MPIR_ERR_PROC_FAILED = MPIX_ERR_PROC_FAILED,
+    MPIR_ERR_OTHER = MPI_ERR_OTHER
+} MPIR_Errflag_t;
+
+/*E
+  MPIR_Lang_t - Known language bindings for MPI
+
+  A few operations in MPI need to know what language they were called from
+  or created by.  This type enumerates the possible languages so that
+  the MPI implementation can choose the correct behavior.  An example of this
+  are the keyval attribute copy and delete functions.
+
+  Module:
+  Attribute-DS
+  E*/
+typedef enum MPIR_Lang_t {
+    MPIR_LANG__C
+#ifdef HAVE_FORTRAN_BINDING
+    , MPIR_LANG__FORTRAN
+    , MPIR_LANG__FORTRAN90
+#endif
+#ifdef HAVE_CXX_BINDING
+    , MPIR_LANG__CXX
+#endif
+} MPIR_Lang_t;
+
+typedef enum MPIR_MPI_State_t {
+    MPICH_MPI_STATE__PRE_INIT=0,
+    MPICH_MPI_STATE__IN_INIT,
+    MPICH_MPI_STATE__POST_INIT,
+    MPICH_MPI_STATE__POST_FINALIZED
+} MPIR_MPI_State_t;
+
+extern const char MPIR_Version_string[];
+extern const char MPIR_Version_date[];
+extern const char MPIR_Version_configure[];
+extern const char MPIR_Version_device[];
+extern const char MPIR_Version_CC[];
+extern const char MPIR_Version_CXX[];
+extern const char MPIR_Version_F77[];
+extern const char MPIR_Version_FC[];
+
+int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
+                   void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype);
+
+/*@ MPIR_Add_finalize - Add a routine to be called when MPI_Finalize is invoked
+
++ routine - Routine to call
+. extra   - Void pointer to data to pass to the routine
+- priority - Indicates the priority of this callback and controls the order
+  in which callbacks are executed.  Use a priority of zero for most handlers;
+  higher priorities will be executed first.
+
+Notes:
+  The routine 'MPID_Finalize' is executed with priority
+  'MPIR_FINALIZE_CALLBACK_PRIO' (currently defined as 5).  Handlers with
+  a higher priority execute before 'MPID_Finalize' is called; those with
+  a lower priority after 'MPID_Finalize' is called.
+@*/
+void MPIR_Add_finalize( int (*routine)( void * ), void *extra, int priority );
+
+/* Routines for determining local and remote processes */
+int MPIU_Find_local_and_external(struct MPIR_Comm *comm, int *local_size_p, int *local_rank_p, int **local_ranks_p,
+                                 int *external_size_p, int *external_rank_p, int **external_ranks_p,
+                                 int **intranode_table, int **internode_table_p);
+int MPIU_Get_internode_rank(MPIR_Comm *comm_ptr, int r);
+int MPIU_Get_intranode_rank(MPIR_Comm *comm_ptr, int r);
+
+int MPIR_Close_port_impl(const char *port_name);
+int MPIR_Open_port_impl(MPIR_Info *info_ptr, char *port_name);
+int MPIR_Cancel_impl(MPIR_Request *request_ptr);
+
+#endif /* MPIR_MISC_H_INCLUDED */
diff --git a/src/include/mpir_misc_post.h b/src/include/mpir_misc_post.h
new file mode 100644
index 0000000..ca53ee7
--- /dev/null
+++ b/src/include/mpir_misc_post.h
@@ -0,0 +1,35 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_MISC_POST_H_INCLUDED
+#define MPIR_MISC_POST_H_INCLUDED
+
+/* Pull the error status out of the tag space and put it into an errflag. */
+#undef FUNCNAME
+#define FUNCNAME MPIR_process_status
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static inline void MPIR_Process_status(MPI_Status *status, MPIR_Errflag_t *errflag)
+{
+    if (MPI_PROC_NULL != status->MPI_SOURCE &&
+        (MPIX_ERR_REVOKED == MPIR_ERR_GET_CLASS(status->MPI_ERROR) ||
+        MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(status->MPI_ERROR) ||
+        MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) && !*errflag) {
+        /* If the receive was completed within the MPID_Recv, handle the
+        * errflag here. */
+        if (MPIR_TAG_CHECK_PROC_FAILURE_BIT(status->MPI_TAG) ||
+            MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(status->MPI_ERROR)) {
+            *errflag = MPIR_ERR_PROC_FAILED;
+            MPIR_TAG_CLEAR_ERROR_BITS(status->MPI_TAG);
+        } else {
+            *errflag = MPIR_ERR_OTHER;
+            MPIR_TAG_CLEAR_ERROR_BITS(status->MPI_TAG);
+        }
+    }
+}
+
+#endif /* MPIR_MISC_POST_H_INCLUDED */
diff --git a/src/include/mpihandlemem.h b/src/include/mpir_objects.h
similarity index 66%
rename from src/include/mpihandlemem.h
rename to src/include/mpir_objects.h
index 1de6dda..a11ebc1 100644
--- a/src/include/mpihandlemem.h
+++ b/src/include/mpir_objects.h
@@ -3,11 +3,99 @@
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
-#ifndef MPIHANDLE_H_INCLUDED
-#define MPIHANDLE_H_INCLUDED
+
+#ifndef MPIR_OBJECTS_H_INCLUDED
+#define MPIR_OBJECTS_H_INCLUDED
 
 #include "mpichconf.h"
 
+/*TDSOverview.tex
+
+  MPI has a number of data structures, most of which are represented by
+  an opaque handle in an MPI program.  In the MPICH implementation of MPI,
+  these handles are represented
+  as integers; this makes implementation of the C/Fortran handle transfer
+  calls (part of MPI-2) easy.
+
+  MPID objects (again with the possible exception of 'MPI_Request's)
+  are allocated by a common set of object allocation functions.
+  These are
+.vb
+    void *MPIU_Handle_obj_create( MPIU_Object_alloc_t *objmem )
+    void MPIU_Handle_obj_destroy( MPIU_Object_alloc_t *objmem, void *object )
+.ve
+  where 'objmem' is a pointer to a memory allocation object that knows
+  enough to allocate objects, including the
+  size of the object and the location of preallocated memory, as well
+  as the type of memory allocator.  By providing the routines to allocate and
+  free the memory, we make it easy to use the same interface to allocate both
+  local and shared memory for objects (always using the same kind for each
+  type of object).
+
+  The names create/destroy were chosen because they are different from
+  new/delete (C++ operations) and malloc/free.
+  Any name choice will have some conflicts with other uses, of course.
+
+  Reference Counts:
+  Many MPI objects have reference count semantics.
+  The semantics of MPI require that many objects that have been freed by the
+  user
+  (e.g., with 'MPI_Type_free' or 'MPI_Comm_free') remain valid until all
+  pending
+  references to that object (e.g., by an 'MPI_Irecv') are complete.  There
+  are several ways to implement this; MPICH uses `reference counts` in the
+  objects.  To support the 'MPI_THREAD_MULTIPLE' level of thread-safety, these
+  reference counts must be accessed and updated atomically.
+  A reference count for
+  `any` object can be incremented (atomically)
+  with 'MPIU_Object_add_ref(objptr)'
+  and decremented with 'MPIU_Object_release_ref(objptr,inuse_ptr)'.
+  These have been designed so that they can be implemented as inlined
+  macros rather than function calls, even in the multithreaded case, and
+  can use special processor instructions that guarantee atomicity to
+  avoid thread locks.
+  The decrement routine sets the value pointed at by 'inuse_ptr' to 0 if
+  the postdecrement value of the reference counter is zero, and to a non-zero
+  value otherwise.  If this value is zero, then the routine that decremented
+  the
+  reference count should free the object.  This may be as simple as
+  calling 'MPIU_Handle_obj_destroy' (for simple objects with no other allocated
+  storage) or may require calling a separate routine to destroy the object.
+  Because MPI uses 'MPI_xxx_free' to both decrement the reference count and
+  free the object if the reference count is zero, we avoid the use of 'free'
+  in the MPID routines.
+
+  The 'inuse_ptr' approach is used rather than requiring the post-decrement
+  value because, for reference-count semantics, all that is necessary is
+  to know when the reference count reaches zero, and this can sometimes
+  be implemented more cheaply than requiring the post-decrement value (e.g.,
+  on IA32, there is an instruction for this operation).
+
+  Question:
+  Should we state that this is a macro so that we can use a register for
+  the output value?  That avoids a store.  Alternately, have the macro
+  return the value as if it was a function?
+
+  Structure Definitions:
+  The structure definitions in this document define `only` that part of
+  a structure that may be used by code that is making use of the ADI.
+  Thus, some structures, such as 'MPID_Comm', have many defined fields;
+  these are used to support MPI routines such as 'MPI_Comm_size' and
+  'MPI_Comm_remote_group'.  Other structures may have few or no defined
+  members; these structures have no fields used outside of the ADI.
+  In C++ terms,  all members of these structures are 'private'.
+
+  For the initial implementation, we expect that the structure definitions
+  will be designed for the multimethod device.  However, all items that are
+  specific to a particular device (including the multi-method device)
+  will be placed at the end of the structure;
+  the document will clearly identify the members that all implementations
+  will provide.  This simplifies much of the code in both the ADI and the
+  implementation of the MPI routines because structure members can be directly
+  accessed rather than using some macro or C++ style method interface.
+
+ T*/
+
 /*TOpaqOverview.tex
   MPI Opaque Objects:
 
@@ -30,8 +118,8 @@
 
   T*/
 
-/* Known MPI object types.  These are used for both the error handlers 
-   and for the handles.  This is a 4 bit value.  0 is reserved for so 
+/* Known MPI object types.  These are used for both the error handlers
+   and for the handles.  This is a 4 bit value.  0 is reserved for so
    that all-zero handles can be flagged as an error. */
 /*E
   MPIR_Object_kind - Object kind (communicator, window, or file)
@@ -40,8 +128,8 @@
   This enum is used by keyvals and errhandlers to indicate the type of
   object for which MPI opaque types the data is valid.  These are defined
   as bits to allow future expansion to the case where an object is value for
-  multiple types (for example, we may want a universal error handler for 
-  errors return).  This is also used to indicate the type of MPI object a 
+  multiple types (for example, we may want a universal error handler for
+  errors return).  This is also used to indicate the type of MPI object a
   MPI handle represents.  It is an enum because only this applies only the
   the MPI and internal MPICH objects.
 
@@ -72,7 +160,8 @@ typedef enum MPIR_Object_kind {
   MPIR_PROCGROUP  = 0xc,               /* These are internal device objects */
   MPIR_VCONN      = 0xd,
   MPIR_GREQ_CLASS = 0xf
-  } MPIR_Object_kind;
+} MPIR_Object_kind;
+
 
 #define HANDLE_MPI_KIND_SHIFT 26
 #define HANDLE_GET_MPI_KIND(a) ( ((a)&0x3c000000) >> HANDLE_MPI_KIND_SHIFT )
@@ -382,4 +471,86 @@ extern void *MPIU_Handle_obj_alloc(MPIU_Object_alloc_t *);
 extern void *MPIU_Handle_obj_alloc_unsafe(MPIU_Object_alloc_t *);
 extern void MPIU_Handle_obj_free( MPIU_Object_alloc_t *, void * );
 void *MPIU_Handle_get_ptr_indirect( int, MPIU_Object_alloc_t * );
-#endif
+
+
+/* Convert Handles to objects for MPI types that have predefined objects */
+/* TODO examine generated assembly for this construct, it's probably suboptimal
+ * on Blue Gene.  An if/else if/else might help the compiler out.  It also lets
+ * us hint that one case is likely(), usually the BUILTIN case. */
+#define MPIR_Getb_ptr(kind,a,bmsk,ptr)                                  \
+{                                                                       \
+   switch (HANDLE_GET_KIND(a)) {                                        \
+      case HANDLE_KIND_BUILTIN:                                         \
+          ptr=MPIR_##kind##_builtin+((a)&(bmsk));                       \
+          break;                                                        \
+      case HANDLE_KIND_DIRECT:                                          \
+          ptr=MPIR_##kind##_direct+HANDLE_INDEX(a);                     \
+          break;                                                        \
+      case HANDLE_KIND_INDIRECT:                                        \
+          ptr=((MPIR_##kind*)                                           \
+               MPIU_Handle_get_ptr_indirect(a,&MPIR_##kind##_mem));     \
+          break;                                                        \
+      case HANDLE_KIND_INVALID:                                         \
+      default:								\
+          ptr=0;							\
+          break;							\
+    }                                                                   \
+}
+
+/* Convert handles to objects for MPI types that do _not_ have any predefined
+   objects */
+#define MPIR_Get_ptr(kind,a,ptr)					\
+{									\
+   switch (HANDLE_GET_KIND(a)) {					\
+      case HANDLE_KIND_DIRECT:						\
+          ptr=MPIR_##kind##_direct+HANDLE_INDEX(a);			\
+          break;							\
+      case HANDLE_KIND_INDIRECT:					\
+          ptr=((MPIR_##kind*)						\
+               MPIU_Handle_get_ptr_indirect(a,&MPIR_##kind##_mem));	\
+          break;							\
+      case HANDLE_KIND_INVALID:						\
+      case HANDLE_KIND_BUILTIN:						\
+      default:								\
+          ptr=0;							\
+          break;							\
+     }									\
+}
+
+/* FIXME: the masks should be defined with the handle definitions instead
+   of inserted here as literals */
+#define MPIR_Comm_get_ptr(a,ptr)       MPIR_Getb_ptr(Comm,a,0x03ffffff,ptr)
+#define MPIR_Group_get_ptr(a,ptr)      MPIR_Getb_ptr(Group,a,0x03ffffff,ptr)
+#define MPIR_Errhandler_get_ptr(a,ptr) MPIR_Getb_ptr(Errhandler,a,0x3,ptr)
+#define MPIR_Op_get_ptr(a,ptr)         MPIR_Getb_ptr(Op,a,0x000000ff,ptr)
+#define MPIR_Info_get_ptr(a,ptr)       MPIR_Getb_ptr(Info,a,0x03ffffff,ptr)
+#define MPIR_Win_get_ptr(a,ptr)        MPIR_Get_ptr(Win,a,ptr)
+#define MPIR_Request_get_ptr(a,ptr)    MPIR_Get_ptr(Request,a,ptr)
+#define MPIR_Grequest_class_get_ptr(a,ptr) MPIR_Get_ptr(Grequest_class,a,ptr)
+/* Keyvals have a special format. This is roughly MPIR_Getb_ptr, but
+   the handle index is in a smaller bit field.  In addition,
+   there is no storage for the builtin keyvals.
+   For the indirect case, we mask off the part of the keyval that is
+   in the bits normally used for the indirect block index.
+*/
+#define MPIR_Keyval_get_ptr(a,ptr)     \
+{                                                                       \
+   switch (HANDLE_GET_KIND(a)) {                                        \
+      case HANDLE_KIND_BUILTIN:                                         \
+          ptr=0;                                                        \
+          break;                                                        \
+      case HANDLE_KIND_DIRECT:                                          \
+          ptr=MPIR_Keyval_direct+((a)&0x3fffff);                        \
+          break;                                                        \
+      case HANDLE_KIND_INDIRECT:                                        \
+          ptr=((MPIR_Keyval*)                                           \
+             MPIU_Handle_get_ptr_indirect((a)&0xfc3fffff,&MPIR_Keyval_mem)); \
+          break;                                                        \
+      case HANDLE_KIND_INVALID:                                         \
+      default:								\
+          ptr=0;							\
+          break;							\
+    }                                                                   \
+}
+
+#endif  /* MPIR_OBJECTS_H_INCLUDED */
diff --git a/src/include/mpir_op.h b/src/include/mpir_op.h
new file mode 100644
index 0000000..7a22a7a
--- /dev/null
+++ b/src/include/mpir_op.h
@@ -0,0 +1,169 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_OP_H_INCLUDED
+#define MPIR_OP_H_INCLUDED
+
+/*E
+  MPIR_Op_kind - Enumerates types of MPI_Op types
+
+  Notes:
+  These are needed for implementing 'MPI_Accumulate', since only predefined
+  operations are allowed for that operation.
+
+  A gap in the enum values was made to allow additional predefined operations
+  to be inserted.  This might include future additions to MPI or experimental
+  extensions (such as a Read-Modify-Write operation).
+
+  Module:
+  Collective-DS
+  E*/
+typedef enum MPIR_Op_kind {
+    MPIR_OP_KIND__NULL=0,
+    MPIR_OP_KIND__MAX=1,
+    MPIR_OP_KIND__MIN=2,
+    MPIR_OP_KIND__SUM=3,
+    MPIR_OP_KIND__PROD=4,
+    MPIR_OP_KIND__LAND=5,
+    MPIR_OP_KIND__BAND=6,
+    MPIR_OP_KIND__LOR=7,
+    MPIR_OP_KIND__BOR=8,
+    MPIR_OP_KIND__LXOR=9,
+    MPIR_OP_KIND__BXOR=10,
+    MPIR_OP_KIND__MAXLOC=11,
+    MPIR_OP_KIND__MINLOC=12,
+    MPIR_OP_KIND__REPLACE=13,
+    MPIR_OP_KIND__NO_OP=14,
+    MPIR_OP_KIND__USER_NONCOMMUTE=32,
+    MPIR_OP_KIND__USER=33
+} MPIR_Op_kind;
+
+/*S
+  MPIR_User_function - Definition of a user function for MPI_Op types.
+
+  Notes:
+  This includes a 'const' to make clear which is the 'in' argument and
+  which the 'inout' argument, and to indicate that the 'count' and 'datatype'
+  arguments are unchanged (they are addresses in an attempt to allow
+  interoperation with Fortran).  It includes 'restrict' to emphasize that
+  no overlapping operations are allowed.
+
+  We need to include a Fortran version, since those arguments will
+  have type 'MPI_Fint *' instead.  We also need to add a test to the
+  test suite for this case; in fact, we need tests for each of the handle
+  types to ensure that the transferred handle works correctly.
+
+  This is part of the collective module because user-defined operations
+  are valid only for the collective computation routines and not for
+  RMA accumulate.
+
+  Yes, the 'restrict' is in the correct location.  C compilers that
+  support 'restrict' should be able to generate code that is as good as a
+  Fortran compiler would for these functions.
+
+  We should note on the manual pages for user-defined operations that
+  'restrict' should be used when available, and that a cast may be
+  required when passing such a function to 'MPI_Op_create'.
+
+  Question:
+  Should each of these function types have an associated typedef?
+
+  Should there be a C++ function here?
+
+  Module:
+  Collective-DS
+  S*/
+typedef union MPIR_User_function {
+    void (*c_function) ( const void *, void *,
+			 const int *, const MPI_Datatype * );
+    void (*f77_function) ( const void *, void *,
+			  const MPI_Fint *, const MPI_Fint * );
+} MPIR_User_function;
+/* FIXME: Should there be "restrict" in the definitions above, e.g.,
+   (*c_function)( const void restrict * , void restrict *, ... )? */
+
+/*S
+  MPIR_Op - MPI_Op structure
+
+  Notes:
+  All of the predefined functions are commutative.  Only user functions may
+  be noncommutative, so there are two separate op types for commutative and
+  non-commutative user-defined operations.
+
+  Operations do not require reference counts because there are no nonblocking
+  operations that accept user-defined operations.  Thus, there is no way that
+  a valid program can free an 'MPI_Op' while it is in use.
+
+  Module:
+  Collective-DS
+  S*/
+typedef struct MPIR_Op {
+     MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+     MPIR_Op_kind       kind;
+     MPIR_Lang_t        language;
+     MPIR_User_function function;
+  } MPIR_Op;
+#define MPIR_OP_N_BUILTIN 15
+extern MPIR_Op MPIR_Op_builtin[MPIR_OP_N_BUILTIN];
+extern MPIR_Op MPIR_Op_direct[];
+extern MPIU_Object_alloc_t MPIR_Op_mem;
+
+#define MPIR_Op_add_ref(_op) \
+    do { MPIU_Object_add_ref(_op); } while (0)
+#define MPIR_Op_release_ref( _op, _inuse ) \
+    do { MPIU_Object_release_ref( _op, _inuse ); } while (0)
+
+/* release and free-if-not-in-use helper */
+#define MPIR_Op_release(op_p_)                           \
+    do {                                                 \
+        int in_use_;                                     \
+        MPIR_Op_release_ref((op_p_), &in_use_);          \
+        if (!in_use_) {                                  \
+            MPIU_Handle_obj_free(&MPIR_Op_mem, (op_p_)); \
+        }                                                \
+    } while (0)
+
+void MPIR_MAXF  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_MINF  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_SUM  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_PROD  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_LAND  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_BAND  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_LOR  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_BOR  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_LXOR  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_BXOR  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_MAXLOC  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_MINLOC  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_REPLACE  ( void *, void *, int *, MPI_Datatype * ) ;
+void MPIR_NO_OP  ( void *, void *, int *, MPI_Datatype * ) ;
+
+int MPIR_MAXF_check_dtype  ( MPI_Datatype ) ;
+int MPIR_MINF_check_dtype ( MPI_Datatype ) ;
+int MPIR_SUM_check_dtype  ( MPI_Datatype ) ;
+int MPIR_PROD_check_dtype  ( MPI_Datatype ) ;
+int MPIR_LAND_check_dtype  ( MPI_Datatype ) ;
+int MPIR_BAND_check_dtype  ( MPI_Datatype ) ;
+int MPIR_LOR_check_dtype  ( MPI_Datatype ) ;
+int MPIR_BOR_check_dtype  ( MPI_Datatype ) ;
+int MPIR_LXOR_check_dtype ( MPI_Datatype ) ;
+int MPIR_BXOR_check_dtype  ( MPI_Datatype ) ;
+int MPIR_MAXLOC_check_dtype  ( MPI_Datatype ) ;
+int MPIR_MINLOC_check_dtype  ( MPI_Datatype ) ;
+int MPIR_REPLACE_check_dtype  ( MPI_Datatype ) ;
+int MPIR_NO_OP_check_dtype  ( MPI_Datatype ) ;
+
+#define MPIR_PREDEF_OP_COUNT 14
+extern MPI_User_function *MPIR_Op_table[];
+
+typedef int (MPIR_Op_check_dtype_fn) ( MPI_Datatype );
+extern MPIR_Op_check_dtype_fn *MPIR_Op_check_dtype_table[];
+
+#define MPIR_OP_HDL_TO_FN(op) MPIR_Op_table[((op)&0xf) - 1]
+#define MPIR_OP_HDL_TO_DTYPE_FN(op) MPIR_Op_check_dtype_table[((op)&0xf) - 1]
+
+#endif /* MPIR_OP_H_INCLUDED */
diff --git a/src/include/oputil.h b/src/include/mpir_op_util.h
similarity index 99%
rename from src/include/oputil.h
rename to src/include/mpir_op_util.h
index 0a6f23f..96d0b6e 100644
--- a/src/include/oputil.h
+++ b/src/include/mpir_op_util.h
@@ -5,8 +5,8 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-#ifndef OPUTIL_H_INCLUDED
-#define OPUTIL_H_INCLUDED
+#ifndef MPIR_OP_UTIL_H_INCLUDED
+#define MPIR_OP_UTIL_H_INCLUDED
 
 /* The MPI Standard (MPI-2.1, sec 5.9.2) defines which predfined reduction
    operators are valid by groups of types:
@@ -232,7 +232,7 @@ MPIR_OP_TYPE_GROUP(FORTRAN_INTEGER)
 /* C types needed to support some of the complex types.
 
    FIXME These are a hack in most cases, but they seem to work in practice
-   and it's what we were doing prior to the oputil.h refactoring. */
+   and it's what we were doing prior to the mpir_op_util.h refactoring. */
 typedef struct { 
     float re;
     float im;
@@ -372,4 +372,4 @@ typedef struct {
     MPIR_OP_TYPE_GROUP(COMPLEX_EXTRA)         \
     MPIR_OP_TYPE_GROUP(BYTE_EXTRA)
 
-#endif /* OPUTIL_H_INCLUDED */
+#endif /* MPIR_OP_UTIL_H_INCLUDED */
diff --git a/src/include/mpir_pointers.h b/src/include/mpir_pointers.h
new file mode 100644
index 0000000..763f8cd
--- /dev/null
+++ b/src/include/mpir_pointers.h
@@ -0,0 +1,79 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_POINTERS_H_INCLUDED
+#define MPIR_POINTERS_H_INCLUDED
+
+#include "mpi.h"
+#include "mpichconf.h"
+#include "mpichconfconst.h"
+#include "mpir_assert.h"
+#include "mpl.h"
+
+
+/* This test is lame.  Should eventually include cookie test
+   and in-range addresses */
+#define MPIR_Valid_ptr_class(kind,ptr,errclass,err) \
+  {if (!(ptr)) { err = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, errclass, \
+                                             "**nullptrtype", "**nullptrtype %s", #kind ); } }
+
+#define MPIR_Info_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Info,ptr,MPI_ERR_INFO,err)
+/* Check not only for a null pointer but for an invalid communicator,
+   such as one that has been freed.  Let's try the ref_count as the test
+   for now */
+/* ticket #1441: check (refcount<=0) to cover the case of 0, an "over-free" of
+ * -1 or similar, and the 0xecec... case when --enable-g=mem is used */
+#define MPIR_Comm_valid_ptr(ptr,err,ignore_rev) {     \
+     MPIR_Valid_ptr_class(Comm,ptr,MPI_ERR_COMM,err); \
+     if ((ptr) && MPIU_Object_get_ref(ptr) <= 0) {    \
+         MPIR_ERR_SET(err,MPI_ERR_COMM,"**comm");     \
+         ptr = 0;                                     \
+     } else if ((ptr) && (ptr)->revoked && !(ignore_rev)) {        \
+         MPIR_ERR_SET(err,MPIX_ERR_REVOKED,"**comm"); \
+     }                                                \
+}
+#define MPIR_Win_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Win,ptr,MPI_ERR_WIN,err)
+#define MPIR_Group_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Group,ptr,MPI_ERR_GROUP,err)
+#define MPIR_Op_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Op,ptr,MPI_ERR_OP,err)
+#define MPIR_Errhandler_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Errhandler,ptr,MPI_ERR_ARG,err)
+#define MPIR_Request_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Request,ptr,MPI_ERR_REQUEST,err)
+#define MPIR_Keyval_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Keyval,ptr,MPI_ERR_KEYVAL,err)
+
+
+/* Assigns (src_) to (dst_), checking that (src_) fits in (dst_) without
+ * truncation.
+ *
+ * When fiddling with this macro, please keep C's overly complicated integer
+ * promotion/truncation/conversion rules in mind.  A discussion of these issues
+ * can be found in Chapter 5 of "Secure Coding in C and C++" by Robert Seacord.
+ */
+#define MPIU_Assign_trunc(dst_,src_,dst_type_)                                         \
+    do {                                                                               \
+        /* will catch some of the cases if the expr_inttype macros aren't available */ \
+        MPIU_Assert((src_) == (dst_type_)(src_));                                      \
+        dst_ = (dst_type_)(src_);                                                      \
+    } while (0)
+
+/*
+ * Ensure an MPI_Aint value fits into a signed int.
+ * Useful for detecting overflow when MPI_Aint is larger than an int.
+ *
+ * \param[in]  aint  Variable of type MPI_Aint
+ */
+#define MPIU_Ensure_Aint_fits_in_int(aint) \
+  MPIU_Assert((aint) == (MPI_Aint)(int)(aint));
+
+/*
+ * Ensure an MPI_Aint value fits into a pointer.
+ * Useful for detecting overflow when MPI_Aint is larger than a pointer.
+ *
+ * \param[in]  aint  Variable of type MPI_Aint
+ */
+#define MPIU_Ensure_Aint_fits_in_pointer(aint) \
+  MPIU_Assert((aint) == (MPI_Aint)(uintptr_t) MPIU_AINT_CAST_TO_VOID_PTR(aint));
+
+#endif /* MPIR_POINTERS_H_INCLUDED */
diff --git a/src/include/mpir_process.h b/src/include/mpir_process.h
new file mode 100644
index 0000000..d70e89b
--- /dev/null
+++ b/src/include/mpir_process.h
@@ -0,0 +1,63 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_PROCESS_H_INCLUDED
+#define MPIR_PROCESS_H_INCLUDED
+
+/* Per process data */
+typedef struct PreDefined_attrs {
+    int appnum;          /* Application number provided by mpiexec (MPI-2) */
+    int host;            /* host */
+    int io;              /* standard io allowed */
+    int lastusedcode;    /* last used error code (MPI-2) */
+    int tag_ub;          /* Maximum message tag */
+    int universe;        /* Universe size from mpiexec (MPI-2) */
+    int wtime_is_global; /* Wtime is global over processes in COMM_WORLD */
+} PreDefined_attrs;
+
+typedef struct MPICH_PerProcess_t {
+    OPA_int_t mpich_state; /* State of MPICH. Use OPA_int_t to make MPI_Initialized() etc.
+                              thread-safe per MPI-3.1.  See MPI-Forum ticket 357 */
+    int               do_error_checks;  /* runtime error check control */
+    struct MPIR_Comm  *comm_world;      /* Easy access to comm_world for
+                                           error handler */
+    struct MPIR_Comm  *comm_self;       /* Easy access to comm_self */
+    struct MPIR_Comm  *comm_parent;     /* Easy access to comm_parent */
+    struct MPIR_Comm  *icomm_world;     /* An internal version of comm_world
+					   that is separate from user's
+					   versions */
+    PreDefined_attrs  attrs;            /* Predefined attribute values */
+    int               tagged_coll_mask; /* Tag space mask for tagged collectives */
+
+    /* The topology routines dimsCreate is independent of any communicator.
+       If this pointer is null, the default routine is used */
+    int (*dimsCreate)( int, int, int *);
+
+    /* Attribute dup functions.  Here for lazy initialization */
+    int (*attr_dup)( int, MPIR_Attribute *, MPIR_Attribute ** );
+    int (*attr_free)( int, MPIR_Attribute ** );
+    /* There is no win_attr_dup function because there can be no MPI_Win_dup
+       function */
+    /* Routine to get the messages corresponding to dynamically created
+       error messages */
+    const char *(*errcode_to_string)( int );
+#ifdef HAVE_CXX_BINDING
+    /* Routines to call C++ functions from the C implementation of the
+       MPI reduction and attribute routines */
+    void (*cxx_call_op_fn)(const void *, void *, int, MPI_Datatype,
+			    MPI_User_function * );
+    /* Error handling functions.  As for the attribute functions,
+       we pass the integer file/comm/win, the address of the error code,
+       and the C function to call (itself a function defined by the
+       C++ interface and exported to C).  The first argument is used
+       to specify the kind (comm,file,win) */
+    void  (*cxx_call_errfn) ( int, int *, int *, void (*)(void) );
+#endif /* HAVE_CXX_BINDING */
+} MPICH_PerProcess_t;
+extern MPICH_PerProcess_t MPIR_Process;
+
+#endif /* MPIR_PROCESS_H_INCLUDED */
diff --git a/src/include/mpir_pt2pt.h b/src/include/mpir_pt2pt.h
new file mode 100644
index 0000000..729bf8b
--- /dev/null
+++ b/src/include/mpir_pt2pt.h
@@ -0,0 +1,20 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_PT2PT_H_INCLUDED
+#define MPIR_PT2PT_H_INCLUDED
+
+int MPIR_Ibsend_impl(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
+                     MPIR_Comm *comm_ptr, MPI_Request *request);
+int MPIR_Test_impl(MPI_Request *request, int *flag, MPI_Status *status);
+int MPIR_Testall_impl(int count, MPI_Request array_of_requests[], int *flag,
+                      MPI_Status array_of_statuses[]);
+int MPIR_Wait_impl(MPI_Request *request, MPI_Status *status);
+int MPIR_Waitall_impl(int count, MPI_Request array_of_requests[],
+                      MPI_Status array_of_statuses[]);
+
+#endif /* MPIR_PT2PT_H_INCLUDED */
diff --git a/src/util/refcount/mpir_refcount.h b/src/include/mpir_refcount.h
similarity index 100%
rename from src/util/refcount/mpir_refcount.h
rename to src/include/mpir_refcount.h
diff --git a/src/util/refcount/mpir_refcount_global.h b/src/include/mpir_refcount_global.h
similarity index 100%
rename from src/util/refcount/mpir_refcount_global.h
rename to src/include/mpir_refcount_global.h
diff --git a/src/util/refcount/mpir_refcount_pobj.h b/src/include/mpir_refcount_pobj.h
similarity index 100%
rename from src/util/refcount/mpir_refcount_pobj.h
rename to src/include/mpir_refcount_pobj.h
diff --git a/src/util/refcount/mpir_refcount_single.h b/src/include/mpir_refcount_single.h
similarity index 100%
rename from src/util/refcount/mpir_refcount_single.h
rename to src/include/mpir_refcount_single.h
diff --git a/src/include/mpir_request.h b/src/include/mpir_request.h
new file mode 100644
index 0000000..eb63831
--- /dev/null
+++ b/src/include/mpir_request.h
@@ -0,0 +1,314 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_REQUEST_H_INCLUDED
+#define MPIR_REQUEST_H_INCLUDED
+
+/* NOTE-R1: MPIR_REQUEST_KIND__MPROBE signifies that this is a request created by
+ * MPI_Mprobe or MPI_Improbe.  Since we use MPI_Request objects as our
+ * MPI_Message objects, we use this separate kind in order to provide stronger
+ * error checking.  Once a message (backed by a request) is promoted to a real
+ * request by calling MPI_Mrecv/MPI_Imrecv, we actually modify the kind to be
+ * MPIR_REQUEST_KIND__RECV in order to keep completion logic as simple as possible. */
+/*E
+  MPIR_Request_kind - Kinds of MPI Requests
+
+  Module:
+  Request-DS
+
+  E*/
+typedef enum MPIR_Request_kind_t {
+    MPIR_REQUEST_KIND__UNDEFINED,
+    MPIR_REQUEST_KIND__SEND,
+    MPIR_REQUEST_KIND__RECV,
+    MPIR_REQUEST_KIND__PREQUEST_SEND,
+    MPIR_REQUEST_KIND__PREQUEST_RECV,
+    MPIR_REQUEST_KIND__GREQUEST,
+    MPIR_REQUEST_KIND__COLL,
+    MPIR_REQUEST_KIND__MPROBE, /* see NOTE-R1 */
+    MPIR_REQUEST_KIND__RMA,
+    MPIR_REQUEST_KIND__LAST
+#ifdef MPID_REQUEST_KIND_DECL
+    , MPID_REQUEST_KIND_DECL
+#endif
+} MPIR_Request_kind_t;
+
+/* This currently defines a single structure type for all requests.
+   Eventually, we may want a union type, as used in MPICH-1 */
+/* Typedefs for Fortran generalized requests */
+typedef void (MPIR_Grequest_f77_cancel_function)(void *, MPI_Fint*, MPI_Fint *);
+typedef void (MPIR_Grequest_f77_free_function)(void *, MPI_Fint *);
+typedef void (MPIR_Grequest_f77_query_function)(void *, MPI_Fint *, MPI_Fint *);
+
+/* vtable-ish structure holding generalized request function pointers and other
+ * state.  Saves ~48 bytes in pt2pt requests on many platforms. */
+struct MPIR_Grequest_fns {
+    MPI_Grequest_cancel_function *cancel_fn;
+    MPI_Grequest_free_function   *free_fn;
+    MPI_Grequest_query_function  *query_fn;
+    MPIX_Grequest_poll_function   *poll_fn;
+    MPIX_Grequest_wait_function   *wait_fn;
+    void             *grequest_extra_state;
+    MPIX_Grequest_class         greq_class;
+    MPIR_Lang_t                  greq_lang;         /* language that defined
+                                                       the generalize req */
+};
+
+typedef struct MPIR_Grequest_class {
+     MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+     MPI_Grequest_query_function *query_fn;
+     MPI_Grequest_free_function *free_fn;
+     MPI_Grequest_cancel_function *cancel_fn;
+     MPIX_Grequest_poll_function *poll_fn;
+     MPIX_Grequest_wait_function *wait_fn;
+     struct MPIR_Grequest_class *next;
+} MPIR_Grequest_class;
+
+#define MPIR_Request_extract_status(request_ptr_, status_)              \
+    {                                                                   \
+        if ((status_) != MPI_STATUS_IGNORE)                             \
+        {                                                               \
+            int error__;                                                \
+                                                                        \
+            /* According to the MPI 1.1 standard page 22 lines 9-12,    \
+             * the MPI_ERROR field may not be modified except by the    \
+             * functions in section 3.7.5 which return                  \
+             * MPI_ERR_IN_STATUSES (MPI_Wait{all,some} and              \
+             * MPI_Test{all,some}). */                                  \
+            error__ = (status_)->MPI_ERROR;                             \
+            *(status_) = (request_ptr_)->status;                        \
+            (status_)->MPI_ERROR = error__;                             \
+        }                                                               \
+    }
+
+#define MPIR_Request_is_complete(req_) (MPIR_cc_is_complete((req_)->cc_ptr))
+
+/*S
+  MPIR_Request - Description of the Request data structure
+
+  Module:
+  Request-DS
+
+  Notes:
+  If it is necessary to remember the MPI datatype, this information is
+  saved within the device-specific fields provided by 'MPID_DEV_REQUEST_DECL'.
+
+  Requests come in many flavors, as stored in the 'kind' field.  It is
+  expected that each kind of request will have its own structure type
+  (e.g., 'MPIR_Request_send_t') that extends the 'MPIR_Request'.
+
+  S*/
+struct MPIR_Request {
+    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+
+    MPIR_Request_kind_t kind;
+
+    /* pointer to the completion counter.  This is necessary for the
+     * case when an operation is described by a list of requests */
+    MPIR_cc_t *cc_ptr;
+    /* the actual completion counter.  Ensure cc and status are in the
+     * same cache line, assuming the cache line size is a multiple of
+     * 32 bytes and 32-bit integers */
+    MPIR_cc_t cc;
+
+    /* A comm is needed to find the proper error handler */
+    MPIR_Comm *comm;
+    /* Status is needed for wait/test/recv */
+    MPI_Status status;
+
+    union {
+        struct {
+            struct MPIR_Grequest_fns *greq_fns;
+        } ureq; /* kind : MPIR_REQUEST_KIND__GREQUEST */
+        struct {
+            MPIR_Errflag_t errflag;
+        } nbc;  /* kind : MPIR_REQUEST_KIND__COLL */
+#if defined HAVE_DEBUGGER_SUPPORT
+        struct {
+            struct MPIR_Sendq *dbg_next;
+        } send; /* kind : MPIR_REQUEST_KIND__SEND */
+#endif  /* HAVE_DEBUGGER_SUPPORT */
+        struct {
+            /* Persistent requests have their own "real" requests */
+            struct MPIR_Request *real_request;
+        } persist;  /* kind : MPIR_REQUEST_KIND__PREQUEST_SEND or MPIR_REQUEST_KIND__PREQUEST_RECV */
+    } u;
+
+    /* Notes about request_completed_cb:
+     *
+     *   1. The callback function is triggered when this request's
+     *      completion count reaches 0.
+     *
+     *   2. The callback function should be nonblocking.
+     *
+     *   3. The callback function should not poke the progress engine,
+     *      or call any function that pokes the progress engine.
+     *
+     *   4. The callback function can complete other requests, thus
+     *      calling those requests' callback functions.  However, the
+     *      recursion depth of request completion function is limited.
+     *      If we ever need deeper recursive calls, we need to change
+     *      to an iterative design instead of a recursive design for
+     *      request completion.
+     *
+     *   5. In multithreaded programs, since the callback function is
+     *      nonblocking and never calls the progress engine, it would
+     *      never yield the lock to other threads.  So the recursion
+     *      should be multithreading-safe.
+     */
+    int (*request_completed_cb)(struct MPIR_Request *);
+
+    /* Other, device-specific information */
+#ifdef MPID_DEV_REQUEST_DECL
+    MPID_DEV_REQUEST_DECL
+#endif
+} ATTRIBUTE((__aligned__(32)));
+
+#define MPIR_REQUEST_PREALLOC 8
+
+extern MPIU_Object_alloc_t MPIR_Request_mem;
+/* Preallocated request objects */
+extern MPIR_Request MPIR_Request_direct[];
+
+static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind)
+{
+    MPIR_Request *req;
+
+    req = MPIU_Handle_obj_alloc(&MPIR_Request_mem);
+    if (req != NULL) {
+	MPL_DBG_MSG_P(MPIR_DBG_REQUEST,VERBOSE,
+                      "allocated request, handle=0x%08x", req->handle);
+#ifdef MPICH_DBG_OUTPUT
+	/*MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPIR_REQUEST);*/
+	if (HANDLE_GET_MPI_KIND(req->handle) != MPIR_REQUEST)
+	{
+	    int mpi_errno;
+	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
+                                             FCNAME, __LINE__, MPI_ERR_OTHER,
+                                             "**invalid_handle", "**invalid_handle %d", req->handle);
+	    MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
+	}
+#endif
+	/* FIXME: This makes request creation expensive.  We need to
+         * trim this to the basics, with additional setup for
+         * special-purpose requests (think base class and
+         * inheritance).  For example, do we *really* want to set the
+         * kind to UNDEFINED? And should the RMA values be set only
+         * for RMA requests? */
+	MPIU_Object_set_ref(req, 1);
+	req->kind = kind;
+        MPIR_cc_set(&req->cc, 1);
+	req->cc_ptr		   = &req->cc;
+
+	req->status.MPI_ERROR	   = MPI_SUCCESS;
+        MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE);
+
+	req->comm		   = NULL;
+        req->request_completed_cb  = NULL;
+
+        switch(kind) {
+        case MPIR_REQUEST_KIND__COLL:
+            req->u.nbc.errflag = MPIR_ERR_NONE;
+            break;
+        default:
+            break;
+        }
+
+        MPID_Request_init(req);
+    }
+    else
+    {
+	/* FIXME: This fails to fail if debugging is turned off */
+	MPL_DBG_MSG(MPIR_DBG_REQUEST,TYPICAL,"unable to allocate a request");
+    }
+
+    return req;
+}
+
+#define MPIR_Request_add_ref( _req ) \
+    do { MPIU_Object_add_ref( _req ); } while (0)
+
+#define MPIR_Request_release_ref( _req, _inuse ) \
+    do { MPIU_Object_release_ref( _req, _inuse ); } while (0)
+
+static inline void MPIR_Request_free(MPIR_Request *req)
+{
+    int inuse;
+
+    MPIR_Request_release_ref(req, &inuse);
+    if (inuse == 0) {
+        MPL_DBG_MSG_P(MPIR_DBG_REQUEST,VERBOSE,
+                       "freeing request, handle=0x%08x", req->handle);
+
+#ifdef MPICH_DBG_OUTPUT
+        if (HANDLE_GET_MPI_KIND(req->handle) != MPIR_REQUEST)
+        {
+            int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
+                                                 FCNAME, __LINE__, MPI_ERR_OTHER,
+                                                 "**invalid_handle", "**invalid_handle %d", req->handle);
+            MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
+        }
+
+        if (req->ref_count != 0)
+        {
+            int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
+                                                 FCNAME, __LINE__, MPI_ERR_OTHER,
+                                                 "**invalid_refcount", "**invalid_refcount %d", req->ref_count);
+            MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
+        }
+#endif
+
+        /* FIXME: We need a better way to handle these so that we do
+         * not always need to initialize these fields and check them
+         * when we destroy a request */
+        /* FIXME: We need a way to call these routines ONLY when the
+         * related ref count has become zero. */
+        if (req->comm != NULL) {
+            MPIR_Comm_release(req->comm);
+        }
+
+        if (req->kind == MPIR_REQUEST_KIND__GREQUEST && req->u.ureq.greq_fns != NULL) {
+            MPL_free(req->u.ureq.greq_fns);
+        }
+
+        MPID_Request_finalize(req);
+
+        MPIU_Handle_obj_free(&MPIR_Request_mem, req);
+    }
+}
+
+int MPIR_Request_complete(MPI_Request *, MPIR_Request *, MPI_Status *, int *);
+int MPIR_Request_get_error(MPIR_Request *);
+int MPIR_Progress_wait_request(MPIR_Request *req);
+
+/* The following routines perform the callouts to the user routines registered
+   as part of a generalized request.  They handle any language binding issues
+   that are necessary. They are used when completing, freeing, cancelling or
+   extracting the status from a generalized request. */
+int MPIR_Grequest_cancel(MPIR_Request * request_ptr, int complete);
+int MPIR_Grequest_query(MPIR_Request * request_ptr);
+int MPIR_Grequest_free(MPIR_Request * request_ptr);
+
+/* this routine was added to support our extension relaxing the progress rules
+ * for generalized requests */
+int MPIR_Grequest_progress_poke(int count, MPIR_Request **request_ptrs,
+		MPI_Status array_of_statuses[] );
+int MPIR_Grequest_waitall(int count, MPIR_Request * const *  request_ptrs);
+
+void MPIR_Grequest_complete_impl(MPIR_Request *request_ptr);
+int MPIR_Grequest_start_impl(MPI_Grequest_query_function *query_fn,
+                             MPI_Grequest_free_function *free_fn,
+                             MPI_Grequest_cancel_function *cancel_fn,
+                             void *extra_state, MPIR_Request **request_ptr);
+int MPIX_Grequest_start_impl(MPI_Grequest_query_function *,
+                             MPI_Grequest_free_function *,
+                             MPI_Grequest_cancel_function *,
+                             MPIX_Grequest_poll_function *,
+                             MPIX_Grequest_wait_function *, void *,
+                             MPIR_Request **);
+
+#endif /* MPIR_REQUEST_H_INCLUDED */
diff --git a/src/include/mpir_status.h b/src/include/mpir_status.h
new file mode 100644
index 0000000..abf4eb3
--- /dev/null
+++ b/src/include/mpir_status.h
@@ -0,0 +1,65 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_STATUS_H_INCLUDED
+#define MPIR_STATUS_H_INCLUDED
+
+/* We use bits from the "count_lo" and "count_hi_and_cancelled" fields
+ * to represent the 'count' and 'cancelled' objects.  The LSB of the
+ * "count_hi_and_cancelled" field represents the 'cancelled' object.
+ * The 'count' object is split between the "count_lo" and
+ * "count_hi_and_cancelled" fields, with the lower order bits going
+ * into the "count_lo" field, and the higher order bits going into the
+ * "count_hi_and_cancelled" field.  This gives us 2N-1 bits for the
+ * 'count' object, where N is the size of int.  However, the value
+ * returned to the user is bounded by the definition of MPI_Count. */
+/* NOTE: The below code assumes that the count value is never
+ * negative.  For negative values, right-shifting can have weird
+ * implementation specific consequences. */
+#define MPIR_STATUS_SET_COUNT(status_, count_)                          \
+    {                                                                   \
+        (status_).count_lo = ((int) count_);                            \
+        (status_).count_hi_and_cancelled &= 1;                          \
+        (status_).count_hi_and_cancelled |= (int) ((MPIR_Ucount) count_ >> (8 * SIZEOF_INT) << 1); \
+    }
+
+#define MPIR_STATUS_GET_COUNT(status_)                                  \
+    ((MPI_Count) ((((MPIR_Ucount) (((unsigned int) (status_).count_hi_and_cancelled) >> 1)) << (8 * SIZEOF_INT)) + (unsigned int) (status_).count_lo))
+
+#define MPIR_STATUS_SET_CANCEL_BIT(status_, cancelled_)	\
+    {                                                   \
+        (status_).count_hi_and_cancelled &= ~1;         \
+        (status_).count_hi_and_cancelled |= cancelled_; \
+    }
+
+#define MPIR_STATUS_GET_CANCEL_BIT(status_)	((status_).count_hi_and_cancelled & 1)
+
+/* Do not set MPI_ERROR (only set if ERR_IN_STATUS is returned) */
+#define MPIR_Status_set_empty(status_)                          \
+    {                                                           \
+        if ((status_) != MPI_STATUS_IGNORE)                     \
+        {                                                       \
+            (status_)->MPI_SOURCE = MPI_ANY_SOURCE;             \
+            (status_)->MPI_TAG = MPI_ANY_TAG;                   \
+            MPIR_STATUS_SET_COUNT(*(status_), 0);               \
+            MPIR_STATUS_SET_CANCEL_BIT(*(status_), FALSE);      \
+        }                                                       \
+    }
+/* See MPI 1.1, section 3.11, Null Processes */
+/* Do not set MPI_ERROR (only set if ERR_IN_STATUS is returned) */
+#define MPIR_Status_set_procnull(status_)                       \
+    {                                                           \
+        if ((status_) != MPI_STATUS_IGNORE)                     \
+        {                                                       \
+            (status_)->MPI_SOURCE = MPI_PROC_NULL;              \
+            (status_)->MPI_TAG = MPI_ANY_TAG;                   \
+            MPIR_STATUS_SET_COUNT(*(status_), 0);               \
+            MPIR_STATUS_SET_CANCEL_BIT(*(status_), FALSE);      \
+        }                                                       \
+    }
+
+#endif /* MPIR_STATUS_H_INCLUDED */
diff --git a/src/util/mem/mpiu_strerror.h b/src/include/mpir_strerror.h
similarity index 71%
rename from src/util/mem/mpiu_strerror.h
rename to src/include/mpir_strerror.h
index 700bf30..c69c58b 100644
--- a/src/util/mem/mpiu_strerror.h
+++ b/src/include/mpir_strerror.h
@@ -3,8 +3,8 @@
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
-#if !defined(MPIU_STRERROR_H_INCLUDED)
-#define MPIU_STRERROR_H_INCLUDED
+#if !defined(MPIR_STRERROR_H_INCLUDED)
+#define MPIR_STRERROR_H_INCLUDED
 
 /*
  * MPIU_Sterror()
@@ -12,4 +12,4 @@
  * Thread safe implementation of strerror(), whenever possible. */
 const char *MPIU_Strerror(int errnum);
 
-#endif /* !defined(MPIU_STRERROR_H_INCLUDED) */
+#endif /* !defined(MPIR_STRERROR_H_INCLUDED) */
diff --git a/src/include/mpir_tags.h b/src/include/mpir_tags.h
new file mode 100644
index 0000000..7bd0c26
--- /dev/null
+++ b/src/include/mpir_tags.h
@@ -0,0 +1,79 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_TAGS_H_INCLUDED
+#define MPIR_TAGS_H_INCLUDED
+
+/* Tags for point to point operations which implement collective and other
+   internal operations */
+#define MPIR_BARRIER_TAG               1
+#define MPIR_BCAST_TAG                 2
+#define MPIR_GATHER_TAG                3
+#define MPIR_GATHERV_TAG               4
+#define MPIR_SCATTER_TAG               5
+#define MPIR_SCATTERV_TAG              6
+#define MPIR_ALLGATHER_TAG             7
+#define MPIR_ALLGATHERV_TAG            8
+#define MPIR_ALLTOALL_TAG              9
+#define MPIR_ALLTOALLV_TAG            10
+#define MPIR_REDUCE_TAG               11
+#define MPIR_USER_REDUCE_TAG          12
+#define MPIR_USER_REDUCEA_TAG         13
+#define MPIR_ALLREDUCE_TAG            14
+#define MPIR_USER_ALLREDUCE_TAG       15
+#define MPIR_USER_ALLREDUCEA_TAG      16
+#define MPIR_REDUCE_SCATTER_TAG       17
+#define MPIR_USER_REDUCE_SCATTER_TAG  18
+#define MPIR_USER_REDUCE_SCATTERA_TAG 19
+#define MPIR_SCAN_TAG                 20
+#define MPIR_USER_SCAN_TAG            21
+#define MPIR_USER_SCANA_TAG           22
+#define MPIR_LOCALCOPY_TAG            23
+#define MPIR_EXSCAN_TAG               24
+#define MPIR_ALLTOALLW_TAG            25
+#define MPIR_TOPO_A_TAG               26
+#define MPIR_TOPO_B_TAG               27
+#define MPIR_REDUCE_SCATTER_BLOCK_TAG 28
+#define MPIR_SHRINK_TAG               29
+#define MPIR_AGREE_TAG                30
+#define MPIR_FIRST_NBC_TAG            31
+
+/* These macros must be used carefully. These macros will not work with
+ * negative tags. By definition, users are not to use negative tags and the
+ * only negative tag in MPICH is MPI_ANY_TAG which is checked separately, but
+ * if there is a time where negative tags become more common, this setup won't
+ * work anymore. */
+
+/* This bitmask can be used to manually mask the tag space wherever it might
+ * be necessary to do so (for instance in the receive queue) */
+#define MPIR_TAG_ERROR_BIT (1 << 30)
+
+/* This bitmask is used to differentiate between a process failure
+ * (MPIX_ERR_PROC_FAILED) and any other kind of failure (MPI_ERR_OTHER). */
+#define MPIR_TAG_PROC_FAILURE_BIT (1 << 29)
+
+/* This macro checks the value of the error bit in the MPI tag and returns 1
+ * if the bit is set and 0 if it is not. */
+#define MPIR_TAG_CHECK_ERROR_BIT(tag) ((MPIR_TAG_ERROR_BIT & (tag)) == MPIR_TAG_ERROR_BIT ? 1 : 0)
+
+/* This macro checks the value of the process failure bit in the MPI tag and
+ * returns 1 if the bit is set and 0 if it is not. */
+#define MPIR_TAG_CHECK_PROC_FAILURE_BIT(tag) ((MPIR_TAG_PROC_FAILURE_BIT & (tag)) == MPIR_TAG_PROC_FAILURE_BIT ? 1 : 0)
+
+/* This macro sets the value of the error bit in the MPI tag to 1 */
+#define MPIR_TAG_SET_ERROR_BIT(tag) ((tag) |= MPIR_TAG_ERROR_BIT)
+
+/* This macro sets both the error bit and the process failure bit in the MPI tag to 1 */
+#define MPIR_TAG_SET_PROC_FAILURE_BIT(tag) ((tag) |= (MPIR_TAG_ERROR_BIT | MPIR_TAG_PROC_FAILURE_BIT))
+
+/* This macro clears the value of the error bits in the MPI tag */
+#define MPIR_TAG_CLEAR_ERROR_BITS(tag) ((tag) &= ~(MPIR_TAG_ERROR_BIT ^ MPIR_TAG_PROC_FAILURE_BIT))
+
+/* This macro masks the value of the error bits in the MPI tag */
+#define MPIR_TAG_MASK_ERROR_BITS(tag) ((tag) & ~(MPIR_TAG_ERROR_BIT ^ MPIR_TAG_PROC_FAILURE_BIT))
+
+#endif /* MPIR_TAGS_H_INCLUDED */
diff --git a/src/include/mpir_thread.h b/src/include/mpir_thread.h
new file mode 100644
index 0000000..7d17382
--- /dev/null
+++ b/src/include/mpir_thread.h
@@ -0,0 +1,85 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_THREAD_H_INCLUDED
+#define MPIR_THREAD_H_INCLUDED
+
+#include "mpichconfconst.h"
+#include "mpichconf.h"
+
+typedef struct {
+    int thread_provided;        /* Provided level of thread support */
+
+    /* This is a special case for is_thread_main, which must be
+     * implemented even if MPICH itself is single threaded.  */
+#if MPICH_THREAD_LEVEL >= MPI_THREAD_SERIALIZED
+    MPID_Thread_id_t master_thread;     /* Thread that started MPI */
+#endif
+
+#if defined MPICH_IS_THREADED
+    int isThreaded;             /* Set to true if user requested
+                                 * THREAD_MULTIPLE */
+#endif                          /* MPICH_IS_THREADED */
+} MPIR_Thread_info_t;
+extern MPIR_Thread_info_t MPIR_ThreadInfo;
+
+/* ------------------------------------------------------------------------- */
+/* thread-local storage macros */
+/* arbitrary, just needed to avoid cleaning up heap allocated memory at thread
+ * destruction time */
+#define MPIR_STRERROR_BUF_SIZE (1024)
+
+/* This structure contains all thread-local variables and will be zeroed at
+ * allocation time.
+ *
+ * Note that any pointers to dynamically allocated memory stored in this
+ * structure must be externally cleaned up.
+ * */
+typedef struct {
+    int op_errno;               /* For errors in predefined MPI_Ops */
+
+    /* error string storage for MPIU_Strerror */
+    char strerrbuf[MPIR_STRERROR_BUF_SIZE];
+
+#if (MPICH_THREAD_LEVEL == MPI_THREAD_MULTIPLE)
+    int lock_depth;
+#endif
+} MPIR_Per_thread_t;
+
+#if defined(MPICH_IS_THREADED) && defined(MPL_TLS_SPECIFIER)
+extern MPL_TLS_SPECIFIER MPIR_Per_thread_t MPIR_Per_thread;
+#else
+extern MPIR_Per_thread_t MPIR_Per_thread;
+#endif
+
+extern MPID_Thread_tls_t MPIR_Per_thread_key;
+
+#if defined(MPICH_IS_THREADED)
+#define MPIR_THREAD_CHECK_BEGIN if (MPIR_ThreadInfo.isThreaded) {
+#define MPIR_THREAD_CHECK_END   }
+#else
+#define MPIR_THREAD_CHECK_BEGIN
+#define MPIR_THREAD_CHECK_END
+#endif /* MPICH_IS_THREADED */
+
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_GLOBAL || \
+    MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
+extern MPID_Thread_mutex_t MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX;
+#endif
+
+#if MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY_PER_OBJECT
+extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_HANDLE_MUTEX;
+extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_MSGQ_MUTEX;
+extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_COMPLETION_MUTEX;
+extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_CTX_MUTEX;
+extern MPID_Thread_mutex_t MPIR_THREAD_POBJ_PMI_MUTEX;
+
+#define MPIR_THREAD_POBJ_COMM_MUTEX(_comm_ptr) _comm_ptr->mutex
+#define MPIR_THREAD_POBJ_WIN_MUTEX(_win_ptr)   _win_ptr->mutex
+#endif
+
+#endif /* MPIR_THREAD_H_INCLUDED */
diff --git a/src/include/mpir_topo.h b/src/include/mpir_topo.h
new file mode 100644
index 0000000..824e085
--- /dev/null
+++ b/src/include/mpir_topo.h
@@ -0,0 +1,112 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_TOPO_H_INCLUDED
+#define MPIR_TOPO_H_INCLUDED
+
+/*
+ * The following structure allows the device detailed control over the
+ * functions that are used to implement the topology routines.  If either
+ * the pointer to this structure is null or any individual entry is null,
+ * the default function is used (this follows exactly the same rules as the
+ * collective operations, provided in the MPIR_Collops structure).
+ */
+
+typedef struct MPIR_TopoOps {
+    int (*cartCreate)( const MPIR_Comm *, int, const int[], const int [],
+		       int, MPI_Comm * );
+    int (*cartMap)   ( const MPIR_Comm *, int, const int[], const int [],
+		       int * );
+    int (*graphCreate)( const MPIR_Comm *, int, const int[], const int [],
+			int, MPI_Comm * );
+    int (*graphMap)   ( const MPIR_Comm *, int, const int[], const int[],
+			int * );
+} MPIR_TopoOps;
+
+
+typedef struct MPIR_Graph_topology {
+  int nnodes;
+  int nedges;
+  int *index;
+  int *edges;
+} MPIR_Graph_topology;
+
+typedef struct MPIR_Cart_topology {
+  int nnodes;     /* Product of dims[*], gives the size of the topology */
+  int ndims;
+  int *dims;
+  int *periodic;
+  int *position;
+} MPIR_Cart_topology;
+
+typedef struct MPIR_Dist_graph_topology {
+    int indegree;
+    int *in;
+    int *in_weights;
+    int outdegree;
+    int *out;
+    int *out_weights;
+    int is_weighted;
+} MPIR_Dist_graph_topology;
+
+struct MPIR_Topology {
+  MPIR_Topo_type kind;
+  union topo { 
+    MPIR_Graph_topology graph;
+    MPIR_Cart_topology  cart;
+    MPIR_Dist_graph_topology dist_graph;
+  } topo;
+};
+
+int MPIR_Dims_create( int, int, int * );
+
+MPIR_Topology *MPIR_Topology_get( MPIR_Comm * );
+int MPIR_Topology_put( MPIR_Comm *, MPIR_Topology * );
+
+/* Returns the canonicalized count of neighbors for the given topology as though
+ * MPI_Dist_graph_neighbors_count were called with a distributed graph topology,
+ * even if the given topology is actually Cartesian or Graph.  Useful for
+ * implementing neighborhood collective operations. */
+int MPIR_Topo_canon_nhb_count(MPIR_Comm *comm_ptr, int *indegree, int *outdegree, int *weighted);
+
+/* Returns the canonicalized list of neighbors for a given topology, separated
+ * into inbound and outbound edges.  Equivalent to MPI_Dist_graph_neighbors but
+ * works for any topology type by canonicalizing according to the rules in
+ * Section 7.6 of the MPI-3.0 standard. */
+int MPIR_Topo_canon_nhb(MPIR_Comm *comm_ptr,
+                        int indegree, int sources[], int inweights[],
+                        int outdegree, int dests[], int outweights[]);
+
+#define MAX_CART_DIM 16
+
+/* topology impl functions */
+int MPIR_Cart_create( MPIR_Comm *, int, const int [],
+		      const int [], int, MPI_Comm * );
+int MPIR_Cart_map( const MPIR_Comm *, int, const int[],  const int[], int* );
+int MPIR_Cart_shift_impl(MPIR_Comm *comm_ptr, int direction, int displ, int *source, int *dest);
+
+void MPIR_Cart_rank_impl(struct MPIR_Topology *cart_ptr, const int *coords, int *rank);
+int MPIR_Cart_create_impl(MPIR_Comm *comm_ptr, int ndims, const int dims[],
+                          const int periods[], int reorder, MPI_Comm *comm_cart);
+int MPIR_Cart_map_impl(const MPIR_Comm *comm_ptr, int ndims, const int dims[],
+                       const int periodic[], int *newrank);
+
+int MPIR_Graph_create( MPIR_Comm *, int,
+		       const int[], const int[], int,
+		       MPI_Comm *);
+int MPIR_Graph_map( const MPIR_Comm *, int, const int[], const int[], int* );
+int MPIR_Graph_neighbors_count_impl(MPIR_Comm *comm_ptr, int rank, int *nneighbors);
+int MPIR_Graph_neighbors_impl(MPIR_Comm *comm_ptr, int rank, int maxneighbors, int *neighbors);
+int MPIR_Graph_map_impl(const MPIR_Comm *comm_ptr, int nnodes,
+                        const int indx[], const int edges[], int *newrank);
+
+int MPIR_Dist_graph_neighbors_count_impl(MPIR_Comm *comm_ptr, int *indegree, int *outdegree, int *weighted);
+int MPIR_Dist_graph_neighbors_impl(MPIR_Comm *comm_ptr,
+                                   int maxindegree, int sources[], int sourceweights[],
+                                   int maxoutdegree, int destinations[], int destweights[]);
+
+#endif /* MPIR_TOPO_H_INCLUDED */
diff --git a/src/include/mpir_type_defs.h b/src/include/mpir_type_defs.h
index e34b29e..83b9cf7 100644
--- a/src/include/mpir_type_defs.h
+++ b/src/include/mpir_type_defs.h
@@ -9,13 +9,75 @@
 
 #include "mpichconf.h"
 
-/* Define a typedef for the errflag value used by many internal functions.
- * If an error needs to be returned, these values can be used to signal such.
- * More details can be found further down in the code with the bitmasking logic */
-typedef enum {
-    MPIR_ERR_NONE = MPI_SUCCESS,
-    MPIR_ERR_PROC_FAILED = MPIX_ERR_PROC_FAILED,
-    MPIR_ERR_OTHER = MPI_ERR_OTHER
-} MPIR_Errflag_t;
+/* Basic typedefs */
+#ifdef HAVE_SYS_BITYPES_H
+#include <sys/bitypes.h>
+#endif
+
+/* inttypes.h is supposed to include stdint.h but this is here as
+   belt-and-suspenders for platforms that aren't fully compliant */
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+/* stdint.h gives us fixed-width C99 types like int16_t, among others */
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+/* stdbool.h gives us the C boolean type */
+#ifdef HAVE_STDBOOL_H
+#include <stdbool.h>
+#endif
+
+/* complex.h gives us the C complex type */
+#ifdef HAVE_COMPLEX_H
+#include <complex.h>
+#endif
+
+#ifdef HAVE_WINDOWS_H
+#include <winsock2.h>
+#include <windows.h>
+#else
+#ifndef BOOL
+#define BOOL int
+#endif
+#endif
+
+#include "mpl.h"
+
+typedef MPIU_SIZE_T MPIU_Size_t;
+
+/* Use the MPIU_PtrToXXX macros to convert pointers to and from integer types */
+
+/* The Microsoft compiler will not allow casting of different sized types
+ * without
+ * printing a compiler warning.  Using these macros allows compiler specific
+ * type casting and avoids the warning output.  These macros should only be used
+ * in code that can handle loss of bits.
+ */
+
+/* PtrToAint converts a pointer to an MPI_Aint type, truncating bits if necessary */
+#ifdef HAVE_PTRTOAINT
+#define MPIU_PtrToAint(a) ((MPI_Aint)(INT_PTR) (a))
+#else
+/* An MPI_Aint may be *larger* than a pointer.  By using 2 casts, we can
+   keep some compilers from complaining about converting a pointer to an
+   integer of a different size */
+#define MPIU_PtrToAint(a) ((MPI_Aint)(uintptr_t)(a))
+#endif
+
+/* AintToPtr converts an MPI_Aint to a pointer type, extending bits if necessary */
+#ifdef HAVE_AINTTOPTR
+#define MPIU_AintToPtr(a) ((VOID *)(INT_PTR)((MPI_Aint)a))
+#else
+#define MPIU_AintToPtr(a) (void*)(a)
+#endif
+
+/* Adding the 32-bit compute/64-bit I/O related type-casts in here as
+ * they are not a part of the MPI standard yet. */
+#define MPIU_AINT_CAST_TO_VOID_PTR (void *)(intptr_t)
+#define MPIU_VOID_PTR_CAST_TO_MPI_AINT (MPI_Aint)(uintptr_t)
+#define MPIU_PTR_DISP_CAST_TO_MPI_AINT (MPI_Aint)(intptr_t)
 
 #endif /* !defined(MPIR_TYPE_DEFS_H_INCLUDED) */
diff --git a/src/include/mpiu_utarray.h b/src/include/mpir_utarray.h
similarity index 99%
rename from src/include/mpiu_utarray.h
rename to src/include/mpir_utarray.h
index ed1d33f..49bd303 100644
--- a/src/include/mpiu_utarray.h
+++ b/src/include/mpir_utarray.h
@@ -33,8 +33,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 /* a dynamic array implementation using macros 
  * see http://uthash.sourceforge.net/utarray
  */
-#ifndef MPIU_UTARRAY_H
-#define MPIU_UTARRAY_H
+#ifndef MPIR_UTARRAY_H_INCLUDED
+#define MPIR_UTARRAY_H_INCLUDED
 
 #define UTARRAY_VERSION 1.9.4
 
@@ -255,4 +255,4 @@ static const UT_icd ut_ptr_icd _UNUSED_ = {sizeof(void*),NULL,NULL,NULL};
 #define ut_ptr_array(a) ((void**)(a)->d)
 
 
-#endif /* MPIU_UTARRAY_H */
+#endif /* MPIR_UTARRAY_H_INCLUDED */
diff --git a/src/include/mpiu_uthash.h b/src/include/mpir_uthash.h
similarity index 99%
rename from src/include/mpiu_uthash.h
rename to src/include/mpir_uthash.h
index b70e51d..8efbff9 100644
--- a/src/include/mpiu_uthash.h
+++ b/src/include/mpir_uthash.h
@@ -1,6 +1,6 @@
 /* MPICH-local modifications:
  *
- * 1) Rename header from "uthash.h" to "mpiu_uthash.h" to avoid accidentally
+ * 1) Rename header from "uthash.h" to "mpir_uthash.h" to avoid accidentally
  * pulling in a system-installed version of the header.
  *
  * 2) Use MPL_malloc/MPL_free instead of malloc/free
@@ -32,8 +32,8 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifndef UTHASH_H
-#define UTHASH_H
+#ifndef MPIR_UTHASH_H_INCLUDED
+#define MPIR_UTHASH_H_INCLUDED
 
 #include <string.h>   /* memcmp,strlen */
 #include <stddef.h>   /* ptrdiff_t */
@@ -948,4 +948,4 @@ typedef struct UT_hash_handle {
    unsigned hashv;                   /* result of hash-fcn(key)        */
 } UT_hash_handle;
 
-#endif /* UTHASH_H */
+#endif /* MPIR_UTHASH_H_INCLUDED */
diff --git a/src/include/mpir_win.h b/src/include/mpir_win.h
new file mode 100644
index 0000000..12d3233
--- /dev/null
+++ b/src/include/mpir_win.h
@@ -0,0 +1,93 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ */
+
+#ifndef MPIR_WIN_H_INCLUDED
+#define MPIR_WIN_H_INCLUDED
+
+/*S
+  MPIR_Win - Description of the Window Object data structure.
+
+  Module:
+  Win-DS
+
+  Notes:
+  The following 3 keyvals are defined for attributes on all MPI
+  Window objects\:
+.vb
+ MPI_WIN_SIZE
+ MPI_WIN_BASE
+ MPI_WIN_DISP_UNIT
+.ve
+  These correspond to the values in 'length', 'start_address', and
+  'disp_unit'.
+
+  The communicator in the window is the same communicator that the user
+  provided to 'MPI_Win_create' (not a dup).  However, each intracommunicator
+  has a special context id that may be used if MPI communication is used
+  by the implementation to implement the RMA operations.
+
+  There is no separate window group; the group of the communicator should be
+  used.
+
+  Question:
+  Should a 'MPID_Win' be defined after 'MPID_Segment' in case the device
+  wants to
+  store a queue of pending put/get operations, described with 'MPID_Segment'
+  (or 'MPIR_Request')s?
+
+  S*/
+struct MPIR_Win {
+    MPIU_OBJECT_HEADER; /* adds handle and ref_count fields */
+    MPID_Thread_mutex_t mutex;
+    MPIR_Errhandler *errhandler;  /* Pointer to the error handler structure */
+    void *base;
+    MPI_Aint    size;
+    int          disp_unit;      /* Displacement unit of *local* window */
+    MPIR_Attribute *attributes;
+    MPIR_Comm *comm_ptr;         /* Pointer to comm of window (dup) */
+#ifdef USE_THREADED_WINDOW_CODE
+    /* These were causing compilation errors.  We need to figure out how to
+       integrate threads into MPICH before including these fields. */
+    /* FIXME: The test here should be within a test for threaded support */
+#ifdef HAVE_PTHREAD_H
+    pthread_t wait_thread_id; /* id of thread handling MPI_Win_wait */
+    pthread_t passive_target_thread_id; /* thread for passive target RMA */
+#elif defined(HAVE_WINTHREADS)
+    HANDLE wait_thread_id;
+    HANDLE passive_target_thread_id;
+#endif
+#endif
+    /* These are COPIES of the values so that addresses to them
+       can be returned as attributes.  They are initialized by the
+       MPI_Win_get_attr function.
+
+       These values are constant for the lifetime of the window, so
+       this is thread-safe.
+     */
+    int  copyDispUnit;
+    MPI_Aint copySize;
+
+    char          name[MPI_MAX_OBJECT_NAME];
+
+    MPIR_Win_flavor_t create_flavor;
+    MPIR_Win_model_t  model;
+    MPIR_Win_flavor_t copyCreateFlavor;
+    MPIR_Win_model_t  copyModel;
+
+  /* Other, device-specific information */
+#ifdef MPID_DEV_WIN_DECL
+    MPID_DEV_WIN_DECL
+#endif
+};
+extern MPIU_Object_alloc_t MPIR_Win_mem;
+/* Preallocated win objects */
+extern MPIR_Win MPIR_Win_direct[];
+
+int MPIR_Type_is_rma_atomic(MPI_Datatype type);
+int MPIR_Compare_equal(const void *a, const void *b, MPI_Datatype type);
+
+#endif /* MPIR_WIN_H_INCLUDED */
diff --git a/src/include/mpit.h b/src/include/mpit.h
index 8f6ee20..51bc5d2 100644
--- a/src/include/mpit.h
+++ b/src/include/mpit.h
@@ -427,4 +427,22 @@ static inline cvar_table_entry_t * LOOKUP_CVAR_BY_NAME(const char* cvar_name)
             addr_, count_, verb_, bind_, flags_, get_value_, get_count_, cat_, desc_) \
     PVAR_GATED_ACTION(MODULE, MPIR_T_PVAR_LOWWATERMARK_REGISTER_DYNAMIC_impl(dtype_, name_, \
             addr_, count_, verb_, bind_, flags_, get_value_, get_count_, cat_, desc_))
-#endif
+
+int MPIR_T_cvar_handle_alloc_impl(int cvar_index, void *obj_handle, MPI_T_cvar_handle *handle, int *count);
+int MPIR_T_cvar_read_impl(MPI_T_cvar_handle handle, void *buf);
+int MPIR_T_cvar_write_impl(MPI_T_cvar_handle handle, const void *buf);
+int MPIR_T_pvar_session_create_impl(MPI_T_pvar_session *session);
+int MPIR_T_pvar_session_free_impl(MPI_T_pvar_session *session);
+int MPIR_T_pvar_handle_alloc_impl(MPI_T_pvar_session session, int pvar_index, void *obj_handle, MPI_T_pvar_handle *handle, int *count);
+int MPIR_T_pvar_handle_free_impl(MPI_T_pvar_session session, MPI_T_pvar_handle *handle);
+int MPIR_T_pvar_start_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle);
+int MPIR_T_pvar_stop_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle);
+int MPIR_T_pvar_read_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle, void *buf);
+int MPIR_T_pvar_write_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle, const void *buf);
+int MPIR_T_pvar_reset_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle);
+int MPIR_T_pvar_readreset_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle, void *buf);
+int MPIR_T_category_get_cvars_impl(int cat_index, int len, int indices[]);
+int MPIR_T_category_get_pvars_impl(int cat_index, int len, int indices[]);
+int MPIR_T_category_get_categories_impl(int cat_index, int len, int indices[]);
+
+#endif  /* MPIT_H_INCLUDED */
diff --git a/src/include/mpitimerimpl.h b/src/include/mpitimerimpl.h
deleted file mode 100644
index 9113ce6..0000000
--- a/src/include/mpitimerimpl.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef MPITIMERIMPL_H_INCLUDED
-#define MPITIMERIMPL_H_INCLUDED
-
-/* Possible values for timing */
-#define MPID_TIMING_KIND_NONE 0
-#define MPID_TIMING_KIND_TIME 1
-#define MPID_TIMING_KIND_LOG 2
-#define MPID_TIMING_KIND_LOG_DETAILED 3
-#define MPID_TIMING_KIND_ALL 4
-#define MPID_TIMING_KIND_RUNTIME 5
-
-/* Routine tracing (see --enable-timing for control of this) */
-#if defined(HAVE_TIMING) && (HAVE_TIMING == MPID_TIMING_KIND_LOG || \
-    HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || \
-    HAVE_TIMING == MPID_TIMING_KIND_ALL || \
-    HAVE_TIMING == MPID_TIMING_KIND_RUNTIME)
-
-/* This include file contains the static state definitions */
-#include "mpiallstates.h"
-
-/* Possible values for USE_LOGGING */
-#define MPID_LOGGING_NONE 0
-#define MPID_LOGGING_RLOG 1
-#define MPID_LOGGING_EXTERNAL 4
-
-/* Include the macros specific to the selected logging library */
-#if (USE_LOGGING == MPID_LOGGING_RLOG)
-#include "rlog_macros.h"
-#elif (USE_LOGGING == MPID_LOGGING_EXTERNAL)
-#include "mpilogging.h"
-#else
-#error You must select a logging library if timing is enabled
-#endif
-
-/* MPI layer definitions */
-#define MPID_MPI_STATE_DECL(a)                MPIDU_STATE_DECL(a)
-#define MPID_MPI_INIT_STATE_DECL(a)           MPIDU_INIT_STATE_DECL(a)
-#define MPID_MPI_FINALIZE_STATE_DECL(a)       MPIDU_FINALIZE_STATE_DECL(a)
-
-#define MPID_MPI_FUNC_ENTER(a)                MPIDU_FUNC_ENTER(a)
-#define MPID_MPI_FUNC_EXIT(a)                 MPIDU_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER(a)          MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT(a)           MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPID_MPI_COLL_FUNC_ENTER(a)           MPIDU_COLL_FUNC_ENTER(a)
-#define MPID_MPI_COLL_FUNC_EXIT(a)            MPIDU_COLL_FUNC_EXIT(a)
-#define MPID_MPI_RMA_FUNC_ENTER(a)            MPIDU_RMA_FUNC_ENTER(a)
-#define MPID_MPI_RMA_FUNC_EXIT(a)             MPIDU_RMA_FUNC_EXIT(a)
-#define MPID_MPI_INIT_FUNC_ENTER(a)           MPIDU_INIT_FUNC_ENTER(a)
-#define MPID_MPI_INIT_FUNC_EXIT(a)            MPIDU_INIT_FUNC_EXIT(a)
-#define MPID_MPI_FINALIZE_FUNC_ENTER(a)       MPIDU_FINALIZE_FUNC_ENTER(a)
-#define MPID_MPI_FINALIZE_FUNC_EXIT(a)        MPIDU_FINALIZE_FUNC_EXIT(a)
-
-#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)    MPIDU_PT2PT_FUNC_ENTER_FRONT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)     MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)     MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)      MPIDU_PT2PT_FUNC_EXIT_BACK(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)      MPIDU_PT2PT_FUNC_EXIT_BOTH(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)     MPIDU_PT2PT_FUNC_ENTER_BOTH(a)
-
-#if defined(HAVE_TIMING) && (HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || HAVE_TIMING == MPID_TIMING_KIND_ALL)
-
-/* device layer definitions */
-#define MPIDI_STATE_DECL(a)                MPIDU_STATE_DECL(a)
-#define MPIDI_FUNC_ENTER(a)                MPIDU_FUNC_ENTER(a)
-#define MPIDI_FUNC_EXIT(a)                 MPIDU_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER(a)          MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT(a)           MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPIDI_RMA_FUNC_ENTER(a)            MPIDU_RMA_FUNC_ENTER(a)
-#define MPIDI_RMA_FUNC_EXIT(a)             MPIDU_RMA_FUNC_EXIT(a)
-
-#else
-
-#define MPIDI_STATE_DECL(a)
-#define MPIDI_FUNC_ENTER(a)
-#define MPIDI_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT(a)
-#define MPIDI_RMA_FUNC_ENTER(a)
-#define MPIDI_RMA_FUNC_EXIT(a)
-
-#endif /* (HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || HAVE_TIMING == MPID_TIMING_KIND_ALL) */
-
-/* prototype the initialization/finalization functions */
-int MPIU_Timer_init(int rank, int size);
-int MPIU_Timer_finalize(void);
-int MPIR_Describe_timer_states(void);
-
-/* The original statistics macros (see the design documentation) 
-   have been superceeded by the MPIR_T_PVAR_* macros (see mpit.h) */
-
-#else /* HAVE_TIMING and doing logging */
-
-/* evaporate all the timing macros if timing is not selected */
-#define MPIU_Timer_init(rank, size)
-#define MPIU_Timer_finalize()
-/* MPI layer */
-#define MPID_MPI_STATE_DECL(a)
-#define MPID_MPI_INIT_STATE_DECL(a)
-#define MPID_MPI_FINALIZE_STATE_DECL(a)
-#define MPID_MPI_FUNC_EXIT(a)
-#define MPID_MPI_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)
-#define MPID_MPI_COLL_FUNC_ENTER(a)
-#define MPID_MPI_COLL_FUNC_EXIT(a)
-#define MPID_MPI_RMA_FUNC_ENTER(a)
-#define MPID_MPI_RMA_FUNC_EXIT(a)
-#define MPID_MPI_INIT_FUNC_ENTER(a)
-#define MPID_MPI_INIT_FUNC_EXIT(a)
-#define MPID_MPI_FINALIZE_FUNC_ENTER(a)
-#define MPID_MPI_FINALIZE_FUNC_EXIT(a)
-/* device layer */
-#define MPIDI_STATE_DECL(a)
-#define MPIDI_FUNC_ENTER(a)
-#define MPIDI_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT(a)
-#define MPIDI_RMA_FUNC_ENTER(a)
-#define MPIDI_RMA_FUNC_EXIT(a)
-
-#endif /* HAVE_TIMING */
-
-#endif
diff --git a/src/include/mpitimpl.h b/src/include/mpitimpl.h
index 2182751..6789fe0 100644
--- a/src/include/mpitimpl.h
+++ b/src/include/mpitimpl.h
@@ -11,9 +11,13 @@
 #define MPITIMPL_H_INCLUDED
 
 #include "mpi.h"
-#include "mpiutil.h"
-#include "mpiu_utarray.h"
-#include "mpiu_uthash.h"
+#include "mpir_strerror.h"
+#include "mpir_type_defs.h"
+#include "mpir_assert.h"
+#include "mpir_pointers.h"
+#include "mpir_utarray.h"
+#include "mpir_uthash.h"
+#include "mpir_objects.h"
 
 #ifdef HAVE_ERROR_CHECKING
 typedef enum {
diff --git a/src/include/mpiutil.h b/src/include/mpiutil.h
deleted file mode 100644
index 0609ff7..0000000
--- a/src/include/mpiutil.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-#if !defined(MPIUTIL_H_INCLUDED)
-#define MPIUTIL_H_INCLUDED
-
-#include "mpiu_strerror.h"
-#include "mpiu_type_defs.h"
-#include "mpiassert.h"
-#include "mpiu_pointer.h"
-
-#endif /* !defined(MPIUTIL_H_INCLUDED) */
diff --git a/src/mpi/coll/opband.c b/src/mpi/coll/opband.c
index 5c27768..04c0ad6 100644
--- a/src/mpi/coll/opband.c
+++ b/src/mpi/coll/opband.c
@@ -6,7 +6,7 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 
 /*
  * In MPI-2.1, this operation is valid only for C integer, Fortran integer,
diff --git a/src/mpi/coll/opbor.c b/src/mpi/coll/opbor.c
index ef41f85..5a2fe5c 100644
--- a/src/mpi/coll/opbor.c
+++ b/src/mpi/coll/opbor.c
@@ -6,7 +6,7 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 
 /*
  * In MPI-2.1, this operation is valid only for C integer, Fortran integer,
diff --git a/src/mpi/coll/opbxor.c b/src/mpi/coll/opbxor.c
index 0be0be7..42cf011 100644
--- a/src/mpi/coll/opbxor.c
+++ b/src/mpi/coll/opbxor.c
@@ -6,7 +6,7 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 
 /*
  * In MPI-2.1, this operation is valid only for C integer, Fortran integer,
diff --git a/src/mpi/coll/opland.c b/src/mpi/coll/opland.c
index 45b21af..a6bf56f 100644
--- a/src/mpi/coll/opland.c
+++ b/src/mpi/coll/opland.c
@@ -6,9 +6,9 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 #ifdef HAVE_FORTRAN_BINDING
-#include "mpi_fortlogical.h"
+#include "mpir_fortlogical.h"
 #endif
 
 /*
diff --git a/src/mpi/coll/oplor.c b/src/mpi/coll/oplor.c
index 0a268ef..d71316f 100644
--- a/src/mpi/coll/oplor.c
+++ b/src/mpi/coll/oplor.c
@@ -6,9 +6,9 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 #ifdef HAVE_FORTRAN_BINDING
-#include "mpi_fortlogical.h"
+#include "mpir_fortlogical.h"
 #endif
 
 /*
diff --git a/src/mpi/coll/oplxor.c b/src/mpi/coll/oplxor.c
index 3ea3fbd..e66c22c 100644
--- a/src/mpi/coll/oplxor.c
+++ b/src/mpi/coll/oplxor.c
@@ -6,9 +6,9 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 #ifdef HAVE_FORTRAN_BINDING
-#include "mpi_fortlogical.h"
+#include "mpir_fortlogical.h"
 #endif
 
 /*
diff --git a/src/mpi/coll/opmax.c b/src/mpi/coll/opmax.c
index bb4fa15..93fad00 100644
--- a/src/mpi/coll/opmax.c
+++ b/src/mpi/coll/opmax.c
@@ -6,7 +6,7 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 
 /*
  * In MPI-2.1, this operation is valid only for C integer, Fortran integer,
diff --git a/src/mpi/coll/opmin.c b/src/mpi/coll/opmin.c
index 75cbf3f..a79b5f5 100644
--- a/src/mpi/coll/opmin.c
+++ b/src/mpi/coll/opmin.c
@@ -6,7 +6,7 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 
 /*
  * In MPI-2.1, this operation is valid only for C integer, Fortran integer,
diff --git a/src/mpi/coll/opprod.c b/src/mpi/coll/opprod.c
index bf9bdfc..34382ad 100644
--- a/src/mpi/coll/opprod.c
+++ b/src/mpi/coll/opprod.c
@@ -6,7 +6,7 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 
 /*
  * In MPI-2.1, this operation is valid only for C integer, Fortran integer,
diff --git a/src/mpi/coll/opsum.c b/src/mpi/coll/opsum.c
index 28e4c3b..f7e4eee 100644
--- a/src/mpi/coll/opsum.c
+++ b/src/mpi/coll/opsum.c
@@ -6,7 +6,7 @@
  */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 
 /*
  * In MPI-2.1, this operation is valid only for C integer, Fortran integer,
diff --git a/src/mpi/comm/comm_get_info.c b/src/mpi/comm/comm_get_info.c
index aa0d017..d4bf586 100644
--- a/src/mpi/comm/comm_get_info.c
+++ b/src/mpi/comm/comm_get_info.c
@@ -6,7 +6,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Comm_get_info */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/comm/comm_set_info.c b/src/mpi/comm/comm_set_info.c
index 0d0250b..493577c 100644
--- a/src/mpi/comm/comm_set_info.c
+++ b/src/mpi/comm/comm_set_info.c
@@ -7,7 +7,7 @@
 
 #include "mpiimpl.h"
 #include "mpl_utlist.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Comm_set_info */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/comm/commutil.c b/src/mpi/comm/commutil.c
index ce6b714..ce93308 100644
--- a/src/mpi/comm/commutil.c
+++ b/src/mpi/comm/commutil.c
@@ -6,10 +6,10 @@
 
 #include "mpiimpl.h"
 #include "mpicomm.h"
-#include "mpiinfo.h"    /* MPIU_Info_free */
+#include "mpir_info.h"    /* MPIU_Info_free */
 
 #include "mpl_utlist.h"
-#include "mpiu_uthash.h"
+#include "mpir_uthash.h"
 
 /* This is the utility file for comm that contains the basic comm items
    and storage management */
diff --git a/src/mpi/comm/contextid.c b/src/mpi/comm/contextid.c
index 611be2a..096e79f 100644
--- a/src/mpi/comm/contextid.c
+++ b/src/mpi/comm/contextid.c
@@ -6,10 +6,10 @@
 
 #include "mpiimpl.h"
 #include "mpicomm.h"
-#include "mpiinfo.h"    /* MPIU_Info_free */
+#include "mpir_info.h"    /* MPIU_Info_free */
 
 #include "mpl_utlist.h"
-#include "mpiu_uthash.h"
+#include "mpir_uthash.h"
 
 /*
 === BEGIN_MPI_T_CVAR_INFO_BLOCK ===
diff --git a/src/mpi/errhan/errcodes.h b/src/mpi/errhan/errcodes.h
index cad75f8..cbbc5d0 100644
--- a/src/mpi/errhan/errcodes.h
+++ b/src/mpi/errhan/errcodes.h
@@ -43,7 +43,7 @@ int MPIR_Err_add_code( int );
    and the value of the attribute MPI_LASTUSEDCODE will fail).
  */
 
-/* the error class bits are defined in mpierror.h, are 0x0000007f */
+/* the error class bits are defined in mpir_err.h, are 0x0000007f */
 #define ERROR_CLASS_MASK          MPIR_ERR_CLASS_MASK  
 #define ERROR_CLASS_SIZE          MPIR_ERR_CLASS_SIZE
 #define ERROR_DYN_MASK            0x40000000
diff --git a/src/mpi/errhan/errutil.c b/src/mpi/errhan/errutil.c
index 5c41389..582bbac 100644
--- a/src/mpi/errhan/errutil.c
+++ b/src/mpi/errhan/errutil.c
@@ -11,7 +11,7 @@
    MPIR_Err_create_code */
 #include <stdarg.h>
 /* Define USE_ERR_CODE_VALIST to get the prototype for the valist version
-   of MPIR_Err_create_code in mpierror.h (without this definition,
+   of MPIR_Err_create_code in mpir_err.h (without this definition,
    the prototype is not included.  The "valist" version of the function
    is used in only a few places, here and potentially in ROMIO) */
 #define USE_ERR_CODE_VALIST
diff --git a/src/mpi/errhan/file_call_errhandler.c b/src/mpi/errhan/file_call_errhandler.c
index d0473b4..48aa6cd 100644
--- a/src/mpi/errhan/file_call_errhandler.c
+++ b/src/mpi/errhan/file_call_errhandler.c
@@ -5,10 +5,7 @@
  */
 
 #include "mpiimpl.h"
-
-/* mpiext.h contains the prototypes for functions to interface MPICH
-   and ROMIO */
-#include "mpiext.h"
+#include "mpir_ext.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_File_call_errhandler */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/errhan/file_get_errhandler.c b/src/mpi/errhan/file_get_errhandler.c
index a69f7ec..7112917 100644
--- a/src/mpi/errhan/file_get_errhandler.c
+++ b/src/mpi/errhan/file_get_errhandler.c
@@ -6,10 +6,7 @@
  */
 
 #include "mpiimpl.h"
-
-/* mpiext.h contains the prototypes for functions to interface MPICH
-   and ROMIO */
-#include "mpiext.h"
+#include "mpir_ext.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_File_get_errhandler */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/errhan/file_set_errhandler.c b/src/mpi/errhan/file_set_errhandler.c
index 203c7c7..06c0cf1 100644
--- a/src/mpi/errhan/file_set_errhandler.c
+++ b/src/mpi/errhan/file_set_errhandler.c
@@ -6,10 +6,7 @@
  */
 
 #include "mpiimpl.h"
-
-/* mpiext.h contains the prototypes for functions to interface MPICH
-   and ROMIO */
-#include "mpiext.h"
+#include "mpir_ext.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_File_set_errhandler */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/info/info_create.c b/src/mpi/info/info_create.c
index a36da96..ca437c0 100644
--- a/src/mpi/info/info_create.c
+++ b/src/mpi/info/info_create.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Info_create */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/info/info_delete.c b/src/mpi/info/info_delete.c
index 623a1f8..7645dd1 100644
--- a/src/mpi/info/info_delete.c
+++ b/src/mpi/info/info_delete.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Info_delete */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/info/info_dup.c b/src/mpi/info/info_dup.c
index 3cd4b54..6a1ced6 100644
--- a/src/mpi/info/info_dup.c
+++ b/src/mpi/info/info_dup.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 #include <string.h>
 
diff --git a/src/mpi/info/info_free.c b/src/mpi/info/info_free.c
index b4e60b4..ca8191a 100644
--- a/src/mpi/info/info_free.c
+++ b/src/mpi/info/info_free.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Info_free */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/info/info_get.c b/src/mpi/info/info_get.c
index ccc88f6..f6bcf95 100644
--- a/src/mpi/info/info_get.c
+++ b/src/mpi/info/info_get.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Info_get */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/info/info_getn.c b/src/mpi/info/info_getn.c
index 80093b7..82152b2 100644
--- a/src/mpi/info/info_getn.c
+++ b/src/mpi/info/info_getn.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Info_get_nkeys */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/info/info_getnth.c b/src/mpi/info/info_getnth.c
index c5dd4f4..81d2d58 100644
--- a/src/mpi/info/info_getnth.c
+++ b/src/mpi/info/info_getnth.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Info_get_nthkey */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/info/info_getvallen.c b/src/mpi/info/info_getvallen.c
index ca15eb5..4051e28 100644
--- a/src/mpi/info/info_getvallen.c
+++ b/src/mpi/info/info_getvallen.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Info_get_valuelen */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/info/info_set.c b/src/mpi/info/info_set.c
index ce55262..4701ebf 100644
--- a/src/mpi/info/info_set.c
+++ b/src/mpi/info/info_set.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 #include <string.h>
 
diff --git a/src/mpi/info/infoutil.c b/src/mpi/info/infoutil.c
index af76a9b..261f911 100644
--- a/src/mpi/info/infoutil.c
+++ b/src/mpi/info/infoutil.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 /*#include <stdio.h>*/
 
 /* This is the utility file for info that contains the basic info items
diff --git a/src/mpi/init/initthread.c b/src/mpi/init/initthread.c
index 922576e..40f8080 100644
--- a/src/mpi/init/initthread.c
+++ b/src/mpi/init/initthread.c
@@ -14,7 +14,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 #include "datatype.h"
 #include "mpi_init.h"
 #ifdef HAVE_CRTDBG_H
diff --git a/src/mpi/pt2pt/bsendutil.c b/src/mpi/pt2pt/bsendutil.c
index a02d0f5..04ab5a2 100644
--- a/src/mpi/pt2pt/bsendutil.c
+++ b/src/mpi/pt2pt/bsendutil.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpiimpl.h"
-#include "mpibsend.h"
+#include "mpir_bsend.h"
 #include "bsendutil.h"
 
 /*
diff --git a/src/mpi/pt2pt/bsendutil.h b/src/mpi/pt2pt/bsendutil.h
index 065b34e..50fff28 100644
--- a/src/mpi/pt2pt/bsendutil.h
+++ b/src/mpi/pt2pt/bsendutil.h
@@ -5,7 +5,7 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-#include "mpibsend.h"
+#include "mpir_bsend.h"
 
 /* Function Prototypes for the bsend utility functions */
 int MPIR_Bsend_attach( void *, int );
diff --git a/src/mpi/rma/rmatypeutil.c b/src/mpi/rma/rmatypeutil.c
index 1bd81ea..cab4a3f 100644
--- a/src/mpi/rma/rmatypeutil.c
+++ b/src/mpi/rma/rmatypeutil.c
@@ -5,13 +5,13 @@
  */
 
 /* This file contains functions that support the RMA code but use some "private"
- * headers from the oputil.h in the "coll" directory.  The alternative is to put
+ * headers from the mpir_op_util.h in the "coll" directory.  The alternative is to put
  * this file in src/mpi/rma instead and add -I${top_srcdir}/src/mpi/coll to the
- * AM_CPPFLAGS.  That option is less preferable because the usage of "oputil.h"
+ * AM_CPPFLAGS.  That option is less preferable because the usage of "mpir_op_util.h"
  * can bleed out of this directory and it clutters the CPPFLAGS further. */
 
 #include "mpiimpl.h"
-#include "oputil.h"
+#include "mpir_op_util.h"
 
 /* Returns true iff the given type is valid for use in MPI-3 RMA atomics, such
  * as MPI_Compare_and_swap or MPI_Fetch_and_op.  Does NOT return MPICH error
diff --git a/src/mpi/romio/adio/include/adioi.h b/src/mpi/romio/adio/include/adioi.h
index 9edffb5..42b82ff 100644
--- a/src/mpi/romio/adio/include/adioi.h
+++ b/src/mpi/romio/adio/include/adioi.h
@@ -987,9 +987,9 @@ int  ADIOI_MPE_iwrite_b;
    (no loss of (meaningful) high order bytes in 8 byte MPI_Aint 
       to (possible) 4 byte ptr cast)                              */
 /* Should work even on 64bit or old 32bit configs                 */
-  /* Use MPIU_Ensure_Aint_fits_in_pointer from mpiutil.h and 
+  /* Use MPIU_Ensure_Aint_fits_in_pointer and
          MPIU_AINT_CAST_TO_VOID_PTR from configure (mpi.h) */
-  #include "glue_romio.h"
+  #include "mpir_ext.h"
 
   #define ADIOI_AINT_CAST_TO_VOID_PTR (void*)(intptr_t)
   /* The next two casts are only used when you don't want sign extension
diff --git a/src/mpi/romio/mpi-io/glue/mpich/mpio_err.c b/src/mpi/romio/mpi-io/glue/mpich/mpio_err.c
index dbb02a8..83046bf 100644
--- a/src/mpi/romio/mpi-io/glue/mpich/mpio_err.c
+++ b/src/mpi/romio/mpi-io/glue/mpich/mpio_err.c
@@ -9,25 +9,7 @@
 
 #include "mpioimpl.h"
 #include "adio_extern.h"
-
-/* MPICH error handling implementation */
-/* FIXME: These external prototypes should be included from 
-   mpich/src/include/mpiext.h */
-int MPIR_Err_create_code_valist(int, int, const char [], int, int, 
-				const char [], const char [], va_list );
-int MPIR_Err_is_fatal(int);
-
-void MPIR_Get_file_error_routine( MPI_Errhandler, 
-				  void (**)(MPI_File *, int *, ...), 
-				  int * );
-int MPIR_File_call_cxx_errhandler( MPI_File *, int *, 
-				   void (*)(MPI_File *, int *, ... ) );
-
-typedef int (* MPIR_Err_get_class_string_func_t)(int error, char *str, int length);
-void MPIR_Err_get_string( int, char *, int, MPIR_Err_get_class_string_func_t );
-
-struct MPIR_Comm;
-int MPID_Abort(struct MPIR_Comm *comm, int mpi_errno, int exit_code, const char *error_msg);
+#include "mpir_ext.h"
 
 int MPIO_Err_create_code(int lastcode, int fatal, const char fcname[],
 			 int line, int error_class, const char generic_msg[],
diff --git a/src/mpi/romio/mpi-io/mpioimpl.h b/src/mpi/romio/mpi-io/mpioimpl.h
index 8f636d3..6fcc83f 100644
--- a/src/mpi/romio/mpi-io/mpioimpl.h
+++ b/src/mpi/romio/mpi-io/mpioimpl.h
@@ -16,7 +16,7 @@
 #include "mpio.h"
 
 #ifdef ROMIO_INSIDE_MPICH
-#include "glue_romio.h"
+#include "mpir_ext.h"
 
 #define ROMIO_THREAD_CS_ENTER() MPIR_Ext_cs_enter()
 #define ROMIO_THREAD_CS_EXIT() MPIR_Ext_cs_exit()
diff --git a/src/mpi/topo/Makefile.mk b/src/mpi/topo/Makefile.mk
index 2400d9d..54caa33 100644
--- a/src/mpi/topo/Makefile.mk
+++ b/src/mpi/topo/Makefile.mk
@@ -38,5 +38,3 @@ mpi_sources +=                          \
 
 mpi_core_sources +=       \
     src/mpi/topo/topoutil.c
-
-noinst_HEADERS += src/mpi/topo/topo.h
diff --git a/src/mpi/topo/cart_coords.c b/src/mpi/topo/cart_coords.c
index 844161a..d54747c 100644
--- a/src/mpi/topo/cart_coords.c
+++ b/src/mpi/topo/cart_coords.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Cart_coords */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/cart_create.c b/src/mpi/topo/cart_create.c
index 1f0c130..99f39a0 100644
--- a/src/mpi/topo/cart_create.c
+++ b/src/mpi/topo/cart_create.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Cart_create */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/cart_get.c b/src/mpi/topo/cart_get.c
index 6e7dabf..f1ec93b 100644
--- a/src/mpi/topo/cart_get.c
+++ b/src/mpi/topo/cart_get.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Cart_get */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/cart_map.c b/src/mpi/topo/cart_map.c
index 457afa6..4ef7039 100644
--- a/src/mpi/topo/cart_map.c
+++ b/src/mpi/topo/cart_map.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Cart_map */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/cart_rank.c b/src/mpi/topo/cart_rank.c
index 14cd85e..5942542 100644
--- a/src/mpi/topo/cart_rank.c
+++ b/src/mpi/topo/cart_rank.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Cart_rank */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/cart_shift.c b/src/mpi/topo/cart_shift.c
index 396da61..0c4985d 100644
--- a/src/mpi/topo/cart_shift.c
+++ b/src/mpi/topo/cart_shift.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Cart_shift */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/cart_sub.c b/src/mpi/topo/cart_sub.c
index 5002239..014f20b 100644
--- a/src/mpi/topo/cart_sub.c
+++ b/src/mpi/topo/cart_sub.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Cart_sub */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/cartdim_get.c b/src/mpi/topo/cartdim_get.c
index 51b437f..a6b7a9e 100644
--- a/src/mpi/topo/cartdim_get.c
+++ b/src/mpi/topo/cartdim_get.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Cartdim_get */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/dims_create.c b/src/mpi/topo/dims_create.c
index 72d34c2..f3320c2 100644
--- a/src/mpi/topo/dims_create.c
+++ b/src/mpi/topo/dims_create.c
@@ -5,7 +5,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Dims_create */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/dist_gr_create.c b/src/mpi/topo/dist_gr_create.c
index b2f3b30..02032ca 100644
--- a/src/mpi/topo/dist_gr_create.c
+++ b/src/mpi/topo/dist_gr_create.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Dist_graph_create */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/dist_gr_create_adj.c b/src/mpi/topo/dist_gr_create_adj.c
index d3e12a9..454e6a8 100644
--- a/src/mpi/topo/dist_gr_create_adj.c
+++ b/src/mpi/topo/dist_gr_create_adj.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Dist_graph_create_adjacent */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/dist_gr_neighb.c b/src/mpi/topo/dist_gr_neighb.c
index cf1a88a..dd7a128 100644
--- a/src/mpi/topo/dist_gr_neighb.c
+++ b/src/mpi/topo/dist_gr_neighb.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Dist_graph_neighbors */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/dist_gr_neighb_count.c b/src/mpi/topo/dist_gr_neighb_count.c
index 26daeca..37ad547 100644
--- a/src/mpi/topo/dist_gr_neighb_count.c
+++ b/src/mpi/topo/dist_gr_neighb_count.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Dist_graph_neighbors_count */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/graph_get.c b/src/mpi/topo/graph_get.c
index 8d028be..ac34259 100644
--- a/src/mpi/topo/graph_get.c
+++ b/src/mpi/topo/graph_get.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Graph_get */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/graph_map.c b/src/mpi/topo/graph_map.c
index 89410e6..e6e41a0 100644
--- a/src/mpi/topo/graph_map.c
+++ b/src/mpi/topo/graph_map.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Graph_map */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/graph_nbr.c b/src/mpi/topo/graph_nbr.c
index 46e17fd..86b87ef 100644
--- a/src/mpi/topo/graph_nbr.c
+++ b/src/mpi/topo/graph_nbr.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Graph_neighbors */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/graphcreate.c b/src/mpi/topo/graphcreate.c
index 5d86fd0..4a11a89 100644
--- a/src/mpi/topo/graphcreate.c
+++ b/src/mpi/topo/graphcreate.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Graph_create */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/graphdimsget.c b/src/mpi/topo/graphdimsget.c
index db02477..d78d49e 100644
--- a/src/mpi/topo/graphdimsget.c
+++ b/src/mpi/topo/graphdimsget.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Graphdims_get */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/graphnbrcnt.c b/src/mpi/topo/graphnbrcnt.c
index a195fe3..003556a 100644
--- a/src/mpi/topo/graphnbrcnt.c
+++ b/src/mpi/topo/graphnbrcnt.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Graph_neighbors_count */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/inhb_allgather.c b/src/mpi/topo/inhb_allgather.c
index c14e4eb..f6ae155 100644
--- a/src/mpi/topo/inhb_allgather.c
+++ b/src/mpi/topo/inhb_allgather.c
@@ -5,7 +5,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Ineighbor_allgather */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/inhb_allgatherv.c b/src/mpi/topo/inhb_allgatherv.c
index 7429d7f..13e5964 100644
--- a/src/mpi/topo/inhb_allgatherv.c
+++ b/src/mpi/topo/inhb_allgatherv.c
@@ -5,7 +5,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Ineighbor_allgatherv */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/inhb_alltoall.c b/src/mpi/topo/inhb_alltoall.c
index 275b237..fe7d71b 100644
--- a/src/mpi/topo/inhb_alltoall.c
+++ b/src/mpi/topo/inhb_alltoall.c
@@ -5,7 +5,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Ineighbor_alltoall */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/inhb_alltoallv.c b/src/mpi/topo/inhb_alltoallv.c
index 5880a6d..505f2ab 100644
--- a/src/mpi/topo/inhb_alltoallv.c
+++ b/src/mpi/topo/inhb_alltoallv.c
@@ -5,7 +5,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Ineighbor_alltoallv */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/inhb_alltoallw.c b/src/mpi/topo/inhb_alltoallw.c
index d2d9f1f..5692b19 100644
--- a/src/mpi/topo/inhb_alltoallw.c
+++ b/src/mpi/topo/inhb_alltoallw.c
@@ -5,7 +5,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Ineighbor_alltoallw */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/topo.h b/src/mpi/topo/topo.h
deleted file mode 100644
index d2d851d..0000000
--- a/src/mpi/topo/topo.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-typedef struct MPIR_Graph_topology {
-  int nnodes;
-  int nedges;
-  int *index;
-  int *edges;
-} MPIR_Graph_topology;
-
-typedef struct MPIR_Cart_topology {
-  int nnodes;     /* Product of dims[*], gives the size of the topology */
-  int ndims;
-  int *dims;
-  int *periodic;
-  int *position;
-} MPIR_Cart_topology;
-
-typedef struct MPIR_Dist_graph_topology {
-    int indegree;
-    int *in;
-    int *in_weights;
-    int outdegree;
-    int *out;
-    int *out_weights;
-    int is_weighted;
-} MPIR_Dist_graph_topology;
-
-typedef struct MPIR_Topology { 
-  MPIR_Topo_type kind;
-  union topo { 
-    MPIR_Graph_topology graph;
-    MPIR_Cart_topology  cart;
-    MPIR_Dist_graph_topology dist_graph;
-  } topo;
-} MPIR_Topology;
-
-MPIR_Topology *MPIR_Topology_get( MPIR_Comm * );
-int MPIR_Topology_put( MPIR_Comm *, MPIR_Topology * );
-int MPIR_Cart_create( MPIR_Comm *, int, const int [],
-		      const int [], int, MPI_Comm * );
-int MPIR_Graph_create( MPIR_Comm *, int,
-		       const int[], const int[], int, 
-		       MPI_Comm *);
-int MPIR_Dims_create( int, int, int * );
-int MPIR_Graph_map( const MPIR_Comm *, int, const int[], const int[], int* );
-int MPIR_Cart_map( const MPIR_Comm *, int, const int[],  const int[], int* );
-
-/* Returns the canonicalized count of neighbors for the given topology as though
- * MPI_Dist_graph_neighbors_count were called with a distributed graph topology,
- * even if the given topology is actually Cartesian or Graph.  Useful for
- * implementing neighborhood collective operations. */
-int MPIR_Topo_canon_nhb_count(MPIR_Comm *comm_ptr, int *indegree, int *outdegree, int *weighted);
-
-/* Returns the canonicalized list of neighbors for a given topology, separated
- * into inbound and outbound edges.  Equivalent to MPI_Dist_graph_neighbors but
- * works for any topology type by canonicalizing according to the rules in
- * Section 7.6 of the MPI-3.0 standard. */
-int MPIR_Topo_canon_nhb(MPIR_Comm *comm_ptr,
-                        int indegree, int sources[], int inweights[],
-                        int outdegree, int dests[], int outweights[]);
-
-#define MAX_CART_DIM 16
diff --git a/src/mpi/topo/topo_test.c b/src/mpi/topo/topo_test.c
index 83e78c5..e36fbf2 100644
--- a/src/mpi/topo/topo_test.c
+++ b/src/mpi/topo/topo_test.c
@@ -6,7 +6,6 @@
  */
 
 #include "mpiimpl.h"
-#include "topo.h"
 
 /* -- Begin Profiling Symbol Block for routine MPI_Topo_test */
 #if defined(HAVE_PRAGMA_WEAK)
diff --git a/src/mpi/topo/topoutil.c b/src/mpi/topo/topoutil.c
index a94be22..5b1225b 100644
--- a/src/mpi/topo/topoutil.c
+++ b/src/mpi/topo/topoutil.c
@@ -5,7 +5,6 @@
  *      See COPYRIGHT in top-level directory.
  */
 #include "mpiimpl.h"
-#include "topo.h"
 
 static int unweighted_dummy = 0x46618;
 static int weights_empty_dummy = 0x022284;
diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_defs.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_defs.h
index 3a352b7..81d3d67 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_defs.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_defs.h
@@ -9,7 +9,7 @@
 
 #include "mpid_nem_datatypes.h"
 #include "mpi.h"
-#include "mpiutil.h"
+#include "mpiimpl.h"
 #include "mpiu_os_wrappers_pre.h"
 
 #define MPID_NEM_MAX_FNAME_LEN 256
diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h
index 12601ab..addac24 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h
@@ -9,7 +9,7 @@
 
 #include "my_papi_defs.h"
 #include "mpidi_ch3_impl.h"
-#include "mpimem.h"
+#include "mpir_mem.h"
 #include "mpid_nem_net_module_defs.h"
 #include "mpid_nem_atomics.h"
 #include "mpid_nem_defs.h"
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_vc.c b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_vc.c
index 09db8e7..0c68da7 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_vc.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_vc.c
@@ -194,7 +194,7 @@ static int MPID_nem_llc_vc_prnt(MPIDI_VC_t * vc)
     int mpi_errno = MPI_SUCCESS;
 
     /* MPIU_OBJECT_HEADER; */
-    /* src/include/mpihandlemem.h */
+    /* src/include/mpir_objects.h */
     /* int handle; */
     /* MPIU_Handle_ref_count ref_count; */
     /* MPIDI_VC_State_t state; */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
index 9934d76..959503b 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
@@ -11,7 +11,7 @@
 #define OFI_IMPL_H
 
 #include "mpid_nem_impl.h"
-#include "mpihandlemem.h"
+#include "mpir_objects.h"
 #include "pmi.h"
 #include <rdma/fabric.h>
 #include <rdma/fi_errno.h>
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
index 60cea66..2f378db 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
@@ -6,7 +6,7 @@
 
 #include "mpid_nem_impl.h"
 #include "mpidimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 #include "mpidrma.h"
 
 /* FIXME: get this from OS */
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c b/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
index c1897a9..f0ce8d2 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
@@ -7,7 +7,7 @@
 #include "mpid_nem_impl.h"
 #undef utarray_oom
 #define utarray_oom() do { goto fn_oom; } while (0)
-#include "mpiu_utarray.h"
+#include "mpir_utarray.h"
 
 #define NULL_CONTEXT_ID -1
 
diff --git a/src/mpid/ch3/include/mpid_thread.h b/src/mpid/ch3/include/mpid_thread.h
index 7c55275..c3f369d 100644
--- a/src/mpid/ch3/include/mpid_thread.h
+++ b/src/mpid/ch3/include/mpid_thread.h
@@ -8,7 +8,6 @@
 #define MPID_THREAD_H_INCLUDED
 
 #include "mpidu_thread_fallback.h"
-#include "mpiutil.h"
 
 /* We simply use the fallback timer functionality and do not define
  * our own */
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index abe83f2..68cb155 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -7,7 +7,7 @@
 #ifndef HAVE_MPIDPKT_H
 #define HAVE_MPIDPKT_H
 
-#include "oputil.h"
+#include "mpir_op_util.h"
 
 #ifdef HAVE_STDINT_H
 #include <stdint.h>
diff --git a/src/mpid/ch3/include/mpidpost.h b/src/mpid/ch3/include/mpidpost.h
index cc3ecbe..be5edb4 100644
--- a/src/mpid/ch3/include/mpidpost.h
+++ b/src/mpid/ch3/include/mpidpost.h
@@ -4,8 +4,8 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-#if !defined(MPICH_MPIDPOST_H_INCLUDED)
-#define MPICH_MPIDPOST_H_INCLUDED
+#if !defined(MPIDPOST_H_INCLUDED)
+#define MPIDPOST_H_INCLUDED
 
 #include "mpid_coll.h"
 
@@ -194,4 +194,4 @@ int MPID_PG_ForwardPGInfo( MPIR_Comm *peer_ptr, MPIR_Comm *comm_ptr,
 int MPIDI_CH3I_Comm_create_hook(struct MPIR_Comm *);
 int MPIDI_CH3I_Comm_destroy_hook(struct MPIR_Comm *);
 
-#endif /* !defined(MPICH_MPIDPOST_H_INCLUDED) */
+#endif /* !defined(MPIDPOST_H_INCLUDED) */
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index 284f80e..9d87e5d 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -7,8 +7,8 @@
 /* FIXME: This header should contain only the definitions exported to the
    mpiimpl.h level */
 
-#if !defined(MPICH_MPIDPRE_H_INCLUDED)
-#define MPICH_MPIDPRE_H_INCLUDED
+#if !defined(MPIDPRE_H_INCLUDED)
+#define MPIDPRE_H_INCLUDED
 
 /* Tell the compiler that we're going to declare struct MPIR_Request later */
 struct MPIR_Request;
@@ -507,4 +507,206 @@ typedef struct {
 /* Tell initthread to prepare a private comm_world */
 #define MPID_NEEDS_ICOMM_WORLD
 
-#endif /* !defined(MPICH_MPIDPRE_H_INCLUDED) */
+int MPID_Init( int *argc_p, char ***argv_p, int requested,
+	       int *provided, int *has_args, int *has_env );
+
+int MPID_InitCompleted( void );
+
+int MPID_Finalize(void);
+int MPID_Abort( MPIR_Comm *comm, int mpi_errno, int exit_code, const char *error_msg );
+
+int MPID_Open_port(MPIR_Info *, char *);
+int MPID_Close_port(const char *);
+
+int MPID_Comm_accept(const char *, MPIR_Info *, int, MPIR_Comm *, MPIR_Comm **);
+
+int MPID_Comm_connect(const char *, MPIR_Info *, int, MPIR_Comm *, MPIR_Comm **);
+
+int MPID_Comm_disconnect(MPIR_Comm *);
+
+int MPID_Comm_spawn_multiple(int, char *[], char **[], const int [], MPIR_Info* [],
+                             int, MPIR_Comm *, MPIR_Comm **, int []);
+
+int MPID_Comm_failure_ack(MPIR_Comm *comm);
+
+int MPID_Comm_failure_get_acked(MPIR_Comm *comm, MPIR_Group **failed_group_ptr);
+
+int MPID_Comm_get_all_failed_procs(MPIR_Comm *comm_ptr, MPIR_Group **failed_group, int tag);
+
+int MPID_Comm_revoke(MPIR_Comm *comm, int is_remote);
+
+int MPID_Send( const void *buf, MPI_Aint count, MPI_Datatype datatype,
+	       int dest, int tag, MPIR_Comm *comm, int context_offset,
+	       MPIR_Request **request );
+
+int MPID_Rsend( const void *buf, int count, MPI_Datatype datatype,
+		int dest, int tag, MPIR_Comm *comm, int context_offset,
+		MPIR_Request **request );
+
+int MPID_Ssend( const void *buf, MPI_Aint count, MPI_Datatype datatype,
+		int dest, int tag, MPIR_Comm *comm, int context_offset,
+		MPIR_Request **request );
+
+int MPID_tBsend( const void *buf, int count, MPI_Datatype datatype,
+		 int dest, int tag, MPIR_Comm *comm, int context_offset );
+
+int MPID_Isend( const void *buf, MPI_Aint count, MPI_Datatype datatype,
+		int dest, int tag, MPIR_Comm *comm, int context_offset,
+		MPIR_Request **request );
+
+int MPID_Irsend( const void *buf, int count, MPI_Datatype datatype,
+		 int dest, int tag, MPIR_Comm *comm, int context_offset,
+		 MPIR_Request **request );
+
+int MPID_Issend( const void *buf, int count, MPI_Datatype datatype,
+		 int dest, int tag, MPIR_Comm *comm, int context_offset,
+		 MPIR_Request **request );
+
+int MPID_Recv( void *buf, MPI_Aint count, MPI_Datatype datatype,
+	       int source, int tag, MPIR_Comm *comm, int context_offset,
+	       MPI_Status *status, MPIR_Request **request );
+
+int MPID_Irecv( void *buf, MPI_Aint count, MPI_Datatype datatype,
+		int source, int tag, MPIR_Comm *comm, int context_offset,
+		MPIR_Request **request );
+
+int MPID_Send_init( const void *buf, int count, MPI_Datatype datatype,
+		    int dest, int tag, MPIR_Comm *comm, int context_offset,
+		    MPIR_Request **request );
+
+int MPID_Bsend_init(const void *, int, MPI_Datatype, int, int, MPIR_Comm *,
+		   int, MPIR_Request **);
+int MPID_Rsend_init( const void *buf, int count, MPI_Datatype datatype,
+		     int dest, int tag, MPIR_Comm *comm, int context_offset,
+		     MPIR_Request **request );
+int MPID_Ssend_init( const void *buf, int count, MPI_Datatype datatype,
+		     int dest, int tag, MPIR_Comm *comm, int context_offset,
+		     MPIR_Request **request );
+
+int MPID_Recv_init( void *buf, int count, MPI_Datatype datatype,
+		    int source, int tag, MPIR_Comm *comm, int context_offset,
+		    MPIR_Request **request );
+
+int MPID_Startall(int count, MPIR_Request *requests[]);
+
+int MPID_Probe(int, int, MPIR_Comm *, int, MPI_Status *);
+int MPID_Iprobe(int, int, MPIR_Comm *, int, int *, MPI_Status *);
+
+int MPID_Mprobe(int source, int tag, MPIR_Comm *comm, int context_offset,
+                MPIR_Request **message, MPI_Status *status);
+
+int MPID_Improbe(int source, int tag, MPIR_Comm *comm, int context_offset,
+                 int *flag, MPIR_Request **message, MPI_Status *status);
+
+int MPID_Imrecv(void *buf, int count, MPI_Datatype datatype,
+                MPIR_Request *message, MPIR_Request **rreqp);
+
+int MPID_Mrecv(void *buf, int count, MPI_Datatype datatype,
+               MPIR_Request *message, MPI_Status *status);
+
+int MPID_Cancel_send(MPIR_Request *);
+int MPID_Cancel_recv(MPIR_Request *);
+
+int MPID_Comm_AS_enabled(MPIR_Comm *);
+
+int MPID_Request_is_anysource(MPIR_Request *);
+
+MPI_Aint MPID_Aint_add(MPI_Aint base, MPI_Aint disp);
+
+MPI_Aint MPID_Aint_diff(MPI_Aint addr1, MPI_Aint addr2);
+
+int MPID_Win_create(void *, MPI_Aint, int, MPIR_Info *, MPIR_Comm *,
+                    MPIR_Win **);
+int MPID_Win_free(MPIR_Win **);
+
+int MPID_Put(const void *, int, MPI_Datatype, int, MPI_Aint, int,
+             MPI_Datatype, MPIR_Win *);
+int MPID_Get(void *, int, MPI_Datatype, int, MPI_Aint, int,
+             MPI_Datatype, MPIR_Win *);
+int MPID_Accumulate(const void *, int, MPI_Datatype, int, MPI_Aint, int,
+                    MPI_Datatype, MPI_Op, MPIR_Win *);
+
+int MPID_Win_fence(int, MPIR_Win *);
+int MPID_Win_post(MPIR_Group *group_ptr, int assert, MPIR_Win *win_ptr);
+int MPID_Win_start(MPIR_Group *group_ptr, int assert, MPIR_Win *win_ptr);
+int MPID_Win_test(MPIR_Win *win_ptr, int *flag);
+int MPID_Win_wait(MPIR_Win *win_ptr);
+int MPID_Win_complete(MPIR_Win *win_ptr);
+
+int MPID_Win_lock(int lock_type, int dest, int assert, MPIR_Win *win_ptr);
+int MPID_Win_unlock(int dest, MPIR_Win *win_ptr);
+
+int MPID_Win_allocate(MPI_Aint size, int disp_unit, MPIR_Info *info,
+                      MPIR_Comm *comm, void *baseptr, MPIR_Win **win);
+int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPIR_Info *info_ptr, MPIR_Comm *comm_ptr,
+                             void *base_ptr, MPIR_Win **win_ptr);
+int MPID_Win_shared_query(MPIR_Win *win, int rank, MPI_Aint *size, int *disp_unit,
+                          void *baseptr);
+int MPID_Win_create_dynamic(MPIR_Info *info, MPIR_Comm *comm, MPIR_Win **win);
+int MPID_Win_attach(MPIR_Win *win, void *base, MPI_Aint size);
+int MPID_Win_detach(MPIR_Win *win, const void *base);
+int MPID_Win_get_info(MPIR_Win *win, MPIR_Info **info_used);
+int MPID_Win_set_info(MPIR_Win *win, MPIR_Info *info);
+
+int MPID_Get_accumulate(const void *origin_addr, int origin_count,
+                        MPI_Datatype origin_datatype, void *result_addr, int result_count,
+                        MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
+                        int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win *win);
+int MPID_Fetch_and_op(const void *origin_addr, void *result_addr,
+                      MPI_Datatype datatype, int target_rank, MPI_Aint target_disp,
+                      MPI_Op op, MPIR_Win *win);
+int MPID_Compare_and_swap(const void *origin_addr, const void *compare_addr,
+                          void *result_addr, MPI_Datatype datatype, int target_rank,
+                          MPI_Aint target_disp, MPIR_Win *win);
+int MPID_Rput(const void *origin_addr, int origin_count,
+              MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
+              int target_count, MPI_Datatype target_datatype, MPIR_Win *win,
+              MPIR_Request **request);
+int MPID_Rget(void *origin_addr, int origin_count,
+              MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
+              int target_count, MPI_Datatype target_datatype, MPIR_Win *win,
+              MPIR_Request **request);
+int MPID_Raccumulate(const void *origin_addr, int origin_count,
+                     MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp,
+                     int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win *win,
+                     MPIR_Request **request);
+int MPID_Rget_accumulate(const void *origin_addr, int origin_count,
+                         MPI_Datatype origin_datatype, void *result_addr, int result_count,
+                         MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
+                         int target_count, MPI_Datatype target_datatype, MPI_Op op, MPIR_Win *win,
+                         MPIR_Request **request);
+
+int MPID_Win_lock_all(int assert, MPIR_Win *win);
+int MPID_Win_unlock_all(MPIR_Win *win);
+int MPID_Win_flush(int rank, MPIR_Win *win);
+int MPID_Win_flush_all(MPIR_Win *win);
+int MPID_Win_flush_local(int rank, MPIR_Win *win);
+int MPID_Win_flush_local_all(MPIR_Win *win);
+int MPID_Win_sync(MPIR_Win *win);
+
+void MPID_Progress_start(MPID_Progress_state * state);
+int MPID_Progress_wait(MPID_Progress_state * state);
+void MPID_Progress_end(MPID_Progress_state * stae);
+int MPID_Progress_test(void);
+int MPID_Progress_poke(void);
+
+int MPID_Get_processor_name( char *name, int namelen, int *resultlen);
+int MPID_Get_universe_size(int  * universe_size);
+int MPID_Comm_get_lpid(MPIR_Comm *comm_ptr, int idx, int * lpid_ptr, MPIU_BOOL is_remote);
+
+void MPID_Request_init(MPIR_Request *);
+void MPID_Request_finalize(MPIR_Request *);
+int MPID_Request_complete(MPIR_Request *);
+
+void *MPID_Alloc_mem( size_t size, MPIR_Info *info );
+int MPID_Free_mem( void *ptr );
+
+/* Prototypes and definitions for the node ID code.  This is used to support
+   hierarchical collectives in a (mostly) device-independent way. */
+#if defined(MPID_USE_NODE_IDS)
+/* MPID_Node_id_t is a signed integer type defined by the device in mpidpre.h. */
+int MPID_Get_node_id(MPIR_Comm *comm, int rank, MPID_Node_id_t *id_p);
+int MPID_Get_max_node_id(MPIR_Comm *comm, MPID_Node_id_t *max_id_p);
+#endif
+
+#endif /* !defined(MPIDPRE_H_INCLUDED) */
diff --git a/src/mpid/ch3/src/ch3u_handle_connection.c b/src/mpid/ch3/src/ch3u_handle_connection.c
index 46e2948..4d276b0 100644
--- a/src/mpid/ch3/src/ch3u_handle_connection.c
+++ b/src/mpid/ch3/src/ch3u_handle_connection.c
@@ -12,7 +12,7 @@
 #endif
 #undef utarray_oom
 #define utarray_oom() do { goto fn_oom; } while (0)
-#include "mpiu_utarray.h"
+#include "mpir_utarray.h"
 
 /* Count the number of outstanding close requests */
 static volatile int MPIDI_Outstanding_close_ops = 0;
diff --git a/src/mpid/ch3/src/ch3u_win_fns.c b/src/mpid/ch3/src/ch3u_win_fns.c
index ddfe6ee..fac96df 100644
--- a/src/mpid/ch3/src/ch3u_win_fns.c
+++ b/src/mpid/ch3/src/ch3u_win_fns.c
@@ -5,7 +5,7 @@
  */
 
 #include "mpidimpl.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 #include "mpidrma.h"
 
 extern MPIR_T_pvar_timer_t PVAR_TIMER_rma_wincreate_allgather ATTRIBUTE((unused));
diff --git a/src/mpid/common/datatype/mpidu_datatype.h b/src/mpid/common/datatype/mpidu_datatype.h
index dcd6e5a..37f524d 100644
--- a/src/mpid/common/datatype/mpidu_datatype.h
+++ b/src/mpid/common/datatype/mpidu_datatype.h
@@ -8,7 +8,7 @@
 
 #include "mpiimpl.h"
 #include "mpidu_dataloop.h"
-#include "mpihandlemem.h"
+#include "mpir_objects.h"
 
 /* NOTE: 
  * - struct MPIDU_Dataloop and MPIDU_Segment are defined in 
diff --git a/src/mpid/common/sock/mpidu_sock.h b/src/mpid/common/sock/mpidu_sock.h
index e061b88..8833290 100644
--- a/src/mpid/common/sock/mpidu_sock.h
+++ b/src/mpid/common/sock/mpidu_sock.h
@@ -22,8 +22,11 @@ CPLUSPLUS_BEGIN
 /* Load just the utility definitions that we need */
 #include "mpichconf.h"
 #include "mpl.h"
-#include "mpiutil.h"
-#include "mpich_cvars.h"
+#include "mpir_strerror.h"
+#include "mpir_type_defs.h"
+#include "mpir_assert.h"
+#include "mpir_pointers.h"
+#include "mpir_cvars.h"
 /* implementation specific header file */    
 #include "mpidu_socki.h"
 
diff --git a/src/mpid/common/thread/mpidu_thread_fallback.h b/src/mpid/common/thread/mpidu_thread_fallback.h
index dd00b86..b441e7f 100644
--- a/src/mpid/common/thread/mpidu_thread_fallback.h
+++ b/src/mpid/common/thread/mpidu_thread_fallback.h
@@ -7,7 +7,6 @@
 #if !defined(MPIDU_THREAD_H_INCLUDED)
 #define MPIDU_THREAD_H_INCLUDED
 
-#include "mpiutil.h"
 #include "opa_primitives.h"
 
 /* some important critical section names:
diff --git a/src/mpid/pamid/include/mpidi_mutex.h b/src/mpid/pamid/include/mpidi_mutex.h
index 396cdec..197eecc 100644
--- a/src/mpid/pamid/include/mpidi_mutex.h
+++ b/src/mpid/pamid/include/mpidi_mutex.h
@@ -27,7 +27,7 @@
 #define __include_mpidi_mutex_h__
 
 #include <opa_primitives.h>
-#include <mpiutil.h>
+#include <mpiimpl.h>
 #include <malloc.h>
 
 #define MPIDI_THREAD_ID() Kernel_ProcessorID()
diff --git a/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c b/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
index 46bc3f6..5de7b1a 100644
--- a/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
+++ b/src/mpid/pamid/src/dyntask/mpid_comm_spawn_multiple.c
@@ -167,8 +167,8 @@ int MPIDI_Comm_spawn_multiple(int count, char **commands,
 
 	/* Open a port for the spawned processes to connect to */
 	/* FIXME: info may be needed for port name */
-        mpi_errno = MPIR_Open_port(NULL, port_name);
-        TRACE_ERR("mpi_errno from MPIR_Open_port=%d\n", mpi_errno);
+        mpi_errno = MPID_Open_port(NULL, port_name);
+        TRACE_ERR("mpi_errno from MPID_Open_port=%d\n", mpi_errno);
 
 	/* Spawn the processes */
 #ifdef USE_PMI2_API
diff --git a/src/mpid/pamid/src/dyntask/mpid_port.c b/src/mpid/pamid/src/dyntask/mpid_port.c
index e35e0f3..7f115d8 100644
--- a/src/mpid/pamid/src/dyntask/mpid_port.c
+++ b/src/mpid/pamid/src/dyntask/mpid_port.c
@@ -30,7 +30,7 @@ static MPIDI_PortFns portFns = { MPIDI_Open_port,
 				 MPIDI_Comm_connect };
 
 /*@
-   MPIR_Open_port - Open an MPI Port
+   MPID_Open_port - Open an MPI Port
 
    Input Arguments:
 .  MPI_Info info - info
@@ -45,7 +45,7 @@ static MPIDI_PortFns portFns = { MPIDI_Open_port,
 .N MPI_SUCCESS
 .N MPI_ERR_OTHER
 @*/
-int MPIR_Open_port(MPIR_Info *info_ptr, char *port_name)
+int MPID_Open_port(MPIR_Info *info_ptr, char *port_name)
 {
     int mpi_errno=MPI_SUCCESS;
 
diff --git a/src/mpid/pamid/src/misc/mpid_unimpl.c b/src/mpid/pamid/src/misc/mpid_unimpl.c
index 5e12cc3..6b33086 100644
--- a/src/mpid/pamid/src/misc/mpid_unimpl.c
+++ b/src/mpid/pamid/src/misc/mpid_unimpl.c
@@ -27,7 +27,7 @@ int MPID_Close_port(const char *port_name)
   MPID_abort();
   return 0;
 }
-int MPIR_Open_port(MPIR_Info *info_ptr,
+int MPID_Open_port(MPIR_Info *info_ptr,
                    char *port_name)
 {
   MPID_abort();
diff --git a/src/mpid/pamid/src/onesided/mpid_win_get_info.c b/src/mpid/pamid/src/onesided/mpid_win_get_info.c
index e74c379..1cb1156 100644
--- a/src/mpid/pamid/src/onesided/mpid_win_get_info.c
+++ b/src/mpid/pamid/src/onesided/mpid_win_get_info.c
@@ -21,7 +21,7 @@
  *        associated with win.                                          
  */
 #include "mpidi_onesided.h"
-#include "mpiinfo.h"
+#include "mpir_info.h"
 
 /**
  * \brief MPI-PAMI glue for MPI_WIN_GET_INFO function
diff --git a/src/pm/gforker/mpiexec.c b/src/pm/gforker/mpiexec.c
index 33fd636..09e83a3 100644
--- a/src/pm/gforker/mpiexec.c
+++ b/src/pm/gforker/mpiexec.c
@@ -84,11 +84,11 @@
 #include "env.h"
 #include "simple_pmiutil.h"
 
-/* We can't use mpimem.h, because the memory routines are no longer available
+/* We can't use mpir_mem.h, because the memory routines are no longer available
    as utility routines, and instead now import properties from the device 
    and other parts of the code */
-/* mpimem.h contains prototypes for MPL_strncpy etc. */
-/* #include "mpimem.h" */
+/* mpir_mem.h contains prototypes for MPL_strncpy etc. */
+/* #include "mpir_mem.h" */
 
 typedef struct { PMISetup pmiinfo; IOLabelSetup labelinfo; } SetupInfo;
 
diff --git a/src/pm/remshell/mpiexec.c b/src/pm/remshell/mpiexec.c
index 96e16cc..7752b3f 100644
--- a/src/pm/remshell/mpiexec.c
+++ b/src/pm/remshell/mpiexec.c
@@ -80,9 +80,9 @@
 #include "rm.h"
 #include "simple_pmiutil.h"
 #include "env.h"             /* MPIE_Putenv */
-/* mpimem.h contains prototypes for MPL_strncpy etc. */
+/* mpir_mem.h contains prototypes for MPL_strncpy etc. */
 /* We no longer can use these because they are MPI device specific */
-/* #include "mpimem.h" */
+/* #include "mpir_mem.h" */
 
 typedef struct { PMISetup pmiinfo; IOLabelSetup labelinfo; } SetupInfo;
 
diff --git a/src/pmi/simple/simple_pmi.c b/src/pmi/simple/simple_pmi.c
index 70ccadf..b506050 100644
--- a/src/pmi/simple/simple_pmi.c
+++ b/src/pmi/simple/simple_pmi.c
@@ -47,7 +47,7 @@
 
 #include "mpl.h"            /* Get ATTRIBUTE, some base functions */
 /* mpimem includes the definitions for MPL_malloc and MPL_free */
-#include "mpimem.h"
+#include "mpir_mem.h"
 
 /* Temporary debug definitions */
 /* #define DBG_PRINTF(args) printf args ; fflush(stdout) */
diff --git a/src/pmi/simple/simple_pmiutil.c b/src/pmi/simple/simple_pmiutil.c
index 21203b5..2061c9d 100644
--- a/src/pmi/simple/simple_pmiutil.c
+++ b/src/pmi/simple/simple_pmiutil.c
@@ -33,7 +33,7 @@
 #include "simple_pmiutil.h"
 
 /* Use the memory definitions from mpich/src/include */
-#include "mpimem.h"
+#include "mpir_mem.h"
 
 #define MAXVALLEN 1024
 #define MAXKEYLEN   32
diff --git a/src/util/Makefile.mk b/src/util/Makefile.mk
index a6b3e1c..f97cc63 100644
--- a/src/util/Makefile.mk
+++ b/src/util/Makefile.mk
@@ -9,9 +9,5 @@ include $(top_srcdir)/src/util/logging/Makefile.mk
 include $(top_srcdir)/src/util/mem/Makefile.mk
 include $(top_srcdir)/src/util/cvar/Makefile.mk
 include $(top_srcdir)/src/util/procmap/Makefile.mk
-include $(top_srcdir)/src/util/refcount/Makefile.mk
-include $(top_srcdir)/src/util/type/Makefile.mk
 include $(top_srcdir)/src/util/wrappers/Makefile.mk
 include $(top_srcdir)/src/util/assert/Makefile.mk
-include $(top_srcdir)/src/util/pointer/Makefile.mk
-
diff --git a/src/util/assert/Makefile.mk b/src/util/assert/Makefile.mk
index 94771fa..7fa6594 100644
--- a/src/util/assert/Makefile.mk
+++ b/src/util/assert/Makefile.mk
@@ -5,8 +5,5 @@
 ##     See COPYRIGHT in top-level directory.
 ##
 
-AM_CPPFLAGS += -I$(top_srcdir)/src/util/assert
-
-noinst_HEADERS += src/util/assert/mpiassert.h
 mpi_core_sources += src/util/assert/assert.c
 
diff --git a/src/util/cvar/Makefile.mk b/src/util/cvar/Makefile.mk
index cab118e..9a2de34 100644
--- a/src/util/cvar/Makefile.mk
+++ b/src/util/cvar/Makefile.mk
@@ -6,10 +6,10 @@
 ##
 
 mpi_core_sources +=   \
-    src/util/cvar/mpich_cvars.c
+    src/util/cvar/mpir_cvars.c
 
 if MAINTAINER_MODE
 # normally built by autogen.sh, but this rebuild rule is here
-$(top_srcdir)/src/util/cvar/mpich_cvars.c: $(top_srcdir)/maint/extractcvars
+$(top_srcdir)/src/util/cvar/mpir_cvars.c: $(top_srcdir)/maint/extractcvars
 	( cd $(top_srcdir) && $(top_srcdir)/maint/extractcvars --dirs="`cat $(top_srcdir)/maint/cvardirs`")
 endif MAINTAINER_MODE
diff --git a/src/util/logging/rlog/TraceInput/logformat_trace_InputLog.c b/src/util/logging/rlog/TraceInput/logformat_trace_InputLog.c
index 6b0d0ed..a064045 100644
--- a/src/util/logging/rlog/TraceInput/logformat_trace_InputLog.c
+++ b/src/util/logging/rlog/TraceInput/logformat_trace_InputLog.c
@@ -13,7 +13,7 @@
 #include "logformat_trace_InputLog.h"
 #include "trace_API.h"
 #include <stdlib.h>
-#include "mpimem.h"
+#include "mpir_mem.h"
 
 /* style: allow:fprintf:20 sig:0 */
 
diff --git a/src/util/logging/rlog/TraceInput/trace_input.c b/src/util/logging/rlog/TraceInput/trace_input.c
index f0f28c6..a508887 100644
--- a/src/util/logging/rlog/TraceInput/trace_input.c
+++ b/src/util/logging/rlog/TraceInput/trace_input.c
@@ -16,7 +16,7 @@
 #include <string.h>
 #endif
 #include "trace_API.h"
-#include "mpimem.h"
+#include "mpir_mem.h"
 
 #define TRACEINPUT_SUCCESS 0
 #define TRACEINPUT_FAIL    -1
diff --git a/src/util/logging/rlog/irlog2rlog.c b/src/util/logging/rlog/irlog2rlog.c
index 1b24484..fe0d83d 100644
--- a/src/util/logging/rlog/irlog2rlog.c
+++ b/src/util/logging/rlog/irlog2rlog.c
@@ -15,7 +15,7 @@
 #include <errno.h>
 #include <ctype.h> /* isdigit */
 #include "mpichconf.h" /* HAVE_SNPRINTF */
-#include "mpimem.h" /* MPL_snprintf */
+#include "mpir_mem.h" /* MPL_snprintf */
 
 #ifndef BOOL
 #define BOOL int
diff --git a/src/util/logging/rlog/irlogutil.c b/src/util/logging/rlog/irlogutil.c
index 4442ad2..fbe9e3f 100644
--- a/src/util/logging/rlog/irlogutil.c
+++ b/src/util/logging/rlog/irlogutil.c
@@ -8,7 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
-#include "mpimem.h"
+#include "mpir_mem.h"
 
 static int ReadFileData(char *pBuffer, int length, FILE *fin)
 {
diff --git a/src/util/logging/rlog/minalignrlog.c b/src/util/logging/rlog/minalignrlog.c
index 8dd9cb0..d408eb3 100644
--- a/src/util/logging/rlog/minalignrlog.c
+++ b/src/util/logging/rlog/minalignrlog.c
@@ -11,7 +11,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
-#include "mpimem.h"
+#include "mpir_mem.h"
 
 void PrintState(RLOG_STATE *pState)
 {
diff --git a/src/util/logging/rlog/rlog.c b/src/util/logging/rlog/rlog.c
index e870920..a2218b2 100644
--- a/src/util/logging/rlog/rlog.c
+++ b/src/util/logging/rlog/rlog.c
@@ -13,7 +13,7 @@
 #include "mpl.h"   /* MPL_error_printf */
 
 #include "mpichconf.h" /* HAVE_SNPRINTF */
-#include "mpimem.h"    /* MPL_snprintf */
+#include "mpir_mem.h"    /* MPL_snprintf */
 
 #include "mpi.h"
 /*#define RLOG_timestamp PMPI_Wtime*/
diff --git a/src/util/logging/rlog/rlogtime.c b/src/util/logging/rlog/rlogtime.c
index e2952fd..c166097 100644
--- a/src/util/logging/rlog/rlogtime.c
+++ b/src/util/logging/rlog/rlogtime.c
@@ -13,7 +13,7 @@
 #include "mpi.h"
 
 #include "mpichconf.h"
-#include "mpimem.h" /* for MPL_snprintf */
+#include "mpir_mem.h" /* for MPL_snprintf */
 #include "rlog.h"
 #include <math.h>
 #include <stdlib.h>
diff --git a/src/util/logging/rlog/rlogutil.c b/src/util/logging/rlog/rlogutil.c
index f49ef28..5a9ce34 100644
--- a/src/util/logging/rlog/rlogutil.c
+++ b/src/util/logging/rlog/rlogutil.c
@@ -10,7 +10,7 @@
 #include <string.h>
 #include <errno.h>
 #include <stdarg.h>
-#include "mpimem.h"
+#include "mpir_mem.h"
 
 static int ReadFileData(char *pBuffer, int length, FILE *fin)
 {
diff --git a/src/util/mem/Makefile.mk b/src/util/mem/Makefile.mk
index d7cf614..ed6a2e6 100644
--- a/src/util/mem/Makefile.mk
+++ b/src/util/mem/Makefile.mk
@@ -7,10 +7,6 @@
 
 AM_CPPFLAGS += -I$(top_srcdir)/src/util/mem
 
-noinst_HEADERS +=                               \
-    src/util/mem/mpiu_strerror.h
-
 mpi_core_sources += \
     src/util/mem/handlemem.c  \
     src/util/mem/strerror.c
-
diff --git a/src/util/pointer/Makefile.mk b/src/util/pointer/Makefile.mk
deleted file mode 100644
index eea239a..0000000
--- a/src/util/pointer/Makefile.mk
+++ /dev/null
@@ -1,12 +0,0 @@
-## -*- Mode: Makefile; -*-
-## vim: set ft=automake :
-##
-## (C) 2011 by Argonne National Laboratory.
-##     See COPYRIGHT in top-level directory.
-##
-
-AM_CPPFLAGS += -I$(top_srcdir)/src/util/pointer
-
-noinst_HEADERS += src/util/pointer/mpiu_pointer.h
-
-mpi_core_sources +=
diff --git a/src/util/pointer/mpiu_pointer.h b/src/util/pointer/mpiu_pointer.h
deleted file mode 100644
index fccea14..0000000
--- a/src/util/pointer/mpiu_pointer.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#if !defined(MPIU_POINTER_H_INCLUDED)
-#define MPIU_POINTER_H_INCLUDED
-
-#include "mpi.h"
-#include "mpichconf.h"
-#include "mpichconfconst.h"
-#include "mpiassert.h"
-#include "mpl.h"
-
-/* Assigns (src_) to (dst_), checking that (src_) fits in (dst_) without
- * truncation.
- *
- * When fiddling with this macro, please keep C's overly complicated integer
- * promotion/truncation/conversion rules in mind.  A discussion of these issues
- * can be found in Chapter 5 of "Secure Coding in C and C++" by Robert Seacord.
- */
-#define MPIU_Assign_trunc(dst_,src_,dst_type_)                                         \
-    do {                                                                               \
-        /* will catch some of the cases if the expr_inttype macros aren't available */ \
-        MPIU_Assert((src_) == (dst_type_)(src_));                                      \
-        dst_ = (dst_type_)(src_);                                                      \
-    } while (0)
-
-/*
- * Ensure an MPI_Aint value fits into a signed int.
- * Useful for detecting overflow when MPI_Aint is larger than an int.
- *
- * \param[in]  aint  Variable of type MPI_Aint
- */
-#define MPIU_Ensure_Aint_fits_in_int(aint) \
-  MPIU_Assert((aint) == (MPI_Aint)(int)(aint));
-
-/*
- * Ensure an MPI_Aint value fits into a pointer.
- * Useful for detecting overflow when MPI_Aint is larger than a pointer.
- *
- * \param[in]  aint  Variable of type MPI_Aint
- */
-#define MPIU_Ensure_Aint_fits_in_pointer(aint) \
-  MPIU_Assert((aint) == (MPI_Aint)(uintptr_t) MPIU_AINT_CAST_TO_VOID_PTR(aint));
-
-
-#endif /* !defined(MPIU_POINTER_H_INCLUDED) */
diff --git a/src/util/refcount/Makefile.mk b/src/util/refcount/Makefile.mk
deleted file mode 100644
index cea1625..0000000
--- a/src/util/refcount/Makefile.mk
+++ /dev/null
@@ -1,14 +0,0 @@
-## -*- Mode: Makefile; -*-
-## vim: set ft=automake :
-##
-## (C) 2011 by Argonne National Laboratory.
-##     See COPYRIGHT in top-level directory.
-##
-
-AM_CPPFLAGS += -I$(top_srcdir)/src/util/refcount
-
-noinst_HEADERS +=                               \
-    src/util/refcount/mpir_refcount.h           \
-    src/util/refcount/mpir_refcount_global.h	\
-    src/util/refcount/mpir_refcount_pobj.h	\
-    src/util/refcount/mpir_refcount_single.h
diff --git a/src/util/type/Makefile.mk b/src/util/type/Makefile.mk
deleted file mode 100644
index cc5b83e..0000000
--- a/src/util/type/Makefile.mk
+++ /dev/null
@@ -1,11 +0,0 @@
-## -*- Mode: Makefile; -*-
-## vim: set ft=automake :
-##
-## (C) 2011 by Argonne National Laboratory.
-##     See COPYRIGHT in top-level directory.
-##
-
-AM_CPPFLAGS += -I$(top_srcdir)/src/util/type
-
-noinst_HEADERS +=                               \
-    src/util/type/mpiu_type_defs.h
diff --git a/src/util/type/mpiu_type_defs.h b/src/util/type/mpiu_type_defs.h
deleted file mode 100644
index d04f5ae..0000000
--- a/src/util/type/mpiu_type_defs.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#if !defined(MPIU_TYPE_DEFS_H_INCLUDED)
-#define MPIU_TYPE_DEFS_H_INCLUDED
-
-#include "mpichconf.h"
-
-/* Basic typedefs */
-#ifdef HAVE_SYS_BITYPES_H
-#include <sys/bitypes.h>
-#endif
-
-/* inttypes.h is supposed to include stdint.h but this is here as
-   belt-and-suspenders for platforms that aren't fully compliant */
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-
-/* stdint.h gives us fixed-width C99 types like int16_t, among others */
-#ifdef HAVE_STDINT_H
-#include <stdint.h>
-#endif
-
-/* stdbool.h gives us the C boolean type */
-#ifdef HAVE_STDBOOL_H
-#include <stdbool.h>
-#endif
-
-/* complex.h gives us the C complex type */
-#ifdef HAVE_COMPLEX_H
-#include <complex.h>
-#endif
-
-#ifdef HAVE_WINDOWS_H
-#include <winsock2.h>
-#include <windows.h>
-#else
-#ifndef BOOL
-#define BOOL int
-#endif
-#endif
-
-#include "mpl.h"
-
-typedef MPIU_SIZE_T MPIU_Size_t;
-
-/* Use the MPIU_PtrToXXX macros to convert pointers to and from integer types */
-
-/* The Microsoft compiler will not allow casting of different sized types
- * without
- * printing a compiler warning.  Using these macros allows compiler specific
- * type casting and avoids the warning output.  These macros should only be used
- * in code that can handle loss of bits.
- */
-
-/* PtrToAint converts a pointer to an MPI_Aint type, truncating bits if necessary */
-#ifdef HAVE_PTRTOAINT
-#define MPIU_PtrToAint(a) ((MPI_Aint)(INT_PTR) (a))
-#else
-/* An MPI_Aint may be *larger* than a pointer.  By using 2 casts, we can
-   keep some compilers from complaining about converting a pointer to an
-   integer of a different size */
-#define MPIU_PtrToAint(a) ((MPI_Aint)(uintptr_t)(a))
-#endif
-
-/* AintToPtr converts an MPI_Aint to a pointer type, extending bits if necessary */
-#ifdef HAVE_AINTTOPTR
-#define MPIU_AintToPtr(a) ((VOID *)(INT_PTR)((MPI_Aint)a))
-#else
-#define MPIU_AintToPtr(a) (void*)(a)
-#endif
-
-/* Adding the 32-bit compute/64-bit I/O related type-casts in here as
- * they are not a part of the MPI standard yet. */
-#define MPIU_AINT_CAST_TO_VOID_PTR (void *)(intptr_t)
-#define MPIU_VOID_PTR_CAST_TO_MPI_AINT (MPI_Aint)(uintptr_t)
-#define MPIU_PTR_DISP_CAST_TO_MPI_AINT (MPI_Aint)(intptr_t)
-
-#define MPIU_CONTEXT_ID_T_DATATYPE MPI_UINT16_T
-typedef uint16_t MPIU_Context_id_t;
-#define MPIU_INVALID_CONTEXT_ID ((MPIU_Context_id_t)0xffff)
-
-#endif /* !defined(MPIU_TYPE_DEFS_H_INCLUDED) */
diff --git a/src/util/wrappers/mpiu_sock_wrappers.h b/src/util/wrappers/mpiu_sock_wrappers.h
index 1b1236a..760b8bb 100644
--- a/src/util/wrappers/mpiu_sock_wrappers.h
+++ b/src/util/wrappers/mpiu_sock_wrappers.h
@@ -19,10 +19,12 @@
 #include "mpl.h"
 #include "mpichconf.h"
 #include "mpi.h"
-#include "mpierror.h"
-#include "mpierrs.h"
-#include "mpimem.h"
-#include "mpiutil.h"
+#include "mpir_err.h"
+#include "mpir_mem.h"
+#include "mpir_strerror.h"
+#include "mpir_type_defs.h"
+#include "mpir_assert.h"
+#include "mpir_pointers.h"
 
 #ifdef USE_NT_SOCK
 
diff --git a/src/util/wrappers/mpiu_util_wrappers.h b/src/util/wrappers/mpiu_util_wrappers.h
index 7e2ee54..571da6f 100644
--- a/src/util/wrappers/mpiu_util_wrappers.h
+++ b/src/util/wrappers/mpiu_util_wrappers.h
@@ -19,7 +19,7 @@
 #endif
 
 #include "mpichconf.h"
-#include "mpimem.h"
+#include "mpir_mem.h"
 
 #ifdef HAVE_GETLASTERROR
 #   define MPIU_OSW_Get_errno()   GetLastError()

http://git.mpich.org/mpich.git/commitdiff/9fcf5f3e32b80e1419a71736203f4964875709e5

commit 9fcf5f3e32b80e1419a71736203f4964875709e5
Author: Pavan Balaji <balaji at anl.gov>
Date:   Sat Apr 23 00:25:52 2016 -0500

    MPL: fixes to allow compilation with C++ compilers.
    
    Avoid using C++ keywords in MPL.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/mpl/include/mpl_dbg.h b/src/mpl/include/mpl_dbg.h
index df50f0d..f8881d1 100644
--- a/src/mpl/include/mpl_dbg.h
+++ b/src/mpl/include/mpl_dbg.h
@@ -126,11 +126,11 @@ extern MPL_dbg_class MPL_DBG_ROUTINE;
 extern MPL_dbg_class MPL_DBG_ALL;
 
 MPL_dbg_class MPL_dbg_class_alloc(const char *ucname, const char *lcname);
-void MPL_dbg_class_register(MPL_dbg_class class, const char *ucname, const char *lcname);
+void MPL_dbg_class_register(MPL_dbg_class cls, const char *ucname, const char *lcname);
 
-#define MPL_DBG_CLASS_CLR(class)                \
+#define MPL_DBG_CLASS_CLR(cls)                  \
     do {                                        \
-        (class) = 0;                            \
+        (cls) = 0;                              \
     } while (0)
 
 #define MPL_DBG_CLASS_APPEND(out_class, in_class)       \

http://git.mpich.org/mpich.git/commitdiff/5ba5ff318b8a4668d535dea08e910288288184eb

commit 5ba5ff318b8a4668d535dea08e910288288184eb
Author: Pavan Balaji <balaji at anl.gov>
Date:   Thu Apr 21 16:23:02 2016 -0500

    Remove handle allocation code.
    
    This is broken in many ways and needs to be redone.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/configure.ac b/configure.ac
index be61b85..2575c50 100644
--- a/configure.ac
+++ b/configure.ac
@@ -518,13 +518,6 @@ AC_ARG_ENABLE(mutex-timing,
 	AC_HELP_STRING([--enable-mutex-timing], [calculate the time spent waiting on mutexes]),
 	AC_DEFINE(MPIU_MUTEX_WAIT_TIME,1,[Define to enable timing mutexes]))
 
-AC_ARG_ENABLE(handle-allocation,
-	AC_HELP_STRING([--enable-handle-allocation=type],
-			[Choose the method used for allocating MPI
-			 object handles.  Values may be 'tls' for
-			 thread-local storage or 'mutex' for simple
-			 locking.  'mutex' is the default.]),,enable_handle_allocation=default)
-
 AC_ARG_ENABLE([predefined-refcount],
 	AS_HELP_STRING([--enable-predefined-refcount],
                        [control whether predefined objects like
@@ -1398,20 +1391,6 @@ if test "$enable_predefined_refcount" = "no" ; then
     AC_DEFINE([MPIU_THREAD_SUPPRESS_PREDEFINED_REFCOUNTS],[1],[define to disable reference counting predefined objects like MPI_COMM_WORLD])
 fi
 
-case $enable_handle_allocation in
-    mutex|default)
-        handle_allocation_method=MPIU_HANDLE_ALLOCATION_MUTEX
-    ;;
-    tls)
-        handle_allocation_method=MPIU_HANDLE_ALLOCATION_THREAD_LOCAL
-    ;;
-    *)
-        AC_MSG_ERROR([Unrecognized value $enable_handle_allocation for --enable-handle-allocation])
-    ;;
-esac
-AC_DEFINE_UNQUOTED([MPIU_HANDLE_ALLOCATION_METHOD],$handle_allocation_method,[Method used to allocate MPI object handles])
-
-
 AC_DEFINE_UNQUOTED([MPIU_THREAD_REFCOUNT],$thread_refcount,[Method used to implement refcount updates])
 
 # enable-g
diff --git a/src/include/mpichconfconst.h b/src/include/mpichconfconst.h
index 18b17e5..24d983c 100644
--- a/src/include/mpichconfconst.h
+++ b/src/include/mpichconfconst.h
@@ -36,11 +36,6 @@
 /* _SINGLE is the "null" granularity, where all processes are single-threaded */
 #define MPICH_THREAD_GRANULARITY_SINGLE 4
 
-/* controls the allocation mechanism for MPIR_Request handles, which can greatly
- * affect concurrency on the critical path */
-#define MPIU_HANDLE_ALLOCATION_MUTEX         0
-#define MPIU_HANDLE_ALLOCATION_THREAD_LOCAL  1
-
 /* _NONE means no concurrency control, such as when using MPI_THREAD_SINGLE */
 #define MPIU_REFCOUNT_NONE 1
 #define MPIU_REFCOUNT_LOCKFREE 2

http://git.mpich.org/mpich.git/commitdiff/c4473173e550cc572d908ac1f7dd3c9e459c5405

commit c4473173e550cc572d908ac1f7dd3c9e459c5405
Author: Pavan Balaji <balaji at anl.gov>
Date:   Thu Apr 21 16:16:01 2016 -0500

    Purge unused code.
    
    Remove unused code from mpiimpl.h and other headers under src/include
    (including the whole of mpistates.h), and an unused constant from
    mpichconfconst.h.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/include/Makefile.mk b/src/include/Makefile.mk
index 8e537cf..2336291 100644
--- a/src/include/Makefile.mk
+++ b/src/include/Makefile.mk
@@ -41,7 +41,6 @@ noinst_HEADERS +=                   \
     src/include/mpimem.h            \
     src/include/mpir_nbc.h          \
     src/include/mpir_type_defs.h    \
-    src/include/mpistates.h         \
     src/include/mpitimerimpl.h      \
     src/include/mpiu_utarray.h      \
     src/include/mpiu_uthash.h       \
diff --git a/src/include/mpichconfconst.h b/src/include/mpichconfconst.h
index de3cbfb..18b17e5 100644
--- a/src/include/mpichconfconst.h
+++ b/src/include/mpichconfconst.h
@@ -41,8 +41,6 @@
 #define MPIU_HANDLE_ALLOCATION_MUTEX         0
 #define MPIU_HANDLE_ALLOCATION_THREAD_LOCAL  1
 
-/* _INVALID exists to avoid accidental macro evaluations to 0 */
-#define MPIU_REFCOUNT_INVALID 0
 /* _NONE means no concurrency control, such as when using MPI_THREAD_SINGLE */
 #define MPIU_REFCOUNT_NONE 1
 #define MPIU_REFCOUNT_LOCKFREE 2
diff --git a/src/include/mpifunc.h b/src/include/mpifunc.h
index e5cbc77..f694439 100644
--- a/src/include/mpifunc.h
+++ b/src/include/mpifunc.h
@@ -9,13 +9,10 @@
 
 /* state declaration macros */
 #if defined(MPL_USE_DBG_LOGGING) || defined(MPICH_DEBUG_MEMARENA)
-#define MPIR_STATE_DECL(a)
-#define MPID_MPI_STATE_DECL(a)		MPIR_STATE_DECL(a)
-#define MPID_MPI_INIT_STATE_DECL(a)	MPIR_STATE_DECL(a)
-#define MPID_MPI_FINALIZE_STATE_DECL(a)	MPIR_STATE_DECL(a)
-#define MPIDI_STATE_DECL(a)		MPIR_STATE_DECL(a)
-#define MPIDI_INIT_STATE_DECL(a)	MPIR_STATE_DECL(a)
-#define MPIDI_FINALIZE_STATE_DECL(a)	MPIR_STATE_DECL(a)
+#define MPID_MPI_STATE_DECL(a)
+#define MPID_MPI_INIT_STATE_DECL(a)
+#define MPID_MPI_FINALIZE_STATE_DECL(a)
+#define MPIDI_STATE_DECL(a)
 
 /* Tell the package to define the rest of the enter/exit macros in
    terms of these */
diff --git a/src/include/mpihandlemem.h b/src/include/mpihandlemem.h
index 91f993f..1de6dda 100644
--- a/src/include/mpihandlemem.h
+++ b/src/include/mpihandlemem.h
@@ -225,7 +225,6 @@ M*/
 #if MPIU_THREAD_REFCOUNT == MPIU_REFCOUNT_NONE
 
 typedef int MPIU_Handle_ref_count;
-#define MPIU_HANDLE_REF_COUNT_INITIALIZER(val_) (val_)
 
 #define MPIU_Object_set_ref(objptr_,val)                 \
     do {                                                 \
@@ -254,7 +253,6 @@ typedef int MPIU_Handle_ref_count;
 
 #include "opa_primitives.h"
 typedef OPA_int_t MPIU_Handle_ref_count;
-#define MPIU_HANDLE_REF_COUNT_INITIALIZER(val_) OPA_INT_T_INITIALIZER(val_)
 
 #define MPIU_Object_set_ref(objptr_,val)                 \
     do {                                                 \
@@ -367,13 +365,6 @@ typedef struct MPIU_Handle_common {
                      free object */
 } MPIU_Handle_common;
 
-/* Provides a type to which a specific object structure can be casted.  In
- * general this should not be used, since most uses are violations of C's strict
- * aliasing rules. */
-typedef struct MPIU_Handle_head {
-    MPIU_OBJECT_HEADER;
-} MPIU_Handle_head;
-
 /* This type contains all of the data, except for the direct array,
    used by the object allocators. */
 typedef struct MPIU_Object_alloc_t {
diff --git a/src/include/mpiimpl.h b/src/include/mpiimpl.h
index 3f7b4ba..c7a5f1a 100644
--- a/src/include/mpiimpl.h
+++ b/src/include/mpiimpl.h
@@ -75,17 +75,8 @@ int MPIR_Comm_split_filesystem(MPI_Comm comm, int key, const char *dirname, MPI_
 #endif
 
 #if defined(HAVE_LONG_LONG_INT)
-/* tt#1776: some platforms have "long long" but not a LLONG_MAX/ULLONG_MAX,
- * usually because some feature test macro has turned them off in glibc's
- * features.h header b/c we are not in a >=C99 mode.  Use well-defined unsigned
- * integer overflow to determine ULLONG_MAX, and assume two's complement for
- * determining LLONG_MAX (already assumed elsewhere in MPICH). */
-#ifndef ULLONG_MIN
-#define ULLONG_MIN (0) /* trivial */
-#endif
-#ifndef ULLONG_MAX
-#define ULLONG_MAX ((unsigned long long)0 - 1)
-#endif
+/* Assume two's complement for determining LLONG_MAX (already assumed
+ * elsewhere in MPICH). */
 #ifndef LLONG_MAX
 /* slightly tricky (values in binary):
  * - put a 1 in the second-to-msb digit                   (0100...0000)
@@ -94,11 +85,6 @@ int MPIR_Comm_split_filesystem(MPI_Comm comm, int key, const char *dirname, MPI_
  * - add 1, yielding all 1s in positive space             (0111...1111) */
 #define LLONG_MAX (((((long long) 1 << (sizeof(long long) * CHAR_BIT - 2)) - 1 ) << 1) + 1)
 #endif
-#ifndef LLONG_MIN
-/* (1000...0000) is the most negative value in a twos-complement representation,
- * which is the bitwise complement of the most positive value */
-#define LLONG_MIN (~LLONG_MAX)
-#endif
 #endif /* defined(HAVE_LONG_LONG_INT) */
 
 #if (!defined MAXHOSTNAMELEN) && (!defined MAX_HOSTNAME_LEN)
@@ -148,36 +134,6 @@ int MPIR_Comm_split_filesystem(MPI_Comm comm, int key, const char *dirname, MPI_
 #include "../mpid/common/hcoll/hcollpre.h"
 #endif
 
-/*
- * Use MPIU_SYSCALL to wrap system calls; this provides a convenient point
- * for timing the calls and keeping track of the use of system calls.
- * This macro simply invokes the system call and does not even handle
- * EINTR.
- * To use, 
- *    MPIU_SYSCALL( return-value, name-of-call, args-in-parenthesis )
- * e.g., change "n = read(fd,buf,maxn);" into
- *    MPIU_SYSCALL( n,read,(fd,buf,maxn) );
- * An example that prints each syscall to stdout is shown below. 
- */
-#ifdef USE_LOG_SYSCALLS
-#define MPIU_SYSCALL(a_,b_,c_) { \
-    printf( "[%d]about to call %s\n", MPIR_Process.comm_world->rank,#b_);\
-          fflush(stdout); errno = 0;\
-    a_ = b_ c_; \
-    if ((a_)>=0 || errno==0) {\
-    printf( "[%d]%s returned %d\n", \
-          MPIR_Process.comm_world->rank, #b_, a_ );\
-    } \
- else { \
-    printf( "[%d]%s returned %d (errno = %d,%s)\n", \
-          MPIR_Process.comm_world->rank, \
-          #b_, a_, errno, MPIU_Strerror(errno));\
-    };           fflush(stdout);}
-#else
-#define MPIU_SYSCALL(a_,b_,c_) a_ = b_ c_
-#endif
-
-
 typedef struct {
     int thread_provided;        /* Provided level of thread support */
 
@@ -436,9 +392,6 @@ void MPIR_DatatypeAttrFinalize( void );
 /* Valid pointer checks */
 /* This test is lame.  Should eventually include cookie test 
    and in-range addresses */
-#define MPIR_Valid_ptr(kind,ptr,err) \
-  {if (!(ptr)) { err = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, \
-                                             "**nullptrtype", "**nullptrtype %s", #kind ); } }
 #define MPIR_Valid_ptr_class(kind,ptr,errclass,err) \
   {if (!(ptr)) { err = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, errclass, \
                                              "**nullptrtype", "**nullptrtype %s", #kind ); } }
@@ -471,27 +424,6 @@ void MPIR_DatatypeAttrFinalize( void );
      (type == MPI_LONG_INT) || (type == MPI_SHORT_INT) || \
      (type == MPI_LONG_DOUBLE_INT))
 
-/* FIXME: 
-   Generic pointer test.  This is applied to any address, not just one from
-   an MPI object.
-   Currently unimplemented (returns success except for null pointers.
-   With a little work, could check that the pointer is properly aligned,
-   using something like 
-   ((p) == 0 || ((char *)(p) & MPID_Alignbits[alignment] != 0)
-   where MPID_Alignbits is set with a mask whose bits must be zero in a 
-   properly aligned quantity.  For systems with no alignment rules, 
-   all of these masks are zero, and this part of test can be eliminated.
- */
-#define MPIR_Pointer_is_invalid(p,alignment) ((p) == 0)
-/* Fixme: The following MPID_ALIGNED_xxx values are temporary.  They 
-   need to be computed by configure and included in the mpichconf.h file.
-   Note that they cannot be set conservatively (i.e., as sizeof(object)),
-   since the runtime system may generate objects with lesser alignment
-   rules if the processor allows them.
- */
-#define MPIR_ALIGNED_PTR_INT   1
-#define MPIR_ALIGNED_PTR_LONG  1
-#define MPIR_ALIGNED_PTR_VOIDP 1
 /* ------------------------------------------------------------------------- */
 /* end of code that should the following be moved into mpihandlemem.h ?*/
 /* ------------------------------------------------------------------------- */
@@ -1497,9 +1429,6 @@ extern MPL_dbg_class MPIR_DBG_REQUEST;
 extern MPL_dbg_class MPIR_DBG_ASSERT;
 #endif /* MPL_USE_DBG_LOGGING */
 
-/* MPI_Status manipulation macros */
-#define MPIR_BITS_IN_INT (8 * SIZEOF_INT)
-
 /* We use bits from the "count_lo" and "count_hi_and_cancelled" fields
  * to represent the 'count' and 'cancelled' objects.  The LSB of the
  * "count_hi_and_cancelled" field represents the 'cancelled' object.
@@ -1516,11 +1445,11 @@ extern MPL_dbg_class MPIR_DBG_ASSERT;
     {                                                                   \
         (status_).count_lo = ((int) count_);                            \
         (status_).count_hi_and_cancelled &= 1;                          \
-        (status_).count_hi_and_cancelled |= (int) ((MPIR_Ucount) count_ >> MPIR_BITS_IN_INT << 1); \
+        (status_).count_hi_and_cancelled |= (int) ((MPIR_Ucount) count_ >> (8 * SIZEOF_INT) << 1); \
     }
 
 #define MPIR_STATUS_GET_COUNT(status_)                                  \
-    ((MPI_Count) ((((MPIR_Ucount) (((unsigned int) (status_).count_hi_and_cancelled) >> 1)) << MPIR_BITS_IN_INT) + (unsigned int) (status_).count_lo))
+    ((MPI_Count) ((((MPIR_Ucount) (((unsigned int) (status_).count_hi_and_cancelled) >> 1)) << (8 * SIZEOF_INT)) + (unsigned int) (status_).count_lo))
 
 #define MPIR_STATUS_SET_CANCEL_BIT(status_, cancelled_)	\
     {                                                   \
@@ -1981,28 +1910,6 @@ void *MPID_Alloc_mem( size_t size, MPIR_Info *info );
   @*/
 int MPID_Free_mem( void *ptr );
 
-/*@
-  MPID_Mem_was_alloced - Return true if this memory was allocated with 
-  'MPID_Alloc_mem'
-
-  Input Parameters:
-+ ptr  - Address of memory
-- size - Size of reqion in bytes.
-
-  Return value:
-  True if the memory was allocated with 'MPID_Alloc_mem', false otherwise.
-
-  Notes:
-  This routine may be needed by 'MPI_Win_create' to ensure that the memory 
-  for passive target RMA operations was allocated with 'MPI_Mem_alloc'.
-  This may be used, for example, for ensuring that memory used with
-  passive target operations was allocated with 'MPID_Alloc_mem'.
-
-  Module:
-  Win
-  @*/
-int MPID_Mem_was_alloced( void *ptr );  /* brad : this isn't used or implemented anywhere */
-
 /* ------------------------------------------------------------------------- */
 /* end of also in mpirma.h ? */
 /* ------------------------------------------------------------------------- */
@@ -2255,7 +2162,6 @@ typedef struct MPIR_Collops {
                                MPIR_Comm *comm_ptr, MPID_Sched_t s);
 } MPIR_Collops;
 
-#define MPIR_BARRIER_TAG 1
 /* ------------------------------------------------------------------------- */
 /* end of mpicoll.h (in src/mpi/coll? */
 /* ------------------------------------------------------------------------- */
@@ -2401,20 +2307,8 @@ extern MPICH_PerProcess_t MPIR_Process;
 #define MPIDI_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
 #define MPIDI_PT2PT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
 #define MPIDI_PT2PT_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER_FRONT(a)		MPIR_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT_FRONT(a)		MPIR_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER_BACK(a)		MPIR_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_ENTER_BOTH(a)		MPIR_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BACK(a)		MPIR_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BOTH(a)		MPIR_FUNC_EXIT(a)
-#define MPIDI_COLL_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPIDI_COLL_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
 #define MPIDI_RMA_FUNC_ENTER(a)			MPIR_FUNC_ENTER(a)
 #define MPIDI_RMA_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
-#define MPIDI_INIT_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPIDI_INIT_FUNC_EXIT(a)			MPIR_FUNC_EXIT(a)
-#define MPIDI_FINALIZE_FUNC_ENTER(a)		MPIR_FUNC_ENTER(a)
-#define MPIDI_FINALIZE_FUNC_EXIT(a)		MPIR_FUNC_EXIT(a)
 
 /* evaporate the timing macros since timing is not selected */
 #define MPIU_Timer_init(rank, size)
@@ -2425,10 +2319,6 @@ extern MPICH_PerProcess_t MPIR_Process;
 #include "mpierror.h"
 #include "mpierrs.h"
 
-/* FIXME: This routine is only used within mpi/src/err/errutil.c.  We
- * may not want to export it.  */
-void MPIR_Err_print_stack(FILE *, int);
-
 /* ------------------------------------------------------------------------- */
 
 /* FIXME: Move these to the communicator block; make sure that all 
@@ -2490,7 +2380,6 @@ extern void MPIR_Errhandler_set_cxx( MPI_Errhandler, void (*)(void) );
 int MPIR_Group_create( int, MPIR_Group ** );
 int MPIR_Group_release(MPIR_Group *group_ptr);
 
-int MPIR_dup_fn ( MPI_Comm, int, void *, void *, void *, int * );
 /* marks a request as complete, extracting the status */
 int MPIR_Request_complete(MPI_Request *, MPIR_Request *, MPI_Status *, int *);
 
@@ -3646,101 +3535,6 @@ typedef struct MPIR_Grequest_class {
 /* Interfaces exposed by MPI_T */
 #include "mpit.h"
 
-/*TTopoOverview.tex
- *
- * The MPI collective and topology routines can benefit from information 
- * about the topology of the underlying interconnect.  Unfortunately, there
- * is no best form for the representation (the MPI-1 Forum tried to define
- * such a representation, but was unable to).  One useful decomposition
- * that has been used in cluster enviroments is a hierarchical decomposition.
- *
- * The other obviously useful topology information would match the needs of 
- * 'MPI_Cart_create'.  However, it may be simpler to for the device to 
- * implement this routine directly.
- *
- * Other useful information could be the topology information that matches
- * the needs of the collective operation, such as spanning trees and rings.
- * These may be added to ADI3 later.
- *
- * Question: Should we define a cart create function?  Dims create?
- *
- * Usage:
- * This routine has nothing to do with the choice of communication method
- * that a implementation of the ADI may make.  It is intended only to
- * communicate information on the heirarchy of processes, if any, to 
- * the implementation of the collective communication routines.  This routine
- * may also be useful for the MPI Graph topology functions.
- *
- T*/
-
-/*@
-  MPID_Topo_cluster_info - Return information on the hierarchy of 
-  interconnections
-
-  Input Parameter:
-. comm - Communicator to study.  May be 'NULL', in which case 'MPI_COMM_WORLD'
-  is the effective communicator.
-
-  Output Parameters:
-+ levels - The number of levels in the hierarchy.  
-  To simplify the use of this routine, the maximum value is 
-  'MPID_TOPO_CLUSTER_MAX_LEVELS' (typically 8 or less).
-. my_cluster - For each level, the id of the cluster that the calling process
-  belongs to.
-- my_rank - For each level, the rank of the calling process in its cluster
-
-  Notes:
-  This routine returns a description of the system in terms of nested 
-  clusters of processes.  Levels are numbered from zero.  At each level,
-  each process may belong to no more than cluster; if a process is in any
-  cluster at level i, it must be in some cluster at level i-1.
-
-  The communicator argument allows this routine to be used in the dynamic
-  process case (i.e., with communicators that are created after 'MPI_Init' 
-  and that involve processes that are not part of 'MPI_COMM_WORLD').
-
-  For non-hierarchical systems, this routine simply returns a single 
-  level containing all processes.
-
-  Sample Outputs:
-  For a single, switch-connected cluster or a uniform-memory-access (UMA)
-  symmetric multiprocessor (SMP), the return values could be
-.vb
-    level       my_cluster         my_rank
-    0           0                  rank in comm_world
-.ve
-  This is also a valid response for `any` device.
-
-  For a switch-connected cluster of 2 processor SMPs
-.vb
-    level       my_cluster         my_rank
-    0           0                  rank in comm_world
-    1           0 to p/2           0 or 1
-.ve
- where the value each process on the same SMP has the same value for
- 'my_cluster[1]' and a different value for 'my_rank[1]'.
-
-  For two SMPs connected by a network,
-.vb
-    level       my_cluster         my_rank
-    0           0                  rank in comm_world
-    1           0 or 1             0 to # on SMP
-.ve
-
-  An example with more than 2 levels is a collection of clusters, each with
-  SMP nodes.  
-
-  Limitations:
-  This approach does not provide a representations for topologies that
-  are not hierarchical.  For example, a mesh interconnect is a single-level
-  cluster in this view.
-
-  Module: 
-  Topology
-  @*/
-int MPID_Topo_cluster_info( MPIR_Comm *comm,
-			    int *levels, int my_cluster[], int my_rank[] );
-
 /*@
   MPID_Get_processor_name - Return the name of the current processor
 
@@ -4142,8 +3936,6 @@ int MPIR_Comm_is_node_aware( MPIR_Comm * );
 
 int MPIR_Comm_is_node_consecutive( MPIR_Comm *);
 
-void MPIR_Free_err_dyncodes( void );
-
 int MPIR_Comm_idup_impl(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm, MPIR_Request **reqp);
 
 int MPIR_Comm_shrink(MPIR_Comm *comm_ptr, MPIR_Comm **newcomm_ptr);
@@ -4311,15 +4103,6 @@ int MPIR_Group_init(void);
 int MPIR_Comm_init(MPIR_Comm *);
 
 
-/* Collective functions cannot be called from multiple threads. These
-   are stubs used in the collective communication calls to check for
-   user error. Currently they are just being macroed out. */
-#define MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER(comm_ptr)
-#define MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT(comm_ptr)
-
-/* Miscellaneous */
-void MPIU_SetTimeout( int );
-
 /* Communicator info hint functions */
 typedef int (*MPIR_Comm_hint_fn_t)(MPIR_Comm *, MPIR_Info *, void *);
 int MPIR_Comm_register_hint(const char *hint_key, MPIR_Comm_hint_fn_t fn, void *state);
diff --git a/src/include/mpimem.h b/src/include/mpimem.h
index 9250f4d..97bbd2d 100644
--- a/src/include/mpimem.h
+++ b/src/include/mpimem.h
@@ -159,33 +159,6 @@ if (pointer_) { \
 #define MPIU_CHKLMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_) \
     MPIU_CHKLMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,goto fn_fail)
 
-/* In some cases, we need to allocate large amounts of memory. This can
-   be a problem if alloca is used, as the available stack space may be small.
-   This is the same approach for the temporary memory as is used when alloca
-   is not available. */
-#define MPIU_CHKLBIGMEM_DECL(n_) \
- void *(mpiu_chklbigmem_stk_[n_]);\
- int mpiu_chklbigmem_stk_sp_=0;\
- MPIU_AssertDeclValue(const int mpiu_chklbigmem_stk_sz_,n_)
-
-#define MPIU_CHKLBIGMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,stmt_) \
-{pointer_ = (type_)MPL_malloc(nbytes_); \
-if (pointer_) { \
-    MPIU_Assert(mpiu_chklbigmem_stk_sp_<mpiu_chklbigmem_stk_sz_);\
-    mpiu_chklbigmem_stk_[mpiu_chklbigmem_stk_sp_++] = pointer_;\
- } else if (nbytes_ > 0) {				       \
-    MPIU_CHKMEM_SETERR(rc_,nbytes_,name_); \
-    stmt_;\
-}}
-#define MPIU_CHKLBIGMEM_FREEALL() \
-    { while (mpiu_chklbigmem_stk_sp_ > 0) {\
-       MPL_free( mpiu_chklbigmem_stk_[--mpiu_chklbigmem_stk_sp_] ); } }
-
-#define MPIU_CHKLBIGMEM_MALLOC(pointer_,type_,nbytes_,rc_,name_) \
-    MPIU_CHKLBIGMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_)
-#define MPIU_CHKLBIGMEM_MALLOC_ORJUMP(pointer_,type_,nbytes_,rc_,name_) \
-    MPIU_CHKLBIGMEM_MALLOC_ORSTMT(pointer_,type_,nbytes_,rc_,name_,goto fn_fail)
-
 /* Persistent memory that we may want to recover if something goes wrong */
 #define MPIU_CHKPMEM_DECL(n_) \
  void *(mpiu_chkpmem_stk_[n_]) = { NULL };     \
@@ -242,15 +215,6 @@ if (pointer_) { \
 /* Provides a easy way to use realloc safely and avoid the temptation to use
  * realloc unsafely (direct ptr assignment).  Zero-size reallocs returning NULL
  * are handled and are not considered an error. */
-#define MPIU_REALLOC_OR_FREE_AND_JUMP(ptr_,size_,rc_) do { \
-    void *realloc_tmp_ = MPL_realloc((ptr_), (size_)); \
-    if ((size_) && !realloc_tmp_) { \
-        MPL_free(ptr_); \
-        MPIR_ERR_SETANDJUMP2(rc_,MPI_ERR_OTHER,"**nomem2","**nomem2 %d %s",(size_),MPL_QUOTE(ptr_)); \
-    } \
-    (ptr_) = realloc_tmp_; \
-} while (0)
-/* this version does not free ptr_ */
 #define MPIU_REALLOC_ORJUMP(ptr_,size_,rc_) do { \
     void *realloc_tmp_ = MPL_realloc((ptr_), (size_)); \
     if (size_) \
diff --git a/src/include/mpistates.h b/src/include/mpistates.h
deleted file mode 100644
index 607159c..0000000
--- a/src/include/mpistates.h
+++ /dev/null
@@ -1,318 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-#ifndef MPISTATES_H_INCLUDED
-#define MPISTATES_H_INCLUDED
-
-/* Insert all the states to be logged here */
-
-/* Note to package writers:
- * If you insert your timer states here, you
- * also need to insert a function call in 
- * MPIU_Timer_init to describe your states
- * to the logging library.
- * MPIU_Timer_init is probably found in:
- * mpich/src/util/timing/timer.c
- */
-
-enum MPID_TIMER_STATE
-{
-/* device states */
-#ifdef MPID_STATE_LIST_MPID
-MPID_STATE_LIST_MPID
-#endif
-
-#ifdef MPID_STATE_LIST_SEGMENT
-MPID_STATE_LIST_SEGMENT
-#endif
-
-/* bsocket states */
-#ifdef MPID_STATE_LIST_BSOCKET
-MPID_STATE_LIST_BSOCKET
-#endif
-
-/* pmi states */
-#ifdef MPID_STATE_LIST_PMI
-MPID_STATE_LIST_PMI
-#endif
-
-/* mpi states */
-MPID_STATE_MPI_SEND,
-MPID_STATE_MPI_RECV,
-MPID_STATE_MPI_BSEND,
-MPID_STATE_MPI_SSEND,
-MPID_STATE_MPI_RSEND,
-MPID_STATE_MPI_ISEND,
-MPID_STATE_MPI_IBSEND,
-MPID_STATE_MPI_ISSEND,
-MPID_STATE_MPI_IRSEND,
-MPID_STATE_MPI_IRECV,
-MPID_STATE_MPI_WAIT,
-MPID_STATE_MPI_TEST,
-MPID_STATE_MPI_SENDRECV,
-MPID_STATE_MPI_SENDRECV_REPLACE,
-MPID_STATE_MPI_WAITANY,
-MPID_STATE_MPI_TESTANY,
-MPID_STATE_MPI_WAITALL,
-MPID_STATE_MPI_TESTALL,
-MPID_STATE_MPI_WAITSOME,
-MPID_STATE_MPI_TESTSOME,
-MPID_STATE_MPI_IPROBE,
-MPID_STATE_MPI_PROBE,
-MPID_STATE_MPI_CANCEL,
-MPID_STATE_MPI_TEST_CANCELLED,
-MPID_STATE_MPI_SEND_INIT,
-MPID_STATE_MPI_BSEND_INIT,
-MPID_STATE_MPI_SSEND_INIT,
-MPID_STATE_MPI_RSEND_INIT,
-MPID_STATE_MPI_RECV_INIT,
-MPID_STATE_MPI_START,
-MPID_STATE_MPI_STARTALL,
-MPID_STATE_MPI_BUFFER_ATTACH,
-MPID_STATE_MPI_BUFFER_DETACH,
-MPID_STATE_MPI_REQUEST_FREE,
-
-MPID_STATE_MPI_GET_COUNT,
-MPID_STATE_MPI_GET_ELEMENTS,
-
-MPID_STATE_MPI_TYPE_CONTIGUOUS,
-MPID_STATE_MPI_TYPE_VECTOR,
-MPID_STATE_MPI_TYPE_HVECTOR,
-MPID_STATE_MPI_TYPE_INDEXED,
-MPID_STATE_MPI_TYPE_HINDEXED,
-MPID_STATE_MPI_TYPE_STRUCT,
-MPID_STATE_MPI_TYPE_EXTENT,
-MPID_STATE_MPI_TYPE_SIZE,
-MPID_STATE_MPI_TYPE_COUNT,
-MPID_STATE_MPI_TYPE_LB,
-MPID_STATE_MPI_TYPE_UB,
-MPID_STATE_MPI_TYPE_COMMIT,
-MPID_STATE_MPI_TYPE_FREE,
-MPID_STATE_MPI_TYPE_CREATE_DARRAY,
-MPID_STATE_MPI_TYPE_CREATE_HINDEXED,
-MPID_STATE_MPI_TYPE_CREATE_HVECTOR,
-MPID_STATE_MPI_TYPE_CREATE_INDEXED_BLOCK,
-MPID_STATE_MPI_TYPE_CREATE_RESIZED,
-MPID_STATE_MPI_TYPE_CREATE_STRUCT,
-MPID_STATE_MPI_TYPE_CREATE_SUBARRAY,
-MPID_STATE_MPI_TYPE_GET_EXTENT,
-MPID_STATE_MPI_TYPE_GET_TRUE_EXTENT,
-MPID_STATE_MPI_ADDRESS,
-
-MPID_STATE_MPI_PACK,
-MPID_STATE_MPI_UNPACK,
-MPID_STATE_MPI_PACK_SIZE,
-
-MPID_STATE_MPI_BARRIER,
-MPID_STATE_MPI_BCAST,
-MPID_STATE_MPI_ALLREDUCE,
-MPID_STATE_MPI_GATHER, 
-MPID_STATE_MPI_GATHERV, 
-MPID_STATE_MPI_SCATTER,
-MPID_STATE_MPI_SCATTERV,
-MPID_STATE_MPI_ALLGATHER,
-MPID_STATE_MPI_ALLGATHERV,
-MPID_STATE_MPI_ALLTOALL,
-MPID_STATE_MPI_ALLTOALLV,
-MPID_STATE_MPI_ALLTOALLW,
-MPID_STATE_MPI_EXSCAN,
-MPID_STATE_MPI_REDUCE,
-MPID_STATE_MPI_REDUCE_SCATTER,
-MPID_STATE_MPI_SCAN,
-MPID_STATE_MPI_OP_CREATE,
-MPID_STATE_MPI_OP_FREE,
-
-MPID_STATE_MPI_GROUP_SIZE,
-MPID_STATE_MPI_GROUP_RANK,
-MPID_STATE_MPI_GROUP_TRANSLATE_RANKS ,
-MPID_STATE_MPI_GROUP_COMPARE,
-MPID_STATE_MPI_COMM_GROUP,
-MPID_STATE_MPI_GROUP_UNION,
-MPID_STATE_MPI_GROUP_INTERSECTION,
-MPID_STATE_MPI_GROUP_DIFFERENCE,
-MPID_STATE_MPI_GROUP_INCL,
-MPID_STATE_MPI_GROUP_EXCL,
-MPID_STATE_MPI_GROUP_RANGE_INCL,
-MPID_STATE_MPI_GROUP_RANGE_EXCL,
-MPID_STATE_MPI_GROUP_FREE,
-
-MPID_STATE_MPI_COMM_DUP,
-MPID_STATE_MPI_COMM_SIZE,
-MPID_STATE_MPI_COMM_RANK,
-MPID_STATE_MPI_COMM_COMPARE,
-MPID_STATE_MPI_COMM_CREATE,
-MPID_STATE_MPI_COMM_SPLIT,
-MPID_STATE_MPI_COMM_FREE,
-MPID_STATE_MPI_COMM_TEST_INTER,
-MPID_STATE_MPI_COMM_REMOTE_SIZE,
-MPID_STATE_MPI_COMM_REMOTE_GROUP,
-
-MPID_STATE_MPI_INTERCOMM_CREATE,
-MPID_STATE_MPI_INTERCOMM_MERGE,
-
-MPID_STATE_MPI_KEYVAL_CREATE,
-MPID_STATE_MPI_KEYVAL_FREE,
-MPID_STATE_MPI_ATTR_PUT,
-MPID_STATE_MPI_ATTR_GET,
-MPID_STATE_MPI_ATTR_DELETE,
-
-MPID_STATE_MPI_TOPO_TEST,
-MPID_STATE_MPI_CART_CREATE,
-MPID_STATE_MPI_DIMS_CREATE,
-MPID_STATE_MPI_GRAPH_CREATE,
-MPID_STATE_MPI_GRAPHDIMS_GET,
-MPID_STATE_MPI_GRAPH_GET,
-MPID_STATE_MPI_CARTDIM_GET,
-MPID_STATE_MPI_CART_GET,
-MPID_STATE_MPI_CART_RANK,
-MPID_STATE_MPI_CART_COORDS,
-MPID_STATE_MPI_GRAPH_NEIGHBORS_COUNT,
-MPID_STATE_MPI_GRAPH_NEIGHBORS,
-MPID_STATE_MPI_CART_SHIFT,
-MPID_STATE_MPI_CART_SUB,
-MPID_STATE_MPI_CART_MAP,
-MPID_STATE_MPI_GRAPH_MAP,
-
-MPID_STATE_MPI_GET_PROCESSOR_NAME,
-MPID_STATE_MPI_GET_VERSION,
-
-MPID_STATE_MPI_ERRHANDLER_CREATE,
-MPID_STATE_MPI_ERRHANDLER_SET,
-MPID_STATE_MPI_ERRHANDLER_GET,
-MPID_STATE_MPI_ERRHANDLER_FREE,
-MPID_STATE_MPI_ERROR_STRING,
-MPID_STATE_MPI_ERROR_CLASS,
-
-MPID_STATE_MPI_WTIME,
-MPID_STATE_MPI_WTICK,
-
-MPID_STATE_MPI_INIT,
-MPID_STATE_MPI_FINALIZE,
-MPID_STATE_MPI_INITIALIZED,
-MPID_STATE_MPI_ABORT,
-
-MPID_STATE_MPI_PCONTROL,
-
-MPID_STATE_MPI_STATUS_F2C,
-MPID_STATE_MPI_STATUS_C2F,
-
-MPID_STATE_MPI_FINALIZED,
-MPID_STATE_MPI_IS_THREAD_MAIN,
-MPID_STATE_MPI_QUERY_THREAD,
-
-MPID_STATE_MPI_STATUS_SET_CANCELLED,
-MPID_STATE_MPI_STATUS_SET_ELEMENTS,
-
-MPID_STATE_MPI_CLOSE_PORT,
-MPID_STATE_MPI_COMM_ACCEPT,
-MPID_STATE_MPI_COMM_CONNECT,
-MPID_STATE_MPI_COMM_DISCONNECT,
-MPID_STATE_MPI_COMM_GET_PARENT,
-MPID_STATE_MPI_COMM_JOIN,
-MPID_STATE_MPI_COMM_SPAWN,
-MPID_STATE_MPI_COMM_SPAWN_MULTIPLE, 
-
-MPID_STATE_MPI_LOOKUP_NAME,
-MPID_STATE_MPI_OPEN_PORT,
-MPID_STATE_MPI_PUBLISH_NAME,
-MPID_STATE_MPI_UNPUBLISH_NAME,
-
-MPID_STATE_MPI_ACCUMULATE,
-MPID_STATE_MPI_GET,
-MPID_STATE_MPI_PUT,
-MPID_STATE_MPI_WIN_COMPLETE,
-MPID_STATE_MPI_WIN_CREATE,
-MPID_STATE_MPI_WIN_ALLOCATE,
-MPID_STATE_MPI_WIN_FENCE,
-MPID_STATE_MPI_WIN_FREE,
-MPID_STATE_MPI_WIN_GET_GROUP,
-MPID_STATE_MPI_WIN_LOCK,
-MPID_STATE_MPI_WIN_POST,
-MPID_STATE_MPI_WIN_START,
-MPID_STATE_MPI_WIN_TEST,
-MPID_STATE_MPI_WIN_UNLOCK,
-MPID_STATE_MPI_WIN_WAIT,
-
-
-MPID_STATE_MPI_ADD_ERROR_CLASS,
-MPID_STATE_MPI_ADD_ERROR_CODE,
-MPID_STATE_MPI_ADD_ERROR_STRING,
-MPID_STATE_MPI_COMM_CALL_ERRHANDLER,
-
-MPID_STATE_MPI_COMM_CREATE_KEYVAL,
-MPID_STATE_MPI_COMM_DELETE_ATTR,
-MPID_STATE_MPI_COMM_FREE_KEYVAL,
-MPID_STATE_MPI_COMM_GET_ATTR,
-MPID_STATE_MPI_COMM_GET_NAME,
-MPID_STATE_MPI_COMM_SET_ATTR,
-MPID_STATE_MPI_COMM_SET_NAME,
-MPID_STATE_MPI_FILE_CALL_ERRHANDLER,
-MPID_STATE_MPI_GREQUEST_COMPLETE,
-MPID_STATE_MPI_GREQUEST_START,
-MPID_STATE_MPI_INIT_THREAD,
-
-MPID_STATE_MPI_TYPE_GET_ENVELOPE, 
-MPID_STATE_MPI_TYPE_GET_CONTENTS,
-MPID_STATE_MPI_TYPE_CREATE_KEYVAL,
-MPID_STATE_MPI_TYPE_DELETE_ATTR,
-MPID_STATE_MPI_TYPE_DUP,
-MPID_STATE_MPI_TYPE_FREE_KEYVAL,
-MPID_STATE_MPI_TYPE_GET_ATTR,
-MPID_STATE_MPI_TYPE_GET_NAME,
-MPID_STATE_MPI_TYPE_SET_ATTR,
-MPID_STATE_MPI_TYPE_SET_NAME,
-MPID_STATE_MPI_WIN_CALL_ERRHANDLER,
-MPID_STATE_MPI_WIN_CREATE_KEYVAL,
-MPID_STATE_MPI_WIN_DELETE_ATTR,
-MPID_STATE_MPI_WIN_FREE_KEYVAL,
-MPID_STATE_MPI_WIN_GET_ATTR,
-MPID_STATE_MPI_WIN_GET_NAME,
-MPID_STATE_MPI_WIN_SET_ATTR,
-MPID_STATE_MPI_WIN_SET_NAME,
-MPID_STATE_MPI_ALLOC_MEM,
-MPID_STATE_MPI_COMM_CREATE_ERRHANDLER,
-MPID_STATE_MPI_COMM_GET_ERRHANDLER,
-MPID_STATE_MPI_COMM_SET_ERRHANDLER,
-MPID_STATE_MPI_FILE_CREATE_ERRHANDLER,
-MPID_STATE_MPI_FILE_GET_ERRHANDLER,
-MPID_STATE_MPI_FILE_SET_ERRHANDLER,
-MPID_STATE_MPI_FREE_MEM,
-
-MPID_STATE_MPI_GET_ADDRESS,
-
-/* Info */
-MPID_STATE_MPI_INFO_CREATE,
-MPID_STATE_MPI_INFO_DELETE,
-MPID_STATE_MPI_INFO_DUP,
-MPID_STATE_MPI_INFO_FREE,
-MPID_STATE_MPI_INFO_GET,
-MPID_STATE_MPI_INFO_GET_NKEYS,
-MPID_STATE_MPI_INFO_GET_NTHKEY,
-MPID_STATE_MPI_INFO_GET_VALUELEN,
-MPID_STATE_MPI_INFO_SET,
-
-MPID_STATE_MPI_PACK_EXTERNAL, 
-MPID_STATE_MPI_PACK_EXTERNAL_SIZE, 
-MPID_STATE_MPI_REQUEST_GET_STATUS,
-MPID_STATE_MPI_UNPACK_EXTERNAL, 
-MPID_STATE_MPI_WIN_CREATE_ERRHANDLER,
-MPID_STATE_MPI_WIN_GET_ERRHANDLER,
-MPID_STATE_MPI_WIN_SET_ERRHANDLER,
-
-MPID_STATE_MPI_TYPE_MATCH_SIZE,
-MPID_STATE_MPI_REGISTER_DATAREP,
-
-/* internal mpi states */
-MPID_STATE_MPIC_SEND,
-MPID_STATE_MPIC_ISEND,
-MPID_STATE_MPIC_RECV,
-MPID_STATE_MPIC_IRECV,
-MPID_STATE_MPIC_SENDRECV,
-MPID_STATE_MPIC_WAIT,
-
-MPID_NUM_TIMER_STATES
-};
-
-#endif
diff --git a/src/include/mpitimerimpl.h b/src/include/mpitimerimpl.h
index ea6eae3..9113ce6 100644
--- a/src/include/mpitimerimpl.h
+++ b/src/include/mpitimerimpl.h
@@ -22,15 +22,6 @@
     HAVE_TIMING == MPID_TIMING_KIND_ALL || \
     HAVE_TIMING == MPID_TIMING_KIND_RUNTIME)
 
-/* define MPID_LOG_RECV_FROM_BEGINNING to log arrows from the beginning of 
-   send operations to the beginning of the corresponding receive operations.  
-   Otherwise, arrows are logged from the beginning of the send to the end of 
-   the receive. */
-/* FIXME: Document this and/or make it a runtime feature or decide on a 
-   single approach. */
-#undef MPID_LOG_RECV_FROM_BEGINNING
-/*#define MPID_LOG_RECV_FROM_BEGINNING*/
-
 /* This include file contains the static state definitions */
 #include "mpiallstates.h"
 
@@ -66,71 +57,23 @@
 #define MPID_MPI_FINALIZE_FUNC_ENTER(a)       MPIDU_FINALIZE_FUNC_ENTER(a)
 #define MPID_MPI_FINALIZE_FUNC_EXIT(a)        MPIDU_FINALIZE_FUNC_EXIT(a)
 
-#define MPID_LOG_ARROWS
-#ifdef MPID_LOG_ARROWS
 #define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)    MPIDU_PT2PT_FUNC_ENTER_FRONT(a)
 #define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)     MPIDU_PT2PT_FUNC_EXIT(a)
-#ifdef MPID_LOG_RECV_FROM_BEGINNING
-#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)     MPIDU_PT2PT_FUNC_ENTER_BACK(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)      MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)      MPIDU_PT2PT_FUNC_EXIT(a)
-#else
 #define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)     MPIDU_PT2PT_FUNC_ENTER(a)
 #define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)      MPIDU_PT2PT_FUNC_EXIT_BACK(a)
 #define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)      MPIDU_PT2PT_FUNC_EXIT_BOTH(a)
-#endif
 #define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)     MPIDU_PT2PT_FUNC_ENTER_BOTH(a)
-#else
-#define MPID_MPI_PT2PT_FUNC_ENTER_FRONT(a)    MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_FRONT(a)     MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BACK(a)     MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_ENTER_BOTH(a)     MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BACK(a)      MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPID_MPI_PT2PT_FUNC_EXIT_BOTH(a)      MPIDU_PT2PT_FUNC_EXIT(a)
-#endif
 
 #if defined(HAVE_TIMING) && (HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || HAVE_TIMING == MPID_TIMING_KIND_ALL)
 
 /* device layer definitions */
 #define MPIDI_STATE_DECL(a)                MPIDU_STATE_DECL(a)
-#define MPIDI_INIT_STATE_DECL(a)           MPIDU_INIT_STATE_DECL(a)
-#define MPIDI_FINALIZE_STATE_DECL(a)       MPIDU_FINALIZE_STATE_DECL(a)
-
 #define MPIDI_FUNC_ENTER(a)                MPIDU_FUNC_ENTER(a)
 #define MPIDI_FUNC_EXIT(a)                 MPIDU_FUNC_EXIT(a)
 #define MPIDI_PT2PT_FUNC_ENTER(a)          MPIDU_PT2PT_FUNC_ENTER(a)
 #define MPIDI_PT2PT_FUNC_EXIT(a)           MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPIDI_COLL_FUNC_ENTER(a)           MPIDU_COLL_FUNC_ENTER(a)
-#define MPIDI_COLL_FUNC_EXIT(a)            MPIDU_COLL_FUNC_EXIT(a)
 #define MPIDI_RMA_FUNC_ENTER(a)            MPIDU_RMA_FUNC_ENTER(a)
 #define MPIDI_RMA_FUNC_EXIT(a)             MPIDU_RMA_FUNC_EXIT(a)
-#define MPIDI_INIT_FUNC_ENTER(a)           MPIDU_INIT_FUNC_ENTER(a)
-#define MPIDI_INIT_FUNC_EXIT(a)            MPIDU_INIT_FUNC_EXIT(a)
-#define MPIDI_FINALIZE_FUNC_ENTER(a)       MPIDU_FINALIZE_FUNC_ENTER(a)
-#define MPIDI_FINALIZE_FUNC_EXIT(a)        MPIDU_FINALIZE_FUNC_EXIT(a)
-
-#define MPID_LOG_MPID_ARROWS
-#ifdef MPID_LOG_MPID_ARROWS
-#define MPIDI_PT2PT_FUNC_ENTER_FRONT(a)    MPIDU_PT2PT_FUNC_ENTER_FRONT(a)
-#define MPIDI_PT2PT_FUNC_EXIT_FRONT(a)     MPIDU_PT2PT_FUNC_EXIT(a)
-#ifdef MPID_LOG_RECV_FROM_BEGINNING
-#define MPIDI_PT2PT_FUNC_ENTER_BACK(a)     MPIDU_PT2PT_FUNC_ENTER_BACK(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BACK(a)      MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BOTH(a)      MPIDU_PT2PT_FUNC_EXIT(a)
-#else
-#define MPIDI_PT2PT_FUNC_ENTER_BACK(a)     MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BACK(a)      MPIDU_PT2PT_FUNC_EXIT_BACK(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BOTH(a)      MPIDU_PT2PT_FUNC_EXIT_BOTH(a)
-#endif
-#define MPIDI_PT2PT_FUNC_ENTER_BOTH(a)     MPIDU_PT2PT_FUNC_ENTER_BOTH(a)
-#else
-#define MPIDI_PT2PT_FUNC_ENTER_FRONT(a)    MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT_FRONT(a)     MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_ENTER_BACK(a)     MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_ENTER_BOTH(a)     MPIDU_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BACK(a)      MPIDU_PT2PT_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BOTH(a)      MPIDU_PT2PT_FUNC_EXIT(a)
-#endif
 
 #else
 
@@ -138,21 +81,9 @@
 #define MPIDI_FUNC_ENTER(a)
 #define MPIDI_FUNC_EXIT(a)
 #define MPIDI_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_ENTER_FRONT(a)
-#define MPIDI_PT2PT_FUNC_EXIT_FRONT(a)
-#define MPIDI_PT2PT_FUNC_ENTER_BACK(a)
-#define MPIDI_PT2PT_FUNC_ENTER_BOTH(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BACK(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BOTH(a)
 #define MPIDI_PT2PT_FUNC_EXIT(a)
-#define MPIDI_COLL_FUNC_ENTER(a)
-#define MPIDI_COLL_FUNC_EXIT(a)
 #define MPIDI_RMA_FUNC_ENTER(a)
 #define MPIDI_RMA_FUNC_EXIT(a)
-#define MPIDI_INIT_FUNC_ENTER(a)
-#define MPIDI_INIT_FUNC_EXIT(a)
-#define MPIDI_FINALIZE_FUNC_ENTER(a)
-#define MPIDI_FINALIZE_FUNC_EXIT(a)
 
 #endif /* (HAVE_TIMING == MPID_TIMING_KIND_LOG_DETAILED || HAVE_TIMING == MPID_TIMING_KIND_ALL) */
 
@@ -196,21 +127,9 @@ int MPIR_Describe_timer_states(void);
 #define MPIDI_FUNC_ENTER(a)
 #define MPIDI_FUNC_EXIT(a)
 #define MPIDI_PT2PT_FUNC_ENTER(a)
-#define MPIDI_PT2PT_FUNC_ENTER_FRONT(a)
-#define MPIDI_PT2PT_FUNC_EXIT_FRONT(a)
-#define MPIDI_PT2PT_FUNC_ENTER_BACK(a)
-#define MPIDI_PT2PT_FUNC_ENTER_BOTH(a)
 #define MPIDI_PT2PT_FUNC_EXIT(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BACK(a)
-#define MPIDI_PT2PT_FUNC_EXIT_BOTH(a)
-#define MPIDI_COLL_FUNC_ENTER(a)
-#define MPIDI_COLL_FUNC_EXIT(a)
 #define MPIDI_RMA_FUNC_ENTER(a)
 #define MPIDI_RMA_FUNC_EXIT(a)
-#define MPIDI_INIT_FUNC_ENTER(a)
-#define MPIDI_INIT_FUNC_EXIT(a)
-#define MPIDI_FINALIZE_FUNC_ENTER(a)
-#define MPIDI_FINALIZE_FUNC_EXIT(a)
 
 #endif /* HAVE_TIMING */
 
diff --git a/src/include/rlog_macros.h b/src/include/rlog_macros.h
index bb2a9bb..1bf00ce 100644
--- a/src/include/rlog_macros.h
+++ b/src/include/rlog_macros.h
@@ -92,24 +92,13 @@ if (g_pRLOG) \
     RLOG_LogRecv( g_pRLOG, source, tag, count ); \
 }
 
-#ifdef MPID_LOG_RECV_FROM_BEGINNING
 #define MPIDU_PT2PT_FUNC_ENTER_BOTH(a) \
 if (g_pRLOG) \
 { \
     g_pRLOG->nRecursion++; \
     MPID_Wtime( &time_stamp_in##a ); \
     RLOG_LogSend( g_pRLOG, dest, sendtag, sendcount ); \
-    RLOG_LogRecv( g_pRLOG, source, recvtag, recvcount ); \
 }
-#else
-#define MPIDU_PT2PT_FUNC_ENTER_BOTH(a) \
-if (g_pRLOG) \
-{ \
-    g_pRLOG->nRecursion++; \
-    MPID_Wtime( &time_stamp_in##a ); \
-    RLOG_LogSend( g_pRLOG, dest, sendtag, sendcount ); \
-}
-#endif
 
 #define MPIDU_PT2PT_FUNC_EXIT_BACK(a) \
 if (g_pRLOG) \
diff --git a/src/mpi/coll/allgather.c b/src/mpi/coll/allgather.c
index dac7dda..99b5ddf 100644
--- a/src/mpi/coll/allgather.c
+++ b/src/mpi/coll/allgather.c
@@ -150,9 +150,6 @@ int MPIR_Allgather_intra (
     MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
 				     (comm_size * recvcount * recvtype_extent));
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     tot_bytes = (MPI_Aint)recvcount * comm_size * type_size;
     if ((tot_bytes < MPIR_CVAR_ALLGATHER_LONG_MSG_SIZE) && !(comm_size & (comm_size - 1))) {
 
@@ -626,8 +623,6 @@ int MPIR_Allgather_intra (
 
  fn_exit:
     MPIU_CHKLMEM_FREEALL();
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
diff --git a/src/mpi/coll/allgatherv.c b/src/mpi/coll/allgatherv.c
index c3767c3..dff4ed2 100644
--- a/src/mpi/coll/allgatherv.c
+++ b/src/mpi/coll/allgatherv.c
@@ -117,9 +117,6 @@ int MPIR_Allgatherv_intra (
 #endif
     MPIU_CHKLMEM_DECL(1);
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
     
@@ -741,8 +738,6 @@ int MPIR_Allgatherv_intra (
 
  fn_exit:
     MPIU_CHKLMEM_FREEALL();
-  /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
diff --git a/src/mpi/coll/allreduce.c b/src/mpi/coll/allreduce.c
index 423a14c..27d9460 100644
--- a/src/mpi/coll/allreduce.c
+++ b/src/mpi/coll/allreduce.c
@@ -200,9 +200,6 @@ int MPIR_Allreduce_intra (
     void *tmp_buf;
     MPIU_CHKLMEM_DECL(3);
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     if (count == 0) goto fn_exit;
 
     is_commutative = MPIR_Op_is_commutative(op);
@@ -598,9 +595,6 @@ int MPIR_Allreduce_intra (
     }
 
   fn_exit:
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
-
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -638,8 +632,6 @@ int MPIR_Allreduce_inter (
     MPIR_Comm *newcomm_ptr = NULL;
     MPIU_CHKLMEM_DECL(1);
 
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     if (comm_ptr->rank == 0) {
         MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
         MPID_Datatype_get_extent_macro(datatype, extent);
@@ -692,7 +684,6 @@ int MPIR_Allreduce_inter (
     }
 
   fn_exit:
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
diff --git a/src/mpi/coll/alltoall.c b/src/mpi/coll/alltoall.c
index d3a8381..2a65c9c 100644
--- a/src/mpi/coll/alltoall.c
+++ b/src/mpi/coll/alltoall.c
@@ -163,9 +163,6 @@ int MPIR_Alltoall_intra(
     MPID_Datatype_get_size_macro(sendtype, sendtype_size);
     nbytes = sendtype_size * sendcount;
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     if (sendbuf == MPI_IN_PLACE) {
         /* We use pair-wise sendrecv_replace in order to conserve memory usage,
          * which is keeping with the spirit of the MPI-2.2 Standard.  But
@@ -440,8 +437,6 @@ int MPIR_Alltoall_intra(
 
  fn_exit:
     MPIU_CHKLMEM_FREEALL();
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -496,9 +491,6 @@ int MPIR_Alltoall_inter(
     MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
     MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     /* Do the pairwise exchanges */
     max_size = MPL_MAX(local_size, remote_size);
     MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
@@ -536,8 +528,6 @@ int MPIR_Alltoall_inter(
     }
 
  fn_exit:
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
diff --git a/src/mpi/coll/alltoallv.c b/src/mpi/coll/alltoallv.c
index 8fd3692..276b6aa 100644
--- a/src/mpi/coll/alltoallv.c
+++ b/src/mpi/coll/alltoallv.c
@@ -84,9 +84,6 @@ int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcounts, const int *
     /* Get extent of recv type, but send type is only valid if (sendbuf!=MPI_IN_PLACE) */
     MPID_Datatype_get_extent_macro(recvtype, recv_extent);
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     if (sendbuf == MPI_IN_PLACE) {
         /* We use pair-wise sendrecv_replace in order to conserve memory usage,
          * which is keeping with the spirit of the MPI-2.2 Standard.  But
@@ -213,8 +210,6 @@ int MPIR_Alltoallv_intra(const void *sendbuf, const int *sendcounts, const int *
     }
 
 fn_exit:
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     MPIU_CHKLMEM_FREEALL();
 
     if (mpi_errno_ret)
@@ -268,9 +263,6 @@ int MPIR_Alltoallv_inter(const void *sendbuf, const int *sendcounts, const int *
     MPID_Datatype_get_extent_macro(sendtype, send_extent);
     MPID_Datatype_get_extent_macro(recvtype, recv_extent);
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     /* Use pairwise exchange algorithm. */
     max_size = MPL_MAX(local_size, remote_size);
     for (i=0; i<max_size; i++) {
@@ -312,8 +304,6 @@ int MPIR_Alltoallv_inter(const void *sendbuf, const int *sendcounts, const int *
     }
 
  fn_exit:
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
diff --git a/src/mpi/coll/alltoallw.c b/src/mpi/coll/alltoallw.c
index 08818b7..fb625a8 100644
--- a/src/mpi/coll/alltoallw.c
+++ b/src/mpi/coll/alltoallw.c
@@ -74,9 +74,6 @@ int MPIR_Alltoallw_intra(const void *sendbuf, const int sendcounts[], const int
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     if (sendbuf == MPI_IN_PLACE) {
         /* We use pair-wise sendrecv_replace in order to conserve memory usage,
          * which is keeping with the spirit of the MPI-2.2 Standard.  But
@@ -215,9 +212,7 @@ int MPIR_Alltoallw_intra(const void *sendbuf, const int sendcounts[], const int
 #endif
     }
 
-    /* check if multiple threads are calling this collective function */
   fn_exit:
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );  
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
@@ -263,9 +258,6 @@ int MPIR_Alltoallw_inter(const void *sendbuf, const int sendcounts[], const int
     remote_size = comm_ptr->remote_size;
     rank = comm_ptr->rank;
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     /* Use pairwise exchange algorithm. */
     max_size = MPL_MAX(local_size, remote_size);
     for (i=0; i<max_size; i++) {
@@ -307,8 +299,6 @@ int MPIR_Alltoallw_inter(const void *sendbuf, const int sendcounts[], const int
     }
     
  fn_exit:
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
diff --git a/src/mpi/coll/barrier.c b/src/mpi/coll/barrier.c
index 88499c8..b52efd6 100644
--- a/src/mpi/coll/barrier.c
+++ b/src/mpi/coll/barrier.c
@@ -133,10 +133,6 @@ int MPIR_Barrier_intra( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag )
     int size, rank, src, dst, mask, mpi_errno=MPI_SUCCESS;
     int mpi_errno_ret = MPI_SUCCESS;
 
-    /* Only one collective operation per communicator can be active at any
-       time */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     size = comm_ptr->local_size;
     /* Trivial barriers return immediately */
     if (size == 1) goto fn_exit;
@@ -173,7 +169,6 @@ int MPIR_Barrier_intra( MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag )
     }
 
  fn_exit:
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
diff --git a/src/mpi/coll/bcast.c b/src/mpi/coll/bcast.c
index 547f8ea..a82b545 100644
--- a/src/mpi/coll/bcast.c
+++ b/src/mpi/coll/bcast.c
@@ -1233,9 +1233,6 @@ int MPIR_Bcast_intra (
 
     MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_BCAST);
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     if (count == 0) goto fn_exit;
 
     MPID_Datatype_get_size_macro(datatype, type_size);
@@ -1316,9 +1313,6 @@ int MPIR_Bcast_intra (
     }
 
 fn_exit:
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
-
     MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_BCAST);
 
     /* --BEGIN ERROR HANDLING-- */
@@ -1368,7 +1362,6 @@ int MPIR_Bcast_inter (
     else if (root == MPI_ROOT)
     {
         /* root sends to rank 0 on remote group and returns */
-        MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
         mpi_errno =  MPIC_Send(buffer, count, datatype, 0,
                                   MPIR_BCAST_TAG, comm_ptr, errflag);
         if (mpi_errno) {
@@ -1377,7 +1370,6 @@ int MPIR_Bcast_inter (
             MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
             MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
         }
-        MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     }
     else
     {
diff --git a/src/mpi/coll/exscan.c b/src/mpi/coll/exscan.c
index 4c7620f..c6e5eda 100644
--- a/src/mpi/coll/exscan.c
+++ b/src/mpi/coll/exscan.c
@@ -148,9 +148,6 @@ int MPIR_Exscan (
                                partial_scan, count, datatype);
     if (mpi_errno) MPIR_ERR_POP(mpi_errno);
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     flag = 0;
     mask = 0x1;
     while (mask < comm_size) {
@@ -217,9 +214,6 @@ int MPIR_Exscan (
         mask <<= 1;
     }
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
-
     {
         MPIR_Per_thread_t *per_thread = NULL;
         int err = 0;
diff --git a/src/mpi/coll/gather.c b/src/mpi/coll/gather.c
index 8c35426..f2e6351 100644
--- a/src/mpi/coll/gather.c
+++ b/src/mpi/coll/gather.c
@@ -125,9 +125,6 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
         is_homogeneous = 0;
 #endif
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     /* Use binomial tree algorithm. */
     
     relative_rank = (rank >= root) ? rank - root : rank - root + comm_size;
@@ -483,8 +480,6 @@ int MPIR_Gather_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 
  fn_exit:
     MPIU_CHKLMEM_FREEALL();
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -532,8 +527,6 @@ int MPIR_Gather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
         return MPI_SUCCESS;
     }
 
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     remote_size = comm_ptr->remote_size; 
     local_size = comm_ptr->local_size; 
 
@@ -654,7 +647,6 @@ int MPIR_Gather_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 
  fn_exit:
     MPIU_CHKLMEM_FREEALL();
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
diff --git a/src/mpi/coll/gatherv.c b/src/mpi/coll/gatherv.c
index a09f360..c0f496e 100644
--- a/src/mpi/coll/gatherv.c
+++ b/src/mpi/coll/gatherv.c
@@ -97,9 +97,6 @@ int MPIR_Gatherv (
 
     rank = comm_ptr->rank;
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     /* If rank == root, then I recv lots, otherwise I send */
     if (((comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) && (root == rank)) ||
         ((comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) && (root == MPI_ROOT))) {
@@ -195,8 +192,6 @@ int MPIR_Gatherv (
     
 
 fn_exit:
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
diff --git a/src/mpi/coll/helper_fns.c b/src/mpi/coll/helper_fns.c
index c5b2729..d2f015f 100644
--- a/src/mpi/coll/helper_fns.c
+++ b/src/mpi/coll/helper_fns.c
@@ -535,10 +535,6 @@ int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
     MPI_Aint tmpbuf_count = 0;
     MPIU_CHKLMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE);
-#ifdef MPID_LOG_ARROWS
-    /* The logging macros log sendcount and recvcount */
-    int sendcount = count, recvcount = count;
-#endif
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_REPLACE);
 
diff --git a/src/mpi/coll/red_scat.c b/src/mpi/coll/red_scat.c
index 2f4c35c..7babc13 100644
--- a/src/mpi/coll/red_scat.c
+++ b/src/mpi/coll/red_scat.c
@@ -301,9 +301,6 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
     MPID_Datatype_get_size_macro(datatype, type_size);
     nbytes = total_count * type_size;
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     /* total_count*extent eventually gets malloced. it isn't added to
      * a user-passed in buffer */
     MPIU_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
@@ -882,9 +879,6 @@ int MPIR_Reduce_scatter_intra(const void *sendbuf, void *recvbuf, const int recv
 fn_exit:
     MPIU_CHKLMEM_FREEALL();
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
-
     {
         MPIR_Per_thread_t *per_thread = NULL;
         int err = 0;
diff --git a/src/mpi/coll/red_scat_block.c b/src/mpi/coll/red_scat_block.c
index 0d6f058..6a23515 100644
--- a/src/mpi/coll/red_scat_block.c
+++ b/src/mpi/coll/red_scat_block.c
@@ -300,9 +300,6 @@ int MPIR_Reduce_scatter_block_intra (
     MPID_Datatype_get_size_macro(datatype, type_size);
     nbytes = total_count * type_size;
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     /* total_count*extent eventually gets malloced. it isn't added to
      * a user-passed in buffer */
     MPIU_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));
@@ -854,9 +851,6 @@ int MPIR_Reduce_scatter_block_intra (
 fn_exit:
     MPIU_CHKLMEM_FREEALL();
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
-
     {
         MPIR_Per_thread_t *per_thread = NULL;
         int err = 0;
diff --git a/src/mpi/coll/reduce.c b/src/mpi/coll/reduce.c
index 9083bd2..136209c 100644
--- a/src/mpi/coll/reduce.c
+++ b/src/mpi/coll/reduce.c
@@ -744,8 +744,6 @@ int MPIR_Reduce_intra (
     MPIU_CHKLMEM_DECL(1);
 
     if (count == 0) return MPI_SUCCESS;
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
 
     if (MPIR_CVAR_ENABLE_SMP_COLLECTIVES && MPIR_CVAR_ENABLE_SMP_REDUCE) {
     /* is the op commutative? We do SMP optimizations only if it is. */
@@ -896,9 +894,6 @@ int MPIR_Reduce_intra (
         
 
   fn_exit:
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
-
     MPIU_CHKLMEM_FREEALL();
 
     if (mpi_errno_ret)
@@ -947,9 +942,6 @@ int MPIR_Reduce_inter (
         return MPI_SUCCESS;
     }
 
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
-
     if (root == MPI_ROOT) {
         /* root receives data from rank 0 on remote group */
         mpi_errno = MPIC_Recv(recvbuf, count, datatype, 0,
@@ -1013,7 +1005,6 @@ int MPIR_Reduce_inter (
     }
 
   fn_exit:
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr ); 
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
diff --git a/src/mpi/coll/scan.c b/src/mpi/coll/scan.c
index 1b14c0e..dfaf145 100644
--- a/src/mpi/coll/scan.c
+++ b/src/mpi/coll/scan.c
@@ -90,9 +90,6 @@ static int MPIR_Scan_generic (
     
     if (count == 0) return MPI_SUCCESS;
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
 
@@ -213,8 +210,6 @@ static int MPIR_Scan_generic (
     
  fn_exit:
     MPIU_CHKLMEM_FREEALL();
-     /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
diff --git a/src/mpi/coll/scatter.c b/src/mpi/coll/scatter.c
index 2faf560..8a9d6ea 100644
--- a/src/mpi/coll/scatter.c
+++ b/src/mpi/coll/scatter.c
@@ -94,9 +94,6 @@ int MPIR_Scatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype
          ((rank != root) && (recvcount == 0)) )
         return MPI_SUCCESS;
 
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     is_homogeneous = 1;
 #ifdef MPID_HAS_HETERO
     if (comm_ptr->is_hetero)
@@ -402,8 +399,6 @@ int MPIR_Scatter_intra(const void *sendbuf, int sendcount, MPI_Datatype sendtype
     
  fn_exit:
     MPIU_CHKLMEM_FREEALL();
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
@@ -446,7 +441,6 @@ int MPIR_Scatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
         /* local processes other than root do nothing */
         return MPI_SUCCESS;
     }
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
 
     remote_size = comm_ptr->remote_size; 
     local_size  = comm_ptr->local_size; 
@@ -552,8 +546,6 @@ int MPIR_Scatter_inter(const void *sendbuf, int sendcount, MPI_Datatype sendtype
 
  fn_exit:
     MPIU_CHKLMEM_FREEALL();
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
     else if (*errflag != MPIR_ERR_NONE)
diff --git a/src/mpi/coll/scatterv.c b/src/mpi/coll/scatterv.c
index c915917..5db178c 100644
--- a/src/mpi/coll/scatterv.c
+++ b/src/mpi/coll/scatterv.c
@@ -67,9 +67,6 @@ int MPIR_Scatterv(const void *sendbuf, const int *sendcounts, const int *displs,
 
     rank = comm_ptr->rank;
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     /* If I'm the root, then scatter */
     if (((comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) && (root == rank)) ||
         ((comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) && (root == MPI_ROOT))) {
@@ -144,8 +141,6 @@ int MPIR_Scatterv(const void *sendbuf, const int *sendcounts, const int *displs,
     
     
 fn_exit:
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     MPIU_CHKLMEM_FREEALL();
     if (mpi_errno_ret)
         mpi_errno = mpi_errno_ret;
diff --git a/src/mpi/pt2pt/sendrecv_rep.c b/src/mpi/pt2pt/sendrecv_rep.c
index e9d2f11..70ce78b 100644
--- a/src/mpi/pt2pt/sendrecv_rep.c
+++ b/src/mpi/pt2pt/sendrecv_rep.c
@@ -72,10 +72,6 @@ int MPI_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
     static const char FCNAME[] = "MPI_Sendrecv_replace";
     int mpi_errno = MPI_SUCCESS;
     MPIR_Comm *comm_ptr = NULL;
-#ifdef MPID_LOG_ARROWS
-    /* This isn't the right test, but it is close enough for now */
-    int sendcount = count, recvcount = count;
-#endif
     MPIU_CHKLMEM_DECL(1);
     MPID_MPI_STATE_DECL(MPID_STATE_MPI_SENDRECV_REPLACE);
     
diff --git a/src/mpi/spawn/comm_spawn.c b/src/mpi/spawn/comm_spawn.c
index 34b6663..b2e4ff8 100644
--- a/src/mpi/spawn/comm_spawn.c
+++ b/src/mpi/spawn/comm_spawn.c
@@ -118,9 +118,6 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf
 
     /* ... body of routine ...  */
     
-    /* check if multiple threads are calling this collective function */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
-
     mpi_errno = MPID_Comm_spawn_multiple(1, (char **) &command, &argv,
                                          &maxprocs, &info_ptr, root,  
                                          comm_ptr, &intercomm_ptr,
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c b/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
index 4b65b33..c1897a9 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3i_comm.c
@@ -174,10 +174,6 @@ static int barrier(MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag)
     if (comm_ptr->local_size == 1)
         return MPI_SUCCESS;
 
-    /* Only one collective operation per communicator can be active at any
-       time */
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER (comm_ptr);
-
     if (comm_ptr->dev.ch.barrier_vars == NULL) {
         mpi_errno = alloc_barrier_vars (comm_ptr, &comm_ptr->dev.ch.barrier_vars);
         if (mpi_errno) MPIR_ERR_POP (mpi_errno);
@@ -220,7 +216,6 @@ static int barrier(MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag)
     }
 
  fn_exit:
-    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
     return mpi_errno;
  fn_fail:
     goto fn_exit;
diff --git a/src/util/assert/mpiassert.h b/src/util/assert/mpiassert.h
index 23f2300..f06170f 100644
--- a/src/util/assert/mpiassert.h
+++ b/src/util/assert/mpiassert.h
@@ -34,12 +34,8 @@ int MPIR_Assert_fail_fmt(const char *cond, const char *file_name, int line_num,
  * Similar to assert() except that it performs an MPID_Abort() when the 
  * assertion fails.  Also, for Windows, it doesn't popup a
  * mesage box on a remote machine.
- *
- * MPIU_AssertDecl may be used to include declarations only needed
- * when MPIU_Assert is non-null (e.g., when assertions are enabled)
  */
 #if (!defined(NDEBUG) && defined(HAVE_ERROR_CHECKING))
-#   define MPIU_AssertDecl(a_) a_
 #   define MPIU_AssertDeclValue(_a,_b) _a = _b
 #   define MPIU_Assert(a_)                             \
     do {                                               \
@@ -50,7 +46,6 @@ int MPIR_Assert_fail_fmt(const char *cond, const char *file_name, int line_num,
 #else
 #   define MPIU_Assert(a_)
 /* Empty decls not allowed in C */
-#   define MPIU_AssertDecl(a_) a_ 
 #   define MPIU_AssertDeclValue(_a,_b) _a ATTRIBUTE((unused)) = _b
 #endif
 
@@ -132,32 +127,4 @@ int MPIR_Assert_fail_fmt(const char *cond, const char *file_name, int line_num,
 #  define MPIU_Static_assert(cond_,msg_) MPIU_Assert_fmt_msg((cond_), ("%s", (msg_)))
 #endif
 
-/* evaluates to TRUE if ((a_)*(b_)>(max_)), only detects overflow for positive
- * a_ and _b. */
-#define MPIU_Prod_overflows_max(a_, b_, max_) \
-    ( (a_) > 0 && (b_) > 0 && ((a_) > ((max_) / (b_))) )
-
-/* asserts that ((a_)*(b_)<=(max_)) holds in a way that is robust against
- * undefined integer overflow behavior and is suitable for both signed and
- * unsigned math (only suitable for positive values of (a_) and (b_)) */
-#define MPIU_Assert_prod_pos_overflow_safe(a_, b_, max_)                               \
-    MPIU_Assert_fmt_msg(!MPIU_Prod_overflows_max((a_),(b_),(max_)),                    \
-                        ("overflow detected: (%llx * %llx) > %s", (a_), (b_), #max_)); \
-
-
-
-/* -------------------------------------------------------------------------- */
-/* static type checking macros */
-
-/* implement using C11's "_Generic" functionality (optimal case) */
-#ifdef HAVE_C11__GENERIC
-#  define MPIU_Assert_has_type(expr_,type_) \
-    MPIU_Static_assert(_Generic((expr_), type_: 1, default: 0), \
-                       "expression '" #expr_ "' does not have type '" #type_ "'")
-#endif
-/* fallthrough to do nothing */
-#ifndef MPIU_Assert_has_type
-#  define MPIU_Assert_has_type(expr_,type_) do {} while (0)
-#endif
-
 #endif /* !defined(MPIASSERT_H_INCLUDED) */
diff --git a/src/util/pointer/mpiu_pointer.h b/src/util/pointer/mpiu_pointer.h
index d6d6597..fccea14 100644
--- a/src/util/pointer/mpiu_pointer.h
+++ b/src/util/pointer/mpiu_pointer.h
@@ -37,16 +37,6 @@
   MPIU_Assert((aint) == (MPI_Aint)(int)(aint));
 
 /*
- * Ensure an MPI_Aint value fits into an unsigned int.
- * Useful for detecting overflow when MPI_Aint is larger than an
- * unsigned int.
- *
- * \param[in]  aint  Variable of type MPI_Aint
- */
-#define MPIU_Ensure_Aint_fits_in_uint(aint) \
-  MPIU_Assert((aint) == (MPI_Aint)(unsigned int)(aint));
-
-/*
  * Ensure an MPI_Aint value fits into a pointer.
  * Useful for detecting overflow when MPI_Aint is larger than a pointer.
  *

http://git.mpich.org/mpich.git/commitdiff/50b21ecddabd41bdfcded62276b7ce9da699a7e5

commit 50b21ecddabd41bdfcded62276b7ce9da699a7e5
Author: Pavan Balaji <balaji at anl.gov>
Date:   Fri Apr 22 10:15:58 2016 -0500

    Fix typo in error message.
    
    Signed-off-by: Ken Raffenetti <raffenet at mcs.anl.gov>

diff --git a/src/include/mpiimpl.h b/src/include/mpiimpl.h
index 38d1b38..3f7b4ba 100644
--- a/src/include/mpiimpl.h
+++ b/src/include/mpiimpl.h
@@ -459,7 +459,7 @@ void MPIR_DatatypeAttrFinalize( void );
      }                                                \
 }
 #define MPIR_Win_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Win,ptr,MPI_ERR_WIN,err)
-#define MPIR_Group_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Win,ptr,MPI_ERR_GROUP,err)
+#define MPIR_Group_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Group,ptr,MPI_ERR_GROUP,err)
 #define MPIR_Op_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Op,ptr,MPI_ERR_OP,err)
 #define MPIR_Errhandler_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Errhandler,ptr,MPI_ERR_ARG,err)
 #define MPIR_Request_valid_ptr(ptr,err) MPIR_Valid_ptr_class(Request,ptr,MPI_ERR_REQUEST,err)

-----------------------------------------------------------------------

Summary of changes:
 .gitignore                                         |    6 +-
 CHANGES                                            |    4 +-
 autogen.sh                                         |    2 +-
 confdb/aclocal_cc.m4                               |    2 +-
 configure.ac                                       |   81 +-
 maint/decode_handle                                |    8 +-
 maint/extractcvars.in                              |    8 +-
 maint/extracterrmsgs                               |   10 +-
 maint/genstates.in                                 |    6 +-
 mpi.def                                            |   28 +-
 mpich.def                                          |   28 +-
 src/binding/cxx/buildiface                         |   47 +-
 src/binding/fortran/mpif_h/buildiface              |  120 +-
 src/binding/fortran/mpif_h/mpi_fortimpl.h          |   19 +-
 src/binding/fortran/mpif_h/mpichf.def              |    4 +-
 src/binding/fortran/mpif_h/mpichfg.def             |    4 +-
 src/binding/fortran/mpif_h/mpichfs.def             |    4 +-
 src/binding/fortran/mpif_h/setbot.c.in             |    6 +-
 src/binding/fortran/use_mpi/create_f90_complex.c   |    6 +-
 src/binding/fortran/use_mpi/create_f90_int.c       |    6 +-
 src/binding/fortran/use_mpi/create_f90_real.c      |    6 +-
 src/binding/fortran/use_mpi/create_f90_util.c      |    2 +-
 .../fortran/use_mpi_f08/mpi_c_interface_glue.f90   |   16 +-
 .../fortran/use_mpi_f08/mpi_c_interface_nobuf.f90  |   12 +-
 .../fortran/use_mpi_f08/wrappers_c/buildiface      |    4 +-
 .../wrappers_f/comm_create_keyval_f08ts.f90        |    4 +-
 .../wrappers_f/type_create_keyval_f08ts.f90        |    4 +-
 .../wrappers_f/win_create_keyval_f08ts.f90         |    4 +-
 src/env/mpichversion.c                             |   16 +-
 src/glue/romio/glue_romio.c                        |    2 +-
 src/include/Makefile.mk                            |   71 +-
 src/include/glue_romio.h.in                        |   48 -
 src/include/mpi_attr.h                             |   25 -
 src/include/mpi_f77interface.h                     |   19 -
 src/include/mpi_fortlogical.h                      |   45 -
 src/include/mpi_lang.h                             |   77 -
 src/include/mpibsend.h                             |   76 -
 src/include/mpichconfconst.h                       |   49 +-
 src/include/mpierror.h                             |  189 -
 src/include/mpierrs.h                              |  856 ----
 src/include/mpiext.h                               |   43 -
 src/include/mpifunc.h                              |   38 -
 src/include/mpihandlemem.h                         |  394 --
 src/include/mpii_bsend.h                           |   76 +
 src/include/mpii_cxxinterface.h                    |   15 +
 src/include/mpii_f77interface.h                    |   24 +
 src/include/mpii_fortlogical.h                     |   45 +
 src/include/mpiimpl.h                              | 4472 +-------------------
 src/include/mpiinfo.h                              |    7 -
 src/include/mpimem.h                               |  306 --
 src/include/mpir_assert.h                          |  130 +
 src/include/mpir_attr.h                            |  205 +
 src/include/mpir_attr_generic.h                    |  188 +
 src/include/mpir_coll.h                            |  451 ++
 src/include/mpir_comm.h                            |  363 ++
 src/include/mpir_contextid.h                       |  105 +
 src/include/mpir_datatype.h                        |   64 +
 src/include/mpir_dbg.h                             |   24 +
 src/include/mpir_debugger.h                        |   36 +
 src/include/mpir_err.h                             | 1000 +++++
 src/include/mpir_errhandler.h                      |   96 +
 src/include/mpir_ext.h.in                          |   97 +
 src/include/mpir_func.h                            |  129 +
 src/include/mpir_group.h                           |  121 +
 src/include/mpir_info.h                            |  105 +
 src/include/mpir_mem.h                             |  270 ++
 src/include/mpir_misc.h                            |   90 +
 src/include/mpir_misc_post.h                       |   35 +
 src/include/mpir_nbc.h                             |   62 +-
 src/include/mpir_objects.h                         |  494 +++
 src/include/mpir_op.h                              |  169 +
 src/include/mpir_op_util.h                         |  375 ++
 src/include/mpir_pointers.h                        |   79 +
 src/include/mpir_process.h                         |   63 +
 src/include/mpir_pt2pt.h                           |   20 +
 src/include/mpir_refcount.h                        |   26 +
 .../refcount => include}/mpir_refcount_global.h    |    0
 .../refcount => include}/mpir_refcount_pobj.h      |    0
 .../refcount => include}/mpir_refcount_single.h    |    0
 src/include/mpir_request.h                         |  314 ++
 src/include/mpir_status.h                          |   65 +
 src/include/mpir_strerror.h                        |   15 +
 src/include/mpir_tags.h                            |   79 +
 src/include/mpir_thread.h                          |   85 +
 src/include/mpir_topo.h                            |  112 +
 src/include/mpir_type_defs.h                       |   76 +-
 src/include/mpir_utarray.h                         |  258 ++
 src/include/mpir_uthash.h                          |  951 +++++
 src/include/mpir_win.h                             |   93 +
 src/include/mpistates.h                            |  318 --
 src/include/mpit.h                                 |   28 +-
 src/include/mpitimerimpl.h                         |  217 -
 src/include/mpitimpl.h                             |  102 +-
 src/include/mpiu_utarray.h                         |  258 --
 src/include/mpiu_uthash.h                          |  951 -----
 src/include/mpiutil.h                              |   14 -
 src/include/oputil.h                               |  375 --
 src/include/rlog_macros.h                          |  145 +-
 src/mpi/attr/attr.h                                |    6 +-
 src/mpi/attr/attr_delete.c                         |   12 +-
 src/mpi/attr/attr_get.c                            |    8 +-
 src/mpi/attr/attr_put.c                            |    6 +-
 src/mpi/attr/attrutil.c                            |   58 +-
 src/mpi/attr/comm_create_keyval.c                  |   16 +-
 src/mpi/attr/comm_delete_attr.c                    |   18 +-
 src/mpi/attr/comm_free_keyval.c                    |   20 +-
 src/mpi/attr/comm_get_attr.c                       |   30 +-
 src/mpi/attr/comm_set_attr.c                       |   44 +-
 src/mpi/attr/keyval_create.c                       |    6 +-
 src/mpi/attr/keyval_free.c                         |   12 +-
 src/mpi/attr/type_create_keyval.c                  |   18 +-
 src/mpi/attr/type_delete_attr.c                    |   14 +-
 src/mpi/attr/type_free_keyval.c                    |   16 +-
 src/mpi/attr/type_get_attr.c                       |   20 +-
 src/mpi/attr/type_set_attr.c                       |   38 +-
 src/mpi/attr/win_create_keyval.c                   |   18 +-
 src/mpi/attr/win_delete_attr.c                     |   16 +-
 src/mpi/attr/win_free_keyval.c                     |   16 +-
 src/mpi/attr/win_get_attr.c                        |   34 +-
 src/mpi/attr/win_set_attr.c                        |   36 +-
 src/mpi/coll/allgather.c                           |   31 +-
 src/mpi/coll/allgatherv.c                          |   27 +-
 src/mpi/coll/allred_group.c                        |   16 +-
 src/mpi/coll/allreduce.c                           |   39 +-
 src/mpi/coll/alltoall.c                            |   34 +-
 src/mpi/coll/alltoallv.c                           |   32 +-
 src/mpi/coll/alltoallw.c                           |   24 +-
 src/mpi/coll/barrier.c                             |   15 +-
 src/mpi/coll/bcast.c                               |   66 +-
 src/mpi/coll/exscan.c                              |   24 +-
 src/mpi/coll/gather.c                              |   44 +-
 src/mpi/coll/gatherv.c                             |   21 +-
 src/mpi/coll/helper_fns.c                          |  102 +-
 src/mpi/coll/iallgather.c                          |  112 +-
 src/mpi/coll/iallgatherv.c                         |   98 +-
 src/mpi/coll/iallreduce.c                          |  150 +-
 src/mpi/coll/ialltoall.c                           |  102 +-
 src/mpi/coll/ialltoallv.c                          |   58 +-
 src/mpi/coll/ialltoallw.c                          |   44 +-
 src/mpi/coll/ibarrier.c                            |   42 +-
 src/mpi/coll/ibcast.c                              |  160 +-
 src/mpi/coll/iexscan.c                             |   52 +-
 src/mpi/coll/igather.c                             |   86 +-
 src/mpi/coll/igatherv.c                            |   30 +-
 src/mpi/coll/ired_scat.c                           |  186 +-
 src/mpi/coll/ired_scat_block.c                     |  184 +-
 src/mpi/coll/ireduce.c                             |  168 +-
 src/mpi/coll/iscan.c                               |   94 +-
 src/mpi/coll/iscatter.c                            |  106 +-
 src/mpi/coll/iscatterv.c                           |   26 +-
 src/mpi/coll/op_commutative.c                      |    6 +-
 src/mpi/coll/op_create.c                           |   16 +-
 src/mpi/coll/op_free.c                             |    8 +-
 src/mpi/coll/opband.c                              |    4 +-
 src/mpi/coll/opbor.c                               |    4 +-
 src/mpi/coll/opbxor.c                              |    4 +-
 src/mpi/coll/opland.c                              |   10 +-
 src/mpi/coll/oplor.c                               |   10 +-
 src/mpi/coll/oplxor.c                              |   10 +-
 src/mpi/coll/opmax.c                               |    4 +-
 src/mpi/coll/opmaxloc.c                            |    2 +-
 src/mpi/coll/opmin.c                               |    4 +-
 src/mpi/coll/opminloc.c                            |    2 +-
 src/mpi/coll/opprod.c                              |    4 +-
 src/mpi/coll/opsum.c                               |    4 +-
 src/mpi/coll/red_scat.c                            |   62 +-
 src/mpi/coll/red_scat_block.c                      |   58 +-
 src/mpi/coll/reduce.c                              |   77 +-
 src/mpi/coll/reduce_local.c                        |   12 +-
 src/mpi/coll/scan.c                                |   49 +-
 src/mpi/coll/scatter.c                             |   42 +-
 src/mpi/coll/scatterv.c                            |   21 +-
 src/mpi/comm/comm_agree.c                          |   12 +-
 src/mpi/comm/comm_compare.c                        |    6 +-
 src/mpi/comm/comm_create.c                         |   80 +-
 src/mpi/comm/comm_create_group.c                   |   22 +-
 src/mpi/comm/comm_dup.c                            |    8 +-
 src/mpi/comm/comm_dup_with_info.c                  |    6 +-
 src/mpi/comm/comm_failure_ack.c                    |    6 +-
 src/mpi/comm/comm_failure_get_acked.c              |    6 +-
 src/mpi/comm/comm_free.c                           |    6 +-
 src/mpi/comm/comm_get_info.c                       |   10 +-
 src/mpi/comm/comm_get_name.c                       |    6 +-
 src/mpi/comm/comm_group.c                          |   12 +-
 src/mpi/comm/comm_idup.c                           |    8 +-
 src/mpi/comm/comm_rank.c                           |    6 +-
 src/mpi/comm/comm_remote_group.c                   |   12 +-
 src/mpi/comm/comm_remote_size.c                    |    6 +-
 src/mpi/comm/comm_revoke.c                         |    6 +-
 src/mpi/comm/comm_set_info.c                       |   18 +-
 src/mpi/comm/comm_set_name.c                       |    6 +-
 src/mpi/comm/comm_shrink.c                         |   26 +-
 src/mpi/comm/comm_size.c                           |    6 +-
 src/mpi/comm/comm_split.c                          |   32 +-
 src/mpi/comm/comm_split_type.c                     |    8 +-
 src/mpi/comm/comm_test_inter.c                     |    6 +-
 src/mpi/comm/commutil.c                            |  192 +-
 src/mpi/comm/contextid.c                           |  154 +-
 src/mpi/comm/intercomm_create.c                    |   50 +-
 src/mpi/comm/intercomm_merge.c                     |   22 +-
 src/mpi/comm/mpicomm.h                             |    2 +-
 src/mpi/datatype/address.c                         |   10 +-
 src/mpi/datatype/get_address.c                     |   10 +-
 src/mpi/datatype/get_count.c                       |    8 +-
 src/mpi/datatype/get_elements.c                    |    6 +-
 src/mpi/datatype/get_elements_x.c                  |   14 +-
 src/mpi/datatype/pack.c                            |   14 +-
 src/mpi/datatype/pack_external.c                   |    8 +-
 src/mpi/datatype/pack_external_size.c              |    6 +-
 src/mpi/datatype/pack_size.c                       |    8 +-
 src/mpi/datatype/register_datarep.c                |    6 +-
 src/mpi/datatype/status_set_elements.c             |    6 +-
 src/mpi/datatype/status_set_elements_x.c           |   10 +-
 src/mpi/datatype/type_commit.c                     |    6 +-
 src/mpi/datatype/type_contiguous.c                 |    8 +-
 src/mpi/datatype/type_create_darray.c              |   16 +-
 src/mpi/datatype/type_create_hindexed.c            |   12 +-
 src/mpi/datatype/type_create_hindexed_block.c      |    6 +-
 src/mpi/datatype/type_create_hvector.c             |    6 +-
 src/mpi/datatype/type_create_indexed_block.c       |   12 +-
 src/mpi/datatype/type_create_resized.c             |    6 +-
 src/mpi/datatype/type_create_struct.c              |   12 +-
 src/mpi/datatype/type_create_subarray.c            |   16 +-
 src/mpi/datatype/type_dup.c                        |    6 +-
 src/mpi/datatype/type_extent.c                     |    8 +-
 src/mpi/datatype/type_free.c                       |    6 +-
 src/mpi/datatype/type_get_contents.c               |    6 +-
 src/mpi/datatype/type_get_envelope.c               |    6 +-
 src/mpi/datatype/type_get_extent.c                 |    6 +-
 src/mpi/datatype/type_get_extent_x.c               |    6 +-
 src/mpi/datatype/type_get_name.c                   |    6 +-
 src/mpi/datatype/type_get_true_extent.c            |    6 +-
 src/mpi/datatype/type_get_true_extent_x.c          |    6 +-
 src/mpi/datatype/type_hindexed.c                   |   12 +-
 src/mpi/datatype/type_hvector.c                    |    6 +-
 src/mpi/datatype/type_indexed.c                    |   12 +-
 src/mpi/datatype/type_lb.c                         |    6 +-
 src/mpi/datatype/type_match_size.c                 |   12 +-
 src/mpi/datatype/type_set_name.c                   |    6 +-
 src/mpi/datatype/type_size.c                       |    8 +-
 src/mpi/datatype/type_size_x.c                     |    6 +-
 src/mpi/datatype/type_struct.c                     |   12 +-
 src/mpi/datatype/type_ub.c                         |    6 +-
 src/mpi/datatype/type_vector.c                     |    6 +-
 src/mpi/datatype/typeutil.c                        |   24 +-
 src/mpi/datatype/unpack.c                          |   16 +-
 src/mpi/datatype/unpack_external.c                 |    8 +-
 src/mpi/debugger/dbginit.c                         |   20 +-
 src/mpi/debugger/dll_mpich.c                       |    2 +-
 src/mpi/debugger/tvtest.c                          |    6 +-
 src/mpi/errhan/add_error_class.c                   |    6 +-
 src/mpi/errhan/add_error_code.c                    |    6 +-
 src/mpi/errhan/add_error_string.c                  |    6 +-
 src/mpi/errhan/comm_call_errhandler.c              |    6 +-
 src/mpi/errhan/comm_create_errhandler.c            |   10 +-
 src/mpi/errhan/comm_get_errhandler.c               |    6 +-
 src/mpi/errhan/comm_set_errhandler.c               |    6 +-
 src/mpi/errhan/errcodes.h                          |    2 +-
 src/mpi/errhan/errhandler_create.c                 |    6 +-
 src/mpi/errhan/errhandler_free.c                   |    8 +-
 src/mpi/errhan/errhandler_get.c                    |    6 +-
 src/mpi/errhan/errhandler_set.c                    |    6 +-
 src/mpi/errhan/errnames.txt                        |    4 +-
 src/mpi/errhan/error_class.c                       |    6 +-
 src/mpi/errhan/error_string.c                      |    6 +-
 src/mpi/errhan/errutil.c                           |   48 +-
 src/mpi/errhan/file_call_errhandler.c              |   11 +-
 src/mpi/errhan/file_create_errhandler.c            |   10 +-
 src/mpi/errhan/file_get_errhandler.c               |   11 +-
 src/mpi/errhan/file_set_errhandler.c               |   11 +-
 src/mpi/errhan/win_call_errhandler.c               |    6 +-
 src/mpi/errhan/win_create_errhandler.c             |   10 +-
 src/mpi/errhan/win_get_errhandler.c                |    6 +-
 src/mpi/errhan/win_set_errhandler.c                |    6 +-
 src/mpi/errhan/windefmsg.h                         |    6 +-
 src/mpi/group/group.h                              |    2 +-
 src/mpi/group/group_compare.c                      |   10 +-
 src/mpi/group/group_difference.c                   |   12 +-
 src/mpi/group/group_excl.c                         |   12 +-
 src/mpi/group/group_free.c                         |    6 +-
 src/mpi/group/group_incl.c                         |   12 +-
 src/mpi/group/group_intersection.c                 |   12 +-
 src/mpi/group/group_range_excl.c                   |   12 +-
 src/mpi/group/group_range_incl.c                   |   12 +-
 src/mpi/group/group_rank.c                         |    6 +-
 src/mpi/group/group_size.c                         |    6 +-
 src/mpi/group/group_translate_ranks.c              |   10 +-
 src/mpi/group/group_union.c                        |   16 +-
 src/mpi/group/groupdebug.c                         |    8 +-
 src/mpi/group/grouputil.c                          |   40 +-
 src/mpi/info/info_create.c                         |   10 +-
 src/mpi/info/info_delete.c                         |   10 +-
 src/mpi/info/info_dup.c                            |   12 +-
 src/mpi/info/info_free.c                           |   10 +-
 src/mpi/info/info_get.c                            |    8 +-
 src/mpi/info/info_getn.c                           |    8 +-
 src/mpi/info/info_getnth.c                         |    8 +-
 src/mpi/info/info_getvallen.c                      |    8 +-
 src/mpi/info/info_set.c                            |   16 +-
 src/mpi/info/infoutil.c                            |   20 +-
 src/mpi/init/abort.c                               |    8 +-
 src/mpi/init/async.c                               |   38 +-
 src/mpi/init/finalize.c                            |   14 +-
 src/mpi/init/finalized.c                           |    6 +-
 src/mpi/init/init.c                                |    8 +-
 src/mpi/init/initialized.c                         |    6 +-
 src/mpi/init/initinfo.c                            |   16 +-
 src/mpi/init/initthread.c                          |   90 +-
 src/mpi/init/ismain.c                              |    6 +-
 src/mpi/init/querythread.c                         |    6 +-
 src/mpi/misc/aint_add.c                            |    6 +-
 src/mpi/misc/aint_diff.c                           |    6 +-
 src/mpi/misc/getpname.c                            |    6 +-
 src/mpi/misc/library_version.c                     |   12 +-
 src/mpi/misc/pcontrol.c                            |    6 +-
 src/mpi/misc/version.c                             |    6 +-
 src/mpi/pt2pt/bsend.c                              |    6 +-
 src/mpi/pt2pt/bsend_init.c                         |    8 +-
 src/mpi/pt2pt/bsendutil.c                          |   66 +-
 src/mpi/pt2pt/bsendutil.h                          |    4 +-
 src/mpi/pt2pt/bufattach.c                          |    6 +-
 src/mpi/pt2pt/buffree.c                            |    6 +-
 src/mpi/pt2pt/cancel.c                             |    6 +-
 src/mpi/pt2pt/greq_complete.c                      |    6 +-
 src/mpi/pt2pt/greq_start.c                         |   26 +-
 src/mpi/pt2pt/ibsend.c                             |   10 +-
 src/mpi/pt2pt/improbe.c                            |    8 +-
 src/mpi/pt2pt/imrecv.c                             |    8 +-
 src/mpi/pt2pt/iprobe.c                             |    6 +-
 src/mpi/pt2pt/irecv.c                              |    6 +-
 src/mpi/pt2pt/irsend.c                             |    8 +-
 src/mpi/pt2pt/isend.c                              |    8 +-
 src/mpi/pt2pt/issend.c                             |    8 +-
 src/mpi/pt2pt/mpir_request.c                       |   24 +-
 src/mpi/pt2pt/mprobe.c                             |    8 +-
 src/mpi/pt2pt/mrecv.c                              |    6 +-
 src/mpi/pt2pt/probe.c                              |    6 +-
 src/mpi/pt2pt/recv.c                               |    6 +-
 src/mpi/pt2pt/recv_init.c                          |    6 +-
 src/mpi/pt2pt/request_free.c                       |    8 +-
 src/mpi/pt2pt/request_get_status.c                 |    6 +-
 src/mpi/pt2pt/rsend.c                              |    6 +-
 src/mpi/pt2pt/rsend_init.c                         |    6 +-
 src/mpi/pt2pt/send.c                               |    6 +-
 src/mpi/pt2pt/send_init.c                          |    8 +-
 src/mpi/pt2pt/sendrecv.c                           |    6 +-
 src/mpi/pt2pt/sendrecv_rep.c                       |   16 +-
 src/mpi/pt2pt/ssend.c                              |    6 +-
 src/mpi/pt2pt/ssend_init.c                         |    6 +-
 src/mpi/pt2pt/start.c                              |    6 +-
 src/mpi/pt2pt/startall.c                           |   12 +-
 src/mpi/pt2pt/status_set_cancelled.c               |    6 +-
 src/mpi/pt2pt/test.c                               |    6 +-
 src/mpi/pt2pt/test_cancelled.c                     |    9 +-
 src/mpi/pt2pt/testall.c                            |   12 +-
 src/mpi/pt2pt/testany.c                            |   12 +-
 src/mpi/pt2pt/testsome.c                           |   12 +-
 src/mpi/pt2pt/wait.c                               |    6 +-
 src/mpi/pt2pt/waitall.c                            |   18 +-
 src/mpi/pt2pt/waitany.c                            |   12 +-
 src/mpi/pt2pt/waitsome.c                           |   12 +-
 src/mpi/rma/accumulate.c                           |    6 +-
 src/mpi/rma/alloc_mem.c                            |    8 +-
 src/mpi/rma/compare_and_swap.c                     |    6 +-
 src/mpi/rma/fetch_and_op.c                         |    6 +-
 src/mpi/rma/free_mem.c                             |    6 +-
 src/mpi/rma/get.c                                  |    6 +-
 src/mpi/rma/get_accumulate.c                       |    6 +-
 src/mpi/rma/put.c                                  |    6 +-
 src/mpi/rma/raccumulate.c                          |    6 +-
 src/mpi/rma/rget.c                                 |    6 +-
 src/mpi/rma/rget_accumulate.c                      |    6 +-
 src/mpi/rma/rmatypeutil.c                          |    6 +-
 src/mpi/rma/rput.c                                 |    6 +-
 src/mpi/rma/win_allocate.c                         |    6 +-
 src/mpi/rma/win_allocate_shared.c                  |    6 +-
 src/mpi/rma/win_attach.c                           |    6 +-
 src/mpi/rma/win_complete.c                         |    6 +-
 src/mpi/rma/win_create.c                           |    6 +-
 src/mpi/rma/win_create_dynamic.c                   |    6 +-
 src/mpi/rma/win_detach.c                           |    6 +-
 src/mpi/rma/win_fence.c                            |    6 +-
 src/mpi/rma/win_flush.c                            |    6 +-
 src/mpi/rma/win_flush_all.c                        |    6 +-
 src/mpi/rma/win_flush_local.c                      |    6 +-
 src/mpi/rma/win_flush_local_all.c                  |    6 +-
 src/mpi/rma/win_free.c                             |    8 +-
 src/mpi/rma/win_get_group.c                        |    6 +-
 src/mpi/rma/win_get_info.c                         |    6 +-
 src/mpi/rma/win_get_name.c                         |    6 +-
 src/mpi/rma/win_lock.c                             |    6 +-
 src/mpi/rma/win_lock_all.c                         |    6 +-
 src/mpi/rma/win_post.c                             |    6 +-
 src/mpi/rma/win_set_info.c                         |    6 +-
 src/mpi/rma/win_set_name.c                         |    6 +-
 src/mpi/rma/win_shared_query.c                     |    6 +-
 src/mpi/rma/win_start.c                            |    6 +-
 src/mpi/rma/win_sync.c                             |    6 +-
 src/mpi/rma/win_test.c                             |    6 +-
 src/mpi/rma/win_unlock.c                           |    6 +-
 src/mpi/rma/win_unlock_all.c                       |    6 +-
 src/mpi/rma/win_wait.c                             |    6 +-
 src/mpi/rma/winutil.c                              |    2 +-
 src/mpi/romio/adio/include/adioi.h                 |    6 +-
 src/mpi/romio/mpi-io/glue/mpich/mpio_err.c         |   20 +-
 src/mpi/romio/mpi-io/mpioimpl.h                    |    2 +-
 src/mpi/spawn/close_port.c                         |    6 +-
 src/mpi/spawn/comm_accept.c                        |    6 +-
 src/mpi/spawn/comm_connect.c                       |    6 +-
 src/mpi/spawn/comm_disconnect.c                    |   10 +-
 src/mpi/spawn/comm_get_parent.c                    |    6 +-
 src/mpi/spawn/comm_join.c                          |   14 +-
 src/mpi/spawn/comm_spawn.c                         |    9 +-
 src/mpi/spawn/comm_spawn_multiple.c                |   12 +-
 src/mpi/spawn/lookup_name.c                        |    6 +-
 src/mpi/spawn/open_port.c                          |    6 +-
 src/mpi/spawn/publish_name.c                       |    6 +-
 src/mpi/spawn/unpublish_name.c                     |    6 +-
 src/mpi/timer/wtick.c                              |    6 +-
 src/mpi/timer/wtime.c                              |    6 +-
 src/mpi/topo/Makefile.mk                           |    2 -
 src/mpi/topo/cart_coords.c                         |    7 +-
 src/mpi/topo/cart_create.c                         |   29 +-
 src/mpi/topo/cart_get.c                            |    7 +-
 src/mpi/topo/cart_map.c                            |    7 +-
 src/mpi/topo/cart_rank.c                           |    7 +-
 src/mpi/topo/cart_shift.c                          |    7 +-
 src/mpi/topo/cart_sub.c                            |   19 +-
 src/mpi/topo/cartdim_get.c                         |    7 +-
 src/mpi/topo/dims_create.c                         |    7 +-
 src/mpi/topo/dist_gr_create.c                      |   79 +-
 src/mpi/topo/dist_gr_create_adj.c                  |   35 +-
 src/mpi/topo/dist_gr_neighb.c                      |   15 +-
 src/mpi/topo/dist_gr_neighb_count.c                |    7 +-
 src/mpi/topo/graph_get.c                           |    7 +-
 src/mpi/topo/graph_map.c                           |    7 +-
 src/mpi/topo/graph_nbr.c                           |    7 +-
 src/mpi/topo/graphcreate.c                         |   19 +-
 src/mpi/topo/graphdimsget.c                        |    7 +-
 src/mpi/topo/graphnbrcnt.c                         |    7 +-
 src/mpi/topo/inhb_allgather.c                      |   37 +-
 src/mpi/topo/inhb_allgatherv.c                     |   37 +-
 src/mpi/topo/inhb_alltoall.c                       |   39 +-
 src/mpi/topo/inhb_alltoallv.c                      |   39 +-
 src/mpi/topo/inhb_alltoallw.c                      |   39 +-
 src/mpi/topo/nhb_allgather.c                       |   10 +-
 src/mpi/topo/nhb_allgatherv.c                      |   10 +-
 src/mpi/topo/nhb_alltoall.c                        |   10 +-
 src/mpi/topo/nhb_alltoallv.c                       |   10 +-
 src/mpi/topo/nhb_alltoallw.c                       |   10 +-
 src/mpi/topo/topo.h                                |   67 -
 src/mpi/topo/topo_test.c                           |    7 +-
 src/mpi/topo/topoutil.c                            |   37 +-
 src/mpi_t/cat_changed.c                            |    6 +-
 src/mpi_t/cat_get_categories.c                     |    6 +-
 src/mpi_t/cat_get_cvars.c                          |    6 +-
 src/mpi_t/cat_get_index.c                          |    6 +-
 src/mpi_t/cat_get_info.c                           |    6 +-
 src/mpi_t/cat_get_num.c                            |    6 +-
 src/mpi_t/cat_get_pvars.c                          |    6 +-
 src/mpi_t/cvar_get_index.c                         |    6 +-
 src/mpi_t/cvar_get_info.c                          |    6 +-
 src/mpi_t/cvar_get_num.c                           |    6 +-
 src/mpi_t/cvar_handle_alloc.c                      |   14 +-
 src/mpi_t/cvar_handle_free.c                       |    6 +-
 src/mpi_t/cvar_read.c                              |    8 +-
 src/mpi_t/cvar_write.c                             |   10 +-
 src/mpi_t/enum_get_info.c                          |    6 +-
 src/mpi_t/enum_get_item.c                          |    6 +-
 src/mpi_t/mpit.c                                   |   46 +-
 src/mpi_t/mpit_finalize.c                          |    6 +-
 src/mpi_t/mpit_initthread.c                        |    6 +-
 src/mpi_t/pvar_get_index.c                         |    6 +-
 src/mpi_t/pvar_get_info.c                          |    6 +-
 src/mpi_t/pvar_get_num.c                           |    6 +-
 src/mpi_t/pvar_handle_alloc.c                      |   16 +-
 src/mpi_t/pvar_handle_free.c                       |    8 +-
 src/mpi_t/pvar_read.c                              |   12 +-
 src/mpi_t/pvar_readreset.c                         |    6 +-
 src/mpi_t/pvar_reset.c                             |   10 +-
 src/mpi_t/pvar_session_create.c                    |   14 +-
 src/mpi_t/pvar_session_free.c                      |    6 +-
 src/mpi_t/pvar_start.c                             |   10 +-
 src/mpi_t/pvar_stop.c                              |   10 +-
 src/mpi_t/pvar_write.c                             |    6 +-
 .../ch3/channels/nemesis/include/mpid_nem_defs.h   |    4 +-
 .../ch3/channels/nemesis/include/mpid_nem_fbox.h   |    2 +-
 .../nemesis/include/mpid_nem_generic_queue.h       |   20 +-
 .../ch3/channels/nemesis/include/mpid_nem_impl.h   |    2 +-
 .../ch3/channels/nemesis/include/mpid_nem_inline.h |   76 +-
 .../ch3/channels/nemesis/include/mpid_nem_queue.h  |   18 +-
 .../ch3/channels/nemesis/include/mpidi_ch3_impl.h  |    8 +-
 .../ch3/channels/nemesis/netmod/llc/llc_cancel.c   |    6 +-
 .../ch3/channels/nemesis/netmod/llc/llc_fini.c     |    6 +-
 .../ch3/channels/nemesis/netmod/llc/llc_init.c     |   30 +-
 .../ch3/channels/nemesis/netmod/llc/llc_poll.c     |   54 +-
 .../ch3/channels/nemesis/netmod/llc/llc_probe.c    |   42 +-
 .../ch3/channels/nemesis/netmod/llc/llc_send.c     |  112 +-
 src/mpid/ch3/channels/nemesis/netmod/llc/llc_vc.c  |   23 +-
 .../ch3/channels/nemesis/netmod/mxm/mxm_impl.h     |    4 +-
 .../ch3/channels/nemesis/netmod/mxm/mxm_init.c     |   62 +-
 .../ch3/channels/nemesis/netmod/mxm/mxm_poll.c     |   52 +-
 .../ch3/channels/nemesis/netmod/mxm/mxm_probe.c    |   22 +-
 .../ch3/channels/nemesis/netmod/mxm/mxm_send.c     |  100 +-
 src/mpid/ch3/channels/nemesis/netmod/none/none.c   |    4 +-
 src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_cm.c  |   30 +-
 .../ch3/channels/nemesis/netmod/ofi/ofi_impl.h     |   18 +-
 .../ch3/channels/nemesis/netmod/ofi/ofi_init.c     |   14 +-
 src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c |   28 +-
 .../ch3/channels/nemesis/netmod/ofi/ofi_progress.c |    4 +-
 .../channels/nemesis/netmod/ofi/ofi_tag_layout.h   |    8 +-
 .../nemesis/netmod/ofi/ofi_tagged_template.c       |    8 +-
 .../channels/nemesis/netmod/portals4/ptl_impl.h    |    2 +-
 .../channels/nemesis/netmod/portals4/ptl_init.c    |   80 +-
 .../ch3/channels/nemesis/netmod/portals4/ptl_lmt.c |    8 +-
 .../ch3/channels/nemesis/netmod/portals4/ptl_nm.c  |   92 +-
 .../channels/nemesis/netmod/portals4/ptl_poll.c    |   36 +-
 .../channels/nemesis/netmod/portals4/ptl_probe.c   |   56 +-
 .../channels/nemesis/netmod/portals4/ptl_recv.c    |  158 +-
 .../channels/nemesis/netmod/portals4/ptl_send.c    |   54 +-
 .../ch3/channels/nemesis/netmod/portals4/rptl.c    |   72 +-
 .../channels/nemesis/netmod/portals4/rptl_init.c   |   40 +-
 .../ch3/channels/nemesis/netmod/portals4/rptl_op.c |   20 +-
 src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c  |  344 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_ckpt.c     |   24 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_finalize.c |    8 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_getip.c    |   20 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_init.c     |   82 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_queue.h    |   12 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_send.c     |   94 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_utility.c  |   30 +-
 src/mpid/ch3/channels/nemesis/src/ch3_finalize.c   |    6 +-
 src/mpid/ch3/channels/nemesis/src/ch3_init.c       |   76 +-
 src/mpid/ch3/channels/nemesis/src/ch3_isend.c      |   14 +-
 src/mpid/ch3/channels/nemesis/src/ch3_isendv.c     |   20 +-
 src/mpid/ch3/channels/nemesis/src/ch3_istartmsg.c  |   16 +-
 src/mpid/ch3/channels/nemesis/src/ch3_istartmsgv.c |   22 +-
 src/mpid/ch3/channels/nemesis/src/ch3_progress.c   |  210 +-
 src/mpid/ch3/channels/nemesis/src/ch3_rma_shm.c    |   18 +-
 src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c    |  100 +-
 src/mpid/ch3/channels/nemesis/src/ch3i_comm.c      |   49 +-
 .../ch3/channels/nemesis/src/ch3i_eagernoncontig.c |   12 +-
 src/mpid/ch3/channels/nemesis/src/mpid_nem_alloc.c |   76 +-
 .../ch3/channels/nemesis/src/mpid_nem_barrier.c    |   12 +-
 src/mpid/ch3/channels/nemesis/src/mpid_nem_ckpt.c  |   78 +-
 src/mpid/ch3/channels/nemesis/src/mpid_nem_debug.c |    8 +-
 .../ch3/channels/nemesis/src/mpid_nem_finalize.c   |    8 +-
 src/mpid/ch3/channels/nemesis/src/mpid_nem_init.c  |   72 +-
 src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c   |   96 +-
 .../ch3/channels/nemesis/src/mpid_nem_lmt_dma.c    |   72 +-
 .../ch3/channels/nemesis/src/mpid_nem_lmt_shm.c    |  144 +-
 .../channels/nemesis/src/mpid_nem_lmt_vmsplice.c   |   66 +-
 src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c |   30 +-
 .../ch3/channels/nemesis/src/mpid_nem_mpich_rma.c  |  132 +-
 .../ch3/channels/nemesis/src/mpid_nem_network.c    |   10 +-
 src/mpid/ch3/channels/sock/src/ch3_finalize.c      |    6 +-
 src/mpid/ch3/channels/sock/src/ch3_init.c          |    6 +-
 src/mpid/ch3/channels/sock/src/ch3_isend.c         |   22 +-
 src/mpid/ch3/channels/sock/src/ch3_isendv.c        |   24 +-
 src/mpid/ch3/channels/sock/src/ch3_istartmsg.c     |   22 +-
 src/mpid/ch3/channels/sock/src/ch3_istartmsgv.c    |   22 +-
 src/mpid/ch3/channels/sock/src/ch3_progress.c      |  110 +-
 src/mpid/ch3/channels/sock/src/ch3_win_fns.c       |   18 +-
 src/mpid/ch3/include/mpid_rma_issue.h              |  120 +-
 src/mpid/ch3/include/mpid_rma_lockqueue.h          |    2 +-
 src/mpid/ch3/include/mpid_rma_oplist.h             |    6 +-
 src/mpid/ch3/include/mpid_rma_shm.h                |   70 +-
 src/mpid/ch3/include/mpid_sched.h                  |   34 +-
 src/mpid/ch3/include/mpid_thread.h                 |    1 -
 src/mpid/ch3/include/mpidimpl.h                    |   26 +-
 src/mpid/ch3/include/mpidpkt.h                     |    4 +-
 src/mpid/ch3/include/mpidpost.h                    |    6 +-
 src/mpid/ch3/include/mpidpre.h                     |  211 +-
 src/mpid/ch3/include/mpidrma.h                     |  142 +-
 src/mpid/ch3/src/ch3u_buffer.c                     |   18 +-
 src/mpid/ch3/src/ch3u_comm.c                       |   98 +-
 src/mpid/ch3/src/ch3u_comm_spawn_multiple.c        |   10 +-
 src/mpid/ch3/src/ch3u_eager.c                      |    6 +-
 src/mpid/ch3/src/ch3u_handle_connection.c          |   50 +-
 src/mpid/ch3/src/ch3u_handle_op_req.c              |   10 +-
 src/mpid/ch3/src/ch3u_handle_recv_pkt.c            |   58 +-
 src/mpid/ch3/src/ch3u_handle_recv_req.c            |  234 +-
 src/mpid/ch3/src/ch3u_handle_send_req.c            |   34 +-
 src/mpid/ch3/src/ch3u_port.c                       |  108 +-
 src/mpid/ch3/src/ch3u_recvq.c                      |   52 +-
 src/mpid/ch3/src/ch3u_request.c                    |   66 +-
 src/mpid/ch3/src/ch3u_rma_ops.c                    |   80 +-
 src/mpid/ch3/src/ch3u_rma_pkthandler.c             |  222 +-
 src/mpid/ch3/src/ch3u_rma_progress.c               |   10 +-
 src/mpid/ch3/src/ch3u_rma_reqops.c                 |   32 +-
 src/mpid/ch3/src/ch3u_rma_sync.c                   |  158 +-
 src/mpid/ch3/src/ch3u_win_fns.c                    |   92 +-
 src/mpid/ch3/src/mpid_abort.c                      |    8 +-
 src/mpid/ch3/src/mpid_aint.c                       |   16 +-
 src/mpid/ch3/src/mpid_cancel_recv.c                |    8 +-
 src/mpid/ch3/src/mpid_cancel_send.c                |   14 +-
 src/mpid/ch3/src/mpid_comm_disconnect.c            |    6 +-
 src/mpid/ch3/src/mpid_comm_failure_ack.c           |   12 +-
 src/mpid/ch3/src/mpid_comm_get_all_failed_procs.c  |    6 +-
 src/mpid/ch3/src/mpid_comm_revoke.c                |    6 +-
 src/mpid/ch3/src/mpid_comm_spawn_multiple.c        |    6 +-
 src/mpid/ch3/src/mpid_finalize.c                   |    6 +-
 src/mpid/ch3/src/mpid_imrecv.c                     |    8 +-
 src/mpid/ch3/src/mpid_init.c                       |   14 +-
 src/mpid/ch3/src/mpid_iprobe.c                     |    6 +-
 src/mpid/ch3/src/mpid_irecv.c                      |   10 +-
 src/mpid/ch3/src/mpid_irsend.c                     |    8 +-
 src/mpid/ch3/src/mpid_isend.c                      |    8 +-
 src/mpid/ch3/src/mpid_issend.c                     |    8 +-
 src/mpid/ch3/src/mpid_port.c                       |   36 +-
 src/mpid/ch3/src/mpid_probe.c                      |    6 +-
 src/mpid/ch3/src/mpid_recv.c                       |   10 +-
 src/mpid/ch3/src/mpid_rma.c                        |   64 +-
 src/mpid/ch3/src/mpid_rsend.c                      |    6 +-
 src/mpid/ch3/src/mpid_send.c                       |    6 +-
 src/mpid/ch3/src/mpid_ssend.c                      |    6 +-
 src/mpid/ch3/src/mpid_startall.c                   |   40 +-
 src/mpid/ch3/src/mpid_vc.c                         |  108 +-
 src/mpid/ch3/src/mpidi_pg.c                        |  102 +-
 src/mpid/ch3/src/mpidi_rma.c                       |   36 +-
 src/mpid/ch3/util/ftb/ftb.c                        |   20 +-
 src/mpid/ch3/util/sock/ch3u_connect_sock.c         |  136 +-
 src/mpid/ch3/util/sock/ch3u_getinterfaces.c        |    6 +-
 src/mpid/ch3/util/unordered/unordered.c            |    6 +-
 src/mpid/common/datatype/dataloop/darray_support.c |    2 +-
 .../common/datatype/dataloop/subarray_support.c    |    2 +-
 .../common/datatype/dataloop/typesize_support.c    |    6 +-
 src/mpid/common/datatype/mpidu_dataloop.h          |   12 +-
 src/mpid/common/datatype/mpidu_datatype.h          |   38 +-
 src/mpid/common/datatype/mpidu_datatype_contents.c |   12 +-
 src/mpid/common/datatype/mpidu_datatype_free.c     |    2 +-
 src/mpid/common/datatype/mpidu_ext32_segment.c     |   36 +-
 src/mpid/common/datatype/mpidu_segment.c           |   74 +-
 src/mpid/common/datatype/mpidu_type_blockindexed.c |    6 +-
 src/mpid/common/datatype/mpidu_type_commit.c       |    2 +-
 src/mpid/common/datatype/mpidu_type_contiguous.c   |    6 +-
 .../common/datatype/mpidu_type_create_pairtype.c   |    6 +-
 .../common/datatype/mpidu_type_create_resized.c    |    6 +-
 src/mpid/common/datatype/mpidu_type_debug.c        |    6 +-
 src/mpid/common/datatype/mpidu_type_dup.c          |    8 +-
 src/mpid/common/datatype/mpidu_type_get_contents.c |    6 +-
 src/mpid/common/datatype/mpidu_type_indexed.c      |   12 +-
 src/mpid/common/datatype/mpidu_type_struct.c       |   10 +-
 src/mpid/common/datatype/mpidu_type_vector.c       |    6 +-
 src/mpid/common/datatype/mpidu_type_zerolen.c      |    6 +-
 src/mpid/common/datatype/mpir_type_flatten.c       |    4 +-
 src/mpid/common/hcoll/hcoll_init.c                 |    2 +-
 src/mpid/common/sched/mpidu_sched.c                |   62 +-
 src/mpid/common/sched/mpidu_sched.h                |   30 +-
 src/mpid/common/sock/iocp/sock.c                   |  394 +-
 src/mpid/common/sock/mpidu_sock.h                  |   23 +-
 src/mpid/common/sock/poll/sock.c                   |   12 +-
 src/mpid/common/sock/poll/sock_immed.i             |   86 +-
 src/mpid/common/sock/poll/sock_init.i              |   12 +-
 src/mpid/common/sock/poll/sock_misc.i              |   42 +-
 src/mpid/common/sock/poll/sock_post.i              |   70 +-
 src/mpid/common/sock/poll/sock_set.i               |   24 +-
 src/mpid/common/sock/poll/sock_wait.i              |  102 +-
 src/mpid/common/sock/poll/socki_util.i             |   80 +-
 src/mpid/common/thread/mpidu_thread_fallback.h     |   61 +-
 src/mpid/include/mpidu_pre.h                       |    6 +-
 src/mpid/pamid/include/mpidi_hooks.h               |    1 -
 src/mpid/pamid/include/mpidi_macros.h              |   10 +-
 src/mpid/pamid/include/mpidi_mutex.h               |    8 +-
 src/mpid/pamid/include/mpidi_thread.h              |    6 +-
 src/mpid/pamid/include/mpidimpl.h                  |   10 +-
 .../pamid/src/coll/allgather/mpido_allgather.c     |    2 +-
 src/mpid/pamid/src/comm/mpid_comm.c                |    2 +-
 src/mpid/pamid/src/dyntask/mpid_comm_disconnect.c  |    4 +-
 .../pamid/src/dyntask/mpid_comm_spawn_multiple.c   |    8 +-
 src/mpid/pamid/src/dyntask/mpid_port.c             |    4 +-
 src/mpid/pamid/src/dyntask/mpidi_pg.c              |   20 +-
 src/mpid/pamid/src/dyntask/mpidi_port.c            |   22 +-
 src/mpid/pamid/src/misc/mpid_unimpl.c              |    2 +-
 src/mpid/pamid/src/mpid_aint.c                     |   16 +-
 src/mpid/pamid/src/mpid_imrecv.c                   |    4 +-
 src/mpid/pamid/src/mpid_init.c                     |   14 +-
 src/mpid/pamid/src/mpid_progress.c                 |   40 +-
 src/mpid/pamid/src/mpid_progress.h                 |    2 +-
 src/mpid/pamid/src/mpid_request.c                  |   10 +-
 src/mpid/pamid/src/mpid_request.h                  |   18 +-
 src/mpid/pamid/src/mpid_vc.c                       |   14 +-
 src/mpid/pamid/src/mpidi_env.c                     |   12 +-
 src/mpid/pamid/src/mpidi_pami_datatype.c           |   14 +-
 src/mpid/pamid/src/onesided/mpid_win_create.c      |    2 +-
 .../pamid/src/onesided/mpid_win_fetch_and_op.c     |   14 +-
 src/mpid/pamid/src/onesided/mpid_win_free.c        |    2 +-
 .../pamid/src/onesided/mpid_win_get_accumulate.c   |    2 +-
 src/mpid/pamid/src/onesided/mpid_win_get_info.c    |    4 +-
 src/mpid/pamid/src/pt2pt/mpid_cancel.c             |    6 +-
 src/mpid/pamid/src/pt2pt/mpid_isend.h              |    2 +-
 .../pamid/src/pt2pt/persistent/mpid_startall.c     |    2 +-
 src/mpl/include/mpl_base.h                         |    2 +
 src/mpl/include/mpl_dbg.h                          |    6 +-
 src/pm/gforker/mpiexec.c                           |    6 +-
 src/pm/remshell/mpiexec.c                          |    4 +-
 src/pm/util/simple_pmiutil2.h                      |    2 +-
 src/pmi/pmi2/simple/pmi2compat.h                   |    4 +-
 src/pmi/simple/simple_pmi.c                        |    2 +-
 src/pmi/simple/simple_pmiutil.c                    |    2 +-
 src/pmi/simple/simple_pmiutil.h                    |    2 +-
 src/util/Makefile.mk                               |    4 -
 src/util/assert/Makefile.mk                        |    3 -
 src/util/assert/mpiassert.h                        |  163 -
 src/util/cvar/Makefile.mk                          |    4 +-
 .../rlog/TraceInput/logformat_trace_InputLog.c     |    2 +-
 src/util/logging/rlog/TraceInput/trace_input.c     |    2 +-
 src/util/logging/rlog/irlog2rlog.c                 |    2 +-
 src/util/logging/rlog/irlogutil.c                  |    2 +-
 src/util/logging/rlog/minalignrlog.c               |    2 +-
 src/util/logging/rlog/rlog.c                       |    2 +-
 src/util/logging/rlog/rlogtime.c                   |   12 +-
 src/util/logging/rlog/rlogutil.c                   |    2 +-
 src/util/mem/Makefile.mk                           |    4 -
 src/util/mem/handlemem.c                           |   58 +-
 src/util/mem/mpiu_strerror.h                       |   15 -
 src/util/mem/strerror.c                            |    4 +-
 src/util/pointer/Makefile.mk                       |   12 -
 src/util/pointer/mpiu_pointer.h                    |   59 -
 src/util/procmap/local_proc.c                      |   60 +-
 src/util/refcount/Makefile.mk                      |   14 -
 src/util/refcount/mpir_refcount.h                  |   26 -
 src/util/type/Makefile.mk                          |   11 -
 src/util/type/mpiu_type_defs.h                     |   87 -
 src/util/wrappers/README                           |    2 +-
 src/util/wrappers/mpiu_shm_wrappers.h              |   96 +-
 src/util/wrappers/mpiu_sock_wrappers.h             |   70 +-
 src/util/wrappers/mpiu_util_wrappers.h             |    2 +-
 test/mpi/group/glpid.c                             |    6 +-
 729 files changed, 14943 insertions(+), 16949 deletions(-)
 delete mode 100644 src/include/glue_romio.h.in
 delete mode 100644 src/include/mpi_attr.h
 delete mode 100644 src/include/mpi_f77interface.h
 delete mode 100644 src/include/mpi_fortlogical.h
 delete mode 100644 src/include/mpi_lang.h
 delete mode 100644 src/include/mpibsend.h
 delete mode 100644 src/include/mpierror.h
 delete mode 100644 src/include/mpierrs.h
 delete mode 100644 src/include/mpiext.h
 delete mode 100644 src/include/mpifunc.h
 delete mode 100644 src/include/mpihandlemem.h
 create mode 100644 src/include/mpii_bsend.h
 create mode 100644 src/include/mpii_cxxinterface.h
 create mode 100644 src/include/mpii_f77interface.h
 create mode 100644 src/include/mpii_fortlogical.h
 delete mode 100644 src/include/mpiinfo.h
 delete mode 100644 src/include/mpimem.h
 create mode 100644 src/include/mpir_assert.h
 create mode 100644 src/include/mpir_attr.h
 create mode 100644 src/include/mpir_attr_generic.h
 create mode 100644 src/include/mpir_coll.h
 create mode 100644 src/include/mpir_comm.h
 create mode 100644 src/include/mpir_contextid.h
 create mode 100644 src/include/mpir_datatype.h
 create mode 100644 src/include/mpir_dbg.h
 create mode 100644 src/include/mpir_debugger.h
 create mode 100644 src/include/mpir_err.h
 create mode 100644 src/include/mpir_errhandler.h
 create mode 100644 src/include/mpir_ext.h.in
 create mode 100644 src/include/mpir_func.h
 create mode 100644 src/include/mpir_group.h
 create mode 100644 src/include/mpir_info.h
 create mode 100644 src/include/mpir_mem.h
 create mode 100644 src/include/mpir_misc.h
 create mode 100644 src/include/mpir_misc_post.h
 create mode 100644 src/include/mpir_objects.h
 create mode 100644 src/include/mpir_op.h
 create mode 100644 src/include/mpir_op_util.h
 create mode 100644 src/include/mpir_pointers.h
 create mode 100644 src/include/mpir_process.h
 create mode 100644 src/include/mpir_pt2pt.h
 create mode 100644 src/include/mpir_refcount.h
 rename src/{util/refcount => include}/mpir_refcount_global.h (100%)
 rename src/{util/refcount => include}/mpir_refcount_pobj.h (100%)
 rename src/{util/refcount => include}/mpir_refcount_single.h (100%)
 create mode 100644 src/include/mpir_request.h
 create mode 100644 src/include/mpir_status.h
 create mode 100644 src/include/mpir_strerror.h
 create mode 100644 src/include/mpir_tags.h
 create mode 100644 src/include/mpir_thread.h
 create mode 100644 src/include/mpir_topo.h
 create mode 100644 src/include/mpir_utarray.h
 create mode 100644 src/include/mpir_uthash.h
 create mode 100644 src/include/mpir_win.h
 delete mode 100644 src/include/mpistates.h
 delete mode 100644 src/include/mpitimerimpl.h
 delete mode 100644 src/include/mpiu_utarray.h
 delete mode 100644 src/include/mpiu_uthash.h
 delete mode 100644 src/include/mpiutil.h
 delete mode 100644 src/include/oputil.h
 delete mode 100644 src/mpi/topo/topo.h
 delete mode 100644 src/util/assert/mpiassert.h
 delete mode 100644 src/util/mem/mpiu_strerror.h
 delete mode 100644 src/util/pointer/Makefile.mk
 delete mode 100644 src/util/pointer/mpiu_pointer.h
 delete mode 100644 src/util/refcount/Makefile.mk
 delete mode 100644 src/util/refcount/mpir_refcount.h
 delete mode 100644 src/util/type/Makefile.mk
 delete mode 100644 src/util/type/mpiu_type_defs.h


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list