[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1rc2-23-gf6cfee0

mysql vizuser noreply at mpich.org
Tue Dec 3 11:42:47 CST 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  f6cfee090cdc45c003e3dd029e0e8d82dfca799e (commit)
       via  6e9e14061bf5e460256df1c6625c5936fdeef9f6 (commit)
       via  d5afe938afe806c5b1d434659ac158b9423b7f58 (commit)
       via  f29c8a467691c36cc73237e11689190b7ea8480d (commit)
       via  a43515ec77b043ce8378b947206915d9c831ba5a (commit)
       via  28cb70ba8597f09917c694f9f4ee002b44bce29f (commit)
       via  3f133e8e7c33452aa6ea78b64f5e9838346e5f93 (commit)
       via  76b0b6696dfeac8ceb1078131d1459f611e43d75 (commit)
      from  cb41d880028ce7d67209a8657b8db4aa12be6476 (commit)

The revisions listed above that are new to this repository have not
appeared in any other notification email, so we list them in full
below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/f6cfee090cdc45c003e3dd029e0e8d82dfca799e

commit f6cfee090cdc45c003e3dd029e0e8d82dfca799e
Author: Masamichi Takagi <masamichi.takagi at gmail.com>
Date:   Mon Dec 2 13:52:24 2013 +0900

    Add Fortran related cross compilation values
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/cross_values.txt b/src/mpid/ch3/channels/nemesis/netmod/dcfa/cross_values.txt
new file mode 100644
index 0000000..88a2171
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/cross_values.txt
@@ -0,0 +1,16 @@
+# The Fortran related cross compilation values.
+# This file is generated with mpich/maint/fcrosscompile/configure
+# with CC/F77/FC set to "icc/ifort -mmic".
+CROSS_F77_SIZEOF_INTEGER="4"
+CROSS_F77_SIZEOF_REAL="4"
+CROSS_F77_SIZEOF_DOUBLE_PRECISION="8"
+CROSS_F77_TRUE_VALUE="-1"
+CROSS_F77_FALSE_VALUE="0"
+CROSS_F90_ADDRESS_KIND="8"
+CROSS_F90_OFFSET_KIND="8"
+CROSS_F90_INTEGER_KIND="4"
+CROSS_F90_REAL_MODEL=" 6 , 37"
+CROSS_F90_DOUBLE_MODEL=" 15 , 307"
+CROSS_F90_INTEGER_MODEL=" 9"
+CROSS_F90_ALL_INTEGER_MODELS=" 2 , 1, 4 , 2, 9 , 4, 18 , 8,"
+CROSS_F90_INTEGER_MODEL_MAP=" {  2 , 1 , 1 }, {  4 , 2 , 2 }, {  9 , 4 , 4 }, {  18 , 8 , 8 },"
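    
    As the header comment notes, this file comes from mpich/maint/fcrosscompile/configure;
    a minimal sketch of regenerating it for the MIC target, assuming that configure accepts
    the usual CC/F77/FC settings on its command line and emits cross_values.txt as described,
    would be:
    
        cd mpich/maint/fcrosscompile
        ./configure CC="icc -mmic" F77="ifort -mmic" FC="ifort -mmic"
        # copy the generated file next to the netmod that needs it
        cp cross_values.txt ../../src/mpid/ch3/channels/nemesis/netmod/dcfa/
    
    These CROSS_* values are presumably read at MPICH configure time to fill in Fortran type
    sizes and kind/model information that cannot be probed by running test programs on the
    build host when cross-compiling.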

http://git.mpich.org/mpich.git/commitdiff/6e9e14061bf5e460256df1c6625c5936fdeef9f6

commit 6e9e14061bf5e460256df1c6625c5936fdeef9f6
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Thu Nov 28 21:56:51 2013 -0600

    White-space cleanup.
    
    Signed-off-by: Masamichi Takagi <masamichi.takagi at gmail.com>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_finalize.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_finalize.c
index f283f86..3baf664 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_finalize.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_finalize.c
@@ -8,7 +8,7 @@
 #include "dcfa_impl.h"
 
 //#define DEBUG_DCFA_FINALIZE
-#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#ifdef dprintf  /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
 #undef dprintf
 #endif
 #ifdef DEBUG_DCFA_FINALIZE
@@ -33,15 +33,15 @@ int MPID_nem_dcfa_finalize(void)
 #if 0
     for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
         ibcom_errno = ibcom_close(MPID_nem_dcfa_conns[i].fd);
-        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_close");        
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_close");
 
     }
 #endif
 
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_FINALIZE);
 
- fn_exit:
+  fn_exit:
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c
index 47f9e84..d9e624a 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c
@@ -19,7 +19,7 @@
 #include <assert.h>
 
 //#define DEBUG_IBCOM
-#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#ifdef dprintf  /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
 #undef dprintf
 #endif
 #ifdef DEBUG_IBCOM
@@ -28,25 +28,25 @@
 #define dprintf(...)
 #endif
 
-int	dflag;
-
-static int                  sendwr_id = 10;
-static IbCom                contab[IBCOM_SIZE];
-static int                  ib_initialized = 0;
-static int                  maxcon;
-static struct ibv_device	**ib_devlist;
-static struct ibv_context	*ib_ctx;
-struct ibv_context *ib_ctx_export; /* for SC13 demo connector */
-static struct ibv_pd		*ib_pd;
-struct ibv_pd		*ib_pd_export; /* for SC13 demo connector */
-struct ibv_cq               *rc_shared_scq;
-struct ibv_cq               *rc_shared_scq_lmt_put;
-struct ibv_cq               *rc_shared_scq_scratch_pad;
-static struct ibv_cq		*rc_shared_rcq;
-static struct ibv_cq		*rc_shared_rcq_lmt_put;
-static struct ibv_cq		*rc_shared_rcq_scratch_pad;
-static struct ibv_cq		*ud_shared_scq;
-struct ibv_cq		*ud_shared_rcq;
+int dflag;
+
+static int sendwr_id = 10;
+static IbCom contab[IBCOM_SIZE];
+static int ib_initialized = 0;
+static int maxcon;
+static struct ibv_device **ib_devlist;
+static struct ibv_context *ib_ctx;
+struct ibv_context *ib_ctx_export;      /* for SC13 demo connector */
+static struct ibv_pd *ib_pd;
+struct ibv_pd *ib_pd_export;    /* for SC13 demo connector */
+struct ibv_cq *rc_shared_scq;
+struct ibv_cq *rc_shared_scq_lmt_put;
+struct ibv_cq *rc_shared_scq_scratch_pad;
+static struct ibv_cq *rc_shared_rcq;
+static struct ibv_cq *rc_shared_rcq_lmt_put;
+static struct ibv_cq *rc_shared_rcq_scratch_pad;
+static struct ibv_cq *ud_shared_scq;
+struct ibv_cq *ud_shared_rcq;
 static uint8_t *scratch_pad = 0;
 
 #define RANGE_CHECK(condesc, conp)			\
@@ -65,7 +65,8 @@ static uint8_t *scratch_pad = 0;
     IBCOM_ERR_CHKANDJUMP(conp->icom_used != 1, -1, dprintf("RANGE_CHECK_WITH_ERROR,conp->icom_used=%d\n", conp->icom_used)); \
 }
 
-static int modify_qp_to_init(struct ibv_qp *qp, int ib_port) {
+static int modify_qp_to_init(struct ibv_qp *qp, int ib_port)
+{
     struct ibv_qp_attr attr;
     int flags;
     int rc;
@@ -74,7 +75,8 @@ static int modify_qp_to_init(struct ibv_qp *qp, int ib_port) {
     attr.qp_state = IBV_QPS_INIT;
     attr.port_num = ib_port;
     attr.pkey_index = 0;
-    attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
+    attr.qp_access_flags =
+        IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
     flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
     rc = ibv_modify_qp(qp, &attr, flags);
     if (rc) {
@@ -83,10 +85,12 @@ static int modify_qp_to_init(struct ibv_qp *qp, int ib_port) {
     return rc;
 }
 
-static int modify_qp_to_rtr(struct ibv_qp *qp, uint32_t remote_qpn, uint16_t dlid, union ibv_gid *dgid, int ib_port, int gid_idx) {
-    struct ibv_qp_attr	attr;
-    int		flags;
-    int		rc;
+static int modify_qp_to_rtr(struct ibv_qp *qp, uint32_t remote_qpn, uint16_t dlid,
+                            union ibv_gid *dgid, int ib_port, int gid_idx)
+{
+    struct ibv_qp_attr attr;
+    int flags;
+    int rc;
 
     memset(&attr, 0, sizeof(attr));
     attr.qp_state = IBV_QPS_RTR;
@@ -100,7 +104,7 @@ static int modify_qp_to_rtr(struct ibv_qp *qp, uint32_t remote_qpn, uint16_t dli
     //attr.max_dest_rd_atomic = 0; /* DCFA */
 
     /* Default is 0x12 (= 5.12ms) see IB Spec. Rel. 1.2, Vol. 1, 9.7.5.2.8 */
-    attr.min_rnr_timer = 0x12; 
+    attr.min_rnr_timer = 0x12;
 
     attr.ah_attr.dlid = dlid;
     attr.ah_attr.sl = 0;
@@ -110,7 +114,7 @@ static int modify_qp_to_rtr(struct ibv_qp *qp, uint32_t remote_qpn, uint16_t dli
 
     /* In dcfa gid is not set and for testing here it is also not set */
 #if 1
-#ifdef DCFA /* DCFA doesn't use gid */
+#ifdef DCFA     /* DCFA doesn't use gid */
 #else
     if (gid_idx >= 0) {
         attr.ah_attr.is_global = 1;
@@ -125,79 +129,89 @@ static int modify_qp_to_rtr(struct ibv_qp *qp, uint32_t remote_qpn, uint16_t dli
 #endif
 
     flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN
-	| IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
+        | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
     rc = ibv_modify_qp(qp, &attr, flags);
-    if(rc) { dprintf("failed to modify QP state to RTR\n"); }
+    if (rc) {
+        dprintf("failed to modify QP state to RTR\n");
+    }
     return rc;
 }
 
-static int modify_qp_to_rts(struct ibv_qp *qp) {
-    struct ibv_qp_attr	attr;
-    int		flags;
-    int		rc;
+static int modify_qp_to_rts(struct ibv_qp *qp)
+{
+    struct ibv_qp_attr attr;
+    int flags;
+    int rc;
 
     memset(&attr, 0, sizeof(attr));
     attr.qp_state = IBV_QPS_RTS;
-    attr.timeout = (0x14); /* timeout 4.096us * 2^x */
+    attr.timeout = (0x14);      /* timeout 4.096us * 2^x */
     attr.retry_cnt = 7;
     attr.rnr_retry = 7;
     attr.sq_psn = 0;
     attr.max_rd_atomic = IBCOM_MAX_RD_ATOMIC;
     //attr.max_rd_atomic = 1;
-    //attr.max_rd_atomic = 0;	/* DCFA */
+    //attr.max_rd_atomic = 0;   /* DCFA */
 
     flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT
-	| IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC;
+        | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC;
     rc = ibv_modify_qp(qp, &attr, flags);
-    if (rc){
+    if (rc) {
         fprintf(stderr, "failed to modify QP state to RTS\n");
     }
     return rc;
 }
 
 /* called from ibcomOpen if needed */
-static int ibcomDeviceinit() {
+static int ibcomDeviceinit()
+{
     int ibcom_errno = 0;
-    int		dev_num;
-    char	*dev_name;
+    int dev_num;
+    char *dev_name;
     int i;
-    
-    if (ib_initialized == 1) { 
+
+    if (ib_initialized == 1) {
         dprintf("ibcomDeviceinit,already initialized\n");
         return 0;
     }
-    if (ib_initialized == -1) return -1;
+    if (ib_initialized == -1)
+        return -1;
 
     /* initialize ibv_reg_mr cache */
     ibcom_RegisterCacheInit();
-    
+
     /* Get the device list */
     ib_devlist = ibv_get_device_list(&dev_num);
-    if(!ib_devlist || !dev_num) {
+    if (!ib_devlist || !dev_num) {
         fprintf(stderr, "No IB device is found\n");
         return -1;
     }
 
 #ifdef DCFA
-    for(i = 0; i < dev_num; i++) {
-        if(ib_devlist[i]) { goto dev_found; }
-    }    
+    for (i = 0; i < dev_num; i++) {
+        if (ib_devlist[i]) {
+            goto dev_found;
+        }
+    }
 #else
-    for(i = 0; i < dev_num; i++) {
-        if(!strcmp(ibv_get_device_name(ib_devlist[i]), "mlx4_0")) { goto dev_found; }
+    for (i = 0; i < dev_num; i++) {
+        if (!strcmp(ibv_get_device_name(ib_devlist[i]), "mlx4_0")) {
+            goto dev_found;
+        }
     }
 #endif
     IBCOM_ERR_SETANDJUMP(-1, printf("IB device not found"));
- dev_found:
+  dev_found:
 
     /* Open the requested device */
-    if(ib_ctx_export) {
+    if (ib_ctx_export) {
         ib_ctx = ib_ctx_export;
-    } else {
+    }
+    else {
         ib_ctx = ibv_open_device(ib_devlist[i]);
     }
     dprintf("ibcomDeviceinit,ib_ctx_export=%p,ib_ctx=%p\n", ib_ctx_export, ib_ctx);
-    if(!ib_ctx){	
+    if (!ib_ctx) {
         fprintf(stderr, "failed to open IB device\n");
         goto err_exit;
     }
@@ -205,56 +219,61 @@ static int ibcomDeviceinit() {
 #ifdef DCFA
 #else
     dev_name = strdup(ibv_get_device_name(ib_devlist[i]));
-	dprintf("ibcomDeviceinit,dev_name=%s\n", dev_name);
+    dprintf("ibcomDeviceinit,dev_name=%s\n", dev_name);
 #endif
     /* Create a PD */
-    if(ib_pd_export) {
+    if (ib_pd_export) {
         ib_pd = ib_pd_export;
-    } else {
+    }
+    else {
         ib_pd = ibv_alloc_pd(ib_ctx);
     }
     dprintf("ibcomDeviceinit,ib_pd_export=%p,ib_pd=%p\n", ib_pd_export, ib_pd);
-    if (!ib_pd){
+    if (!ib_pd) {
         fprintf(stderr, "ibv_alloc_pd failed\n");
         goto err_exit;
     }
     ib_pd_export = ib_pd;
 
     ib_initialized = 1;
- fn_exit:
+  fn_exit:
     return ibcom_errno;
 
- err_exit:
+  err_exit:
     ib_initialized = -1;
-    if (ib_devlist) ibv_free_device_list(ib_devlist);
-    if (ib_ctx) ibv_close_device(ib_ctx);
+    if (ib_devlist)
+        ibv_free_device_list(ib_devlist);
+    if (ib_ctx)
+        ibv_close_device(ib_ctx);
     return -1;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-static void ibcomClean(IbCom *conp) {
-    int	i;
+static void ibcomClean(IbCom * conp)
+{
+    int i;
 
-    if(conp->icom_qp) ibv_destroy_qp(conp->icom_qp);
-    if(conp->icom_mrlist && conp->icom_mrlen > 0){
-        switch(conp->open_flag) {
+    if (conp->icom_qp)
+        ibv_destroy_qp(conp->icom_qp);
+    if (conp->icom_mrlist && conp->icom_mrlen > 0) {
+        switch (conp->open_flag) {
         case IBCOM_OPEN_RC:
-            for(i = 0; i < IBCOM_NBUF_RDMA; i++) {
+            for (i = 0; i < IBCOM_NBUF_RDMA; i++) {
                 if (conp->icom_mrlist[i]) {
                     ibv_dereg_mr(conp->icom_mrlist[i]);
                 }
             }
             break;
         case IBCOM_OPEN_SCRATCH_PAD:
-            for(i = 0; i < IBCOM_NBUF_SCRATCH_PAD; i++) {
+            for (i = 0; i < IBCOM_NBUF_SCRATCH_PAD; i++) {
                 if (conp->icom_mrlist[i]) {
                     ibv_dereg_mr(conp->icom_mrlist[i]);
                 }
             }
             break;
-        case IBCOM_OPEN_UD: 
-            for(i = 0; i < IBCOM_NBUF_UD; i++) {
+        case IBCOM_OPEN_UD:
+            for (i = 0; i < IBCOM_NBUF_UD; i++) {
                 if (conp->icom_mrlist[i]) {
                     ibv_dereg_mr(conp->icom_mrlist[i]);
                 }
@@ -263,69 +282,77 @@ static void ibcomClean(IbCom *conp) {
         }
         free(conp->icom_mrlist);
     }
-    if(conp->icom_mem[IBCOM_RDMAWR_FROM]) {
+    if (conp->icom_mem[IBCOM_RDMAWR_FROM]) {
         munmap(conp->icom_mem[IBCOM_RDMAWR_FROM], IBCOM_RDMABUF_SZ);
     }
-    if(conp->icom_mem[IBCOM_RDMAWR_TO]) {
+    if (conp->icom_mem[IBCOM_RDMAWR_TO]) {
         munmap(conp->icom_mem[IBCOM_RDMAWR_TO], IBCOM_RDMABUF_SZ);
     }
-    if(conp->icom_scq) {
+    if (conp->icom_scq) {
         ibv_destroy_cq(conp->icom_scq);
     }
-    if(conp->icom_rcq) {
+    if (conp->icom_rcq) {
         ibv_destroy_cq(conp->icom_rcq);
     }
-    if(conp->icom_rmem) {
+    if (conp->icom_rmem) {
         free(conp->icom_rmem);
     }
-    if(conp->icom_rsize) {
+    if (conp->icom_rsize) {
         free(conp->icom_rsize);
     }
-    if(conp->icom_rkey) {
+    if (conp->icom_rkey) {
         free(conp->icom_rkey);
     }
     memset(conp, 0, sizeof(IbCom));
     // TODO: free ah, sge, command template, ...
 }
 
-int ibcomOpen(int ib_port, int ibcom_open_flag, int* condesc) {
+int ibcomOpen(int ib_port, int ibcom_open_flag, int *condesc)
+{
     int ibcom_errno = 0, ib_errno;
-    IbCom	*conp;
-    struct ibv_qp_init_attr	qp_init_attr;
-    struct ibv_sge		*sge;
-    struct ibv_send_wr		*sr;
-    struct ibv_recv_wr		*rr, *bad_wr;
-    int		mr_flags;
-    int		i;
+    IbCom *conp;
+    struct ibv_qp_init_attr qp_init_attr;
+    struct ibv_sge *sge;
+    struct ibv_send_wr *sr;
+    struct ibv_recv_wr *rr, *bad_wr;
+    int mr_flags;
+    int i;
 
     dprintf("ibcomOpen,port=%d,flag=%08x\n", ib_port, ibcom_open_flag);
 
     int ibcom_open_flag_conn = ibcom_open_flag;
-    if(ibcom_open_flag_conn != IBCOM_OPEN_RC &&
-       ibcom_open_flag_conn != IBCOM_OPEN_RC_LMT_PUT &&
-       ibcom_open_flag_conn != IBCOM_OPEN_UD &&
-       ibcom_open_flag_conn != IBCOM_OPEN_SCRATCH_PAD) {
+    if (ibcom_open_flag_conn != IBCOM_OPEN_RC &&
+        ibcom_open_flag_conn != IBCOM_OPEN_RC_LMT_PUT &&
+        ibcom_open_flag_conn != IBCOM_OPEN_UD && ibcom_open_flag_conn != IBCOM_OPEN_SCRATCH_PAD) {
         dprintf("ibcomOpen,bad flag\n");
         ibcom_errno = -1;
         goto fn_fail;
     }
 
     /* device open error */
-    if(ibcomDeviceinit() < 0) { ibcom_errno = -1; goto fn_fail; }
+    if (ibcomDeviceinit() < 0) {
+        ibcom_errno = -1;
+        goto fn_fail;
+    }
 
     /* no more connection can be estabilished */
-    if(maxcon == IBCOM_SIZE) { ibcom_errno = -1; goto fn_fail; }
+    if (maxcon == IBCOM_SIZE) {
+        ibcom_errno = -1;
+        goto fn_fail;
+    }
 
-    for(*condesc = 0; *condesc < IBCOM_SIZE; (*condesc)++) {
+    for (*condesc = 0; *condesc < IBCOM_SIZE; (*condesc)++) {
         //dprintf("*condesc=%d,used=%d\n", *condesc, contab[*condesc].icom_used);
-        if(contab[*condesc].icom_used == 0) { goto ok_cont; }
+        if (contab[*condesc].icom_used == 0) {
+            goto ok_cont;
+        }
     }
     /* count says not full, but we couldn't fine vacant slot */
     dprintf("contable has inconsistent\n");
-    ibcom_errno = -1; 
+    ibcom_errno = -1;
     goto fn_fail;
 
-ok_cont:
+  ok_cont:
     dprintf("ibcomOpen,condesc=%d\n", *condesc);
     conp = &contab[*condesc];
     memset(conp, 0, sizeof(IbCom));
@@ -333,7 +360,7 @@ ok_cont:
     conp->icom_port = ib_port;
     conp->open_flag = ibcom_open_flag;
     conp->rsr_seq_num_poll = 0; /* it means slot 0 is polled */
-    conp->rsr_seq_num_tail = -1; /* it means slot 0 is not released */
+    conp->rsr_seq_num_tail = -1;        /* it means slot 0 is not released */
     conp->rsr_seq_num_tail_last_sent = -1;
     conp->lsr_seq_num_tail = -1;
     conp->lsr_seq_num_tail_last_requested = -2;
@@ -343,16 +370,16 @@ ok_cont:
 
 #ifdef DCFA
 #else
-    if(ibv_query_port(ib_ctx, ib_port, &conp->icom_pattr)) {
+    if (ibv_query_port(ib_ctx, ib_port, &conp->icom_pattr)) {
         dprintf("ibv_query_port on port %u failed\n", ib_port);
         goto err_exit;
     }
 #endif
 
     /* Create send/recv CQ */
-    switch(ibcom_open_flag) {
+    switch (ibcom_open_flag) {
     case IBCOM_OPEN_RC:
-        if(!rc_shared_scq) {
+        if (!rc_shared_scq) {
 #ifdef DCFA
             rc_shared_scq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
 #else
@@ -361,8 +388,8 @@ ok_cont:
             IBCOM_ERR_CHKANDJUMP(!rc_shared_scq, -1, dprintf("rc_shared_scq"));
         }
         conp->icom_scq = rc_shared_scq;
-    
-        if(!rc_shared_rcq) {
+
+        if (!rc_shared_rcq) {
 #ifdef DCFA
             rc_shared_rcq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
 #else
@@ -373,7 +400,7 @@ ok_cont:
         conp->icom_rcq = rc_shared_rcq;
         break;
     case IBCOM_OPEN_SCRATCH_PAD:
-        if(!rc_shared_scq_scratch_pad) {
+        if (!rc_shared_scq_scratch_pad) {
 #ifdef DCFA
             rc_shared_scq_scratch_pad = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
 #else
@@ -382,8 +409,8 @@ ok_cont:
             IBCOM_ERR_CHKANDJUMP(!rc_shared_scq_scratch_pad, -1, dprintf("rc_shared_scq"));
         }
         conp->icom_scq = rc_shared_scq_scratch_pad;
-    
-        if(!rc_shared_rcq_scratch_pad) {
+
+        if (!rc_shared_rcq_scratch_pad) {
 #ifdef DCFA
             rc_shared_rcq_scratch_pad = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
 #else
@@ -394,7 +421,7 @@ ok_cont:
         conp->icom_rcq = rc_shared_rcq_scratch_pad;
         break;
     case IBCOM_OPEN_RC_LMT_PUT:
-        if(!rc_shared_scq_lmt_put) {
+        if (!rc_shared_scq_lmt_put) {
 #ifdef DCFA
             rc_shared_scq_lmt_put = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
 #else
@@ -403,8 +430,8 @@ ok_cont:
             IBCOM_ERR_CHKANDJUMP(!rc_shared_scq_lmt_put, -1, dprintf("rc_shared_scq"));
         }
         conp->icom_scq = rc_shared_scq_lmt_put;
-    
-        if(!rc_shared_rcq_lmt_put) {
+
+        if (!rc_shared_rcq_lmt_put) {
 #ifdef DCFA
             rc_shared_rcq_lmt_put = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
 #else
@@ -415,7 +442,7 @@ ok_cont:
         conp->icom_rcq = rc_shared_rcq_lmt_put;
         break;
     case IBCOM_OPEN_UD:
-        if(!ud_shared_scq) {
+        if (!ud_shared_scq) {
 #ifdef DCFA
             ud_shared_scq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
 #else
@@ -425,17 +452,17 @@ ok_cont:
         }
         conp->icom_scq = ud_shared_scq;
 
-        if(!ud_shared_rcq) {
+        if (!ud_shared_rcq) {
 #ifdef DCFA
             ud_shared_rcq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
 #else
             ud_shared_rcq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY, NULL, NULL, 0);
 #endif
-            IBCOM_ERR_CHKANDJUMP(!ud_shared_rcq, -1, dprintf("ud_shared_rcq")); 
+            IBCOM_ERR_CHKANDJUMP(!ud_shared_rcq, -1, dprintf("ud_shared_rcq"));
         }
-        conp->icom_rcq = ud_shared_rcq; 
+        conp->icom_rcq = ud_shared_rcq;
         break;
-    }        
+    }
 
     /* Create QP */
     memset(&qp_init_attr, 0, sizeof(qp_init_attr));
@@ -446,7 +473,7 @@ ok_cont:
     qp_init_attr.cap.max_send_sge = IBCOM_MAX_SGE_CAPACITY;
     qp_init_attr.cap.max_recv_sge = IBCOM_MAX_SGE_CAPACITY;
     qp_init_attr.cap.max_inline_data = IBCOM_INLINE_DATA;
-    switch(ibcom_open_flag) {
+    switch (ibcom_open_flag) {
     case IBCOM_OPEN_RC:
     case IBCOM_OPEN_RC_LMT_PUT:
     case IBCOM_OPEN_SCRATCH_PAD:
@@ -468,7 +495,9 @@ ok_cont:
     conp->max_recv_wr = qp_init_attr.cap.max_recv_wr;
     conp->max_inline_data = qp_init_attr.cap.max_inline_data;
 
-    dprintf("ibcomOpen,max_send_wr=%d,max_recv_wr=%d,max_inline_data=%d\n", qp_init_attr.cap.max_send_wr, qp_init_attr.cap.max_recv_wr, qp_init_attr.cap.max_inline_data);
+    dprintf("ibcomOpen,max_send_wr=%d,max_recv_wr=%d,max_inline_data=%d\n",
+            qp_init_attr.cap.max_send_wr, qp_init_attr.cap.max_recv_wr,
+            qp_init_attr.cap.max_inline_data);
     dprintf("ibcomOpen,fd=%d,qpn=%08x\n", *condesc, conp->icom_qp->qp_num);
 #ifdef DCFA
     dprintf("ibcomOpen,fd=%d,lid=%04x\n", *condesc, ib_ctx->lid);
@@ -478,70 +507,80 @@ ok_cont:
 
 #ifdef DCFA
     /* DCFA doesn't use gid */
-    for(i = 0; i < 16; i++) { conp->icom_gid.raw[i] = 0; }
+    for (i = 0; i < 16; i++) {
+        conp->icom_gid.raw[i] = 0;
+    }
 #else
     ib_errno = ibv_query_gid(ib_ctx, ib_port, 0, &conp->icom_gid);
     IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibv_query_gid\n"));
 
     dprintf("ibcomOpen,fd=%d,my_gid=", *condesc);
-    for(i = 0; i < 16; i++) { dprintf("%02x", (int)conp->icom_gid.raw[i]); }
+    for (i = 0; i < 16; i++) {
+        dprintf("%02x", (int) conp->icom_gid.raw[i]);
+    }
     dprintf("\n");
 #endif
 
     /* buffers */
-    switch(ibcom_open_flag) {
+    switch (ibcom_open_flag) {
     case IBCOM_OPEN_RC:
         /* RDMA-write-from and -to local memory area */
-        conp->icom_mrlist = malloc(sizeof(struct ibv_mr*) * IBCOM_NBUF_RDMA);
-        memset(conp->icom_mrlist, 0, sizeof(struct ibv_mr*) * IBCOM_NBUF_RDMA);
+        conp->icom_mrlist = malloc(sizeof(struct ibv_mr *) * IBCOM_NBUF_RDMA);
+        memset(conp->icom_mrlist, 0, sizeof(struct ibv_mr *) * IBCOM_NBUF_RDMA);
         conp->icom_mrlen = IBCOM_NBUF_RDMA;
-        conp->icom_mem = (void **) malloc(sizeof(void**) * IBCOM_NBUF_RDMA);
+        conp->icom_mem = (void **) malloc(sizeof(void **) * IBCOM_NBUF_RDMA);
         //printf("open,icom_mem=%p\n", conp->icom_mem);
-        memset(conp->icom_mem, 0, sizeof(void**) * IBCOM_NBUF_RDMA);
-        conp->icom_msize = (int*) malloc(sizeof(int*)* IBCOM_NBUF_RDMA);
-        memset(conp->icom_msize, 0, sizeof(int*) * IBCOM_NBUF_RDMA);
-        mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE ;
-        
+        memset(conp->icom_mem, 0, sizeof(void **) * IBCOM_NBUF_RDMA);
+        conp->icom_msize = (int *) malloc(sizeof(int *) * IBCOM_NBUF_RDMA);
+        memset(conp->icom_msize, 0, sizeof(int *) * IBCOM_NBUF_RDMA);
+        mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
+
         /* RDMA-write-from local memory area */
         conp->icom_msize[IBCOM_RDMAWR_FROM] = IBCOM_RDMABUF_SZ;
-        conp->icom_mem[IBCOM_RDMAWR_FROM] = mmap(0, IBCOM_RDMABUF_SZ, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        conp->icom_mem[IBCOM_RDMAWR_FROM] =
+            mmap(0, IBCOM_RDMABUF_SZ, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
         dprintf("IbcomOpen,mmap=%p,len=%d\n", conp->icom_mem[IBCOM_RDMAWR_FROM], IBCOM_RDMABUF_SZ);
-        if (conp->icom_mem[IBCOM_RDMAWR_FROM] == (void*)-1){
+        if (conp->icom_mem[IBCOM_RDMAWR_FROM] == (void *) -1) {
             fprintf(stderr, "failed to allocate buffer\n");
             goto err_exit;
         }
-        memset(conp->icom_mem[IBCOM_RDMAWR_FROM], 0 , conp->icom_msize[IBCOM_RDMAWR_FROM]);
-        
-        conp->icom_mrlist[IBCOM_RDMAWR_FROM] = ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_RDMAWR_FROM], conp->icom_msize[IBCOM_RDMAWR_FROM]);
-        if (!conp->icom_mrlist[IBCOM_RDMAWR_FROM]){
+        memset(conp->icom_mem[IBCOM_RDMAWR_FROM], 0, conp->icom_msize[IBCOM_RDMAWR_FROM]);
+
+        conp->icom_mrlist[IBCOM_RDMAWR_FROM] =
+            ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_RDMAWR_FROM],
+                               conp->icom_msize[IBCOM_RDMAWR_FROM]);
+        if (!conp->icom_mrlist[IBCOM_RDMAWR_FROM]) {
             fprintf(stderr, "ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags);
             goto err_exit;
         }
-        
+
         /* RDMA-write-to local memory area */
         conp->icom_msize[IBCOM_RDMAWR_TO] = IBCOM_RDMABUF_SZ;
 #if 0
-	int shmid = shmget(2, IBCOM_RDMABUF_SZ, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
-	IBCOM_ERR_CHKANDJUMP(shmid < 0, -1,  perror("shmget"));
-	conp->icom_mem[IBCOM_RDMAWR_TO] = shmat(shmid, 0, 0);
-	if(conp->icom_mem[IBCOM_RDMAWR_TO] == (char *)-1) {
-	  perror("Shared memory attach failure"); 
-	  shmctl(shmid, IPC_RMID, NULL);
-	  ibcom_errno = -1;
-	  goto fn_fail;
-	}
-#else
-        conp->icom_mem[IBCOM_RDMAWR_TO] = mmap(0, IBCOM_RDMABUF_SZ, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        int shmid = shmget(2, IBCOM_RDMABUF_SZ, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+        IBCOM_ERR_CHKANDJUMP(shmid < 0, -1, perror("shmget"));
+        conp->icom_mem[IBCOM_RDMAWR_TO] = shmat(shmid, 0, 0);
+        if (conp->icom_mem[IBCOM_RDMAWR_TO] == (char *) -1) {
+            perror("Shared memory attach failure");
+            shmctl(shmid, IPC_RMID, NULL);
+            ibcom_errno = -1;
+            goto fn_fail;
+        }
+#else
+        conp->icom_mem[IBCOM_RDMAWR_TO] =
+            mmap(0, IBCOM_RDMABUF_SZ, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
         dprintf("IbcomOpen,mmap=%p,len=%d\n", conp->icom_mem[IBCOM_RDMAWR_TO], IBCOM_RDMABUF_SZ);
 #endif
-        if (conp->icom_mem[IBCOM_RDMAWR_TO] == (void*)-1){
+        if (conp->icom_mem[IBCOM_RDMAWR_TO] == (void *) -1) {
             fprintf(stderr, "failed to allocate buffer\n");
             goto err_exit;
         }
-        memset(conp->icom_mem[IBCOM_RDMAWR_TO], 0 , conp->icom_msize[IBCOM_RDMAWR_TO]);
-        
-        conp->icom_mrlist[IBCOM_RDMAWR_TO] = ibv_reg_mr(ib_pd, conp->icom_mem[IBCOM_RDMAWR_TO], conp->icom_msize[IBCOM_RDMAWR_TO], mr_flags);
-        if (!conp->icom_mrlist[IBCOM_RDMAWR_TO]){
+        memset(conp->icom_mem[IBCOM_RDMAWR_TO], 0, conp->icom_msize[IBCOM_RDMAWR_TO]);
+
+        conp->icom_mrlist[IBCOM_RDMAWR_TO] =
+            ibv_reg_mr(ib_pd, conp->icom_mem[IBCOM_RDMAWR_TO], conp->icom_msize[IBCOM_RDMAWR_TO],
+                       mr_flags);
+        if (!conp->icom_mrlist[IBCOM_RDMAWR_TO]) {
             fprintf(stderr, "ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags);
             goto err_exit;
         }
@@ -551,78 +590,89 @@ ok_cont:
         dprintf("ibcomOpen,fd=%d,rmem=%p\n", *condesc, conp->icom_mrlist[IBCOM_RDMAWR_TO]->addr);
 #endif
         dprintf("ibcomOpen,fd=%d,rkey=%08x\n", *condesc, conp->icom_mrlist[IBCOM_RDMAWR_TO]->rkey);
-        
+
         /* RDMA-write-to remote memory area */
-        conp->icom_rmem = (void**) malloc(sizeof(void**) * IBCOM_NBUF_RDMA);
-        if (conp->icom_rmem == 0) goto err_exit;
-        memset(conp->icom_rmem, 0, sizeof(void**) * IBCOM_NBUF_RDMA);
-        
-        conp->icom_rsize = (size_t*) malloc(sizeof(void**) * IBCOM_NBUF_RDMA);
-        if (conp->icom_rsize == 0) goto err_exit;
-        memset(conp->icom_rsize, 0, sizeof(void**) * IBCOM_NBUF_RDMA);
-        
-        conp->icom_rkey = (int*) malloc(sizeof(int) * IBCOM_NBUF_RDMA);
-        if (conp->icom_rkey == 0) goto err_exit;
-        memset(conp->icom_rkey, 0, sizeof(int) *  IBCOM_NBUF_RDMA);
+        conp->icom_rmem = (void **) malloc(sizeof(void **) * IBCOM_NBUF_RDMA);
+        if (conp->icom_rmem == 0)
+            goto err_exit;
+        memset(conp->icom_rmem, 0, sizeof(void **) * IBCOM_NBUF_RDMA);
+
+        conp->icom_rsize = (size_t *) malloc(sizeof(void **) * IBCOM_NBUF_RDMA);
+        if (conp->icom_rsize == 0)
+            goto err_exit;
+        memset(conp->icom_rsize, 0, sizeof(void **) * IBCOM_NBUF_RDMA);
+
+        conp->icom_rkey = (int *) malloc(sizeof(int) * IBCOM_NBUF_RDMA);
+        if (conp->icom_rkey == 0)
+            goto err_exit;
+        memset(conp->icom_rkey, 0, sizeof(int) * IBCOM_NBUF_RDMA);
         break;
     case IBCOM_OPEN_SCRATCH_PAD:
         /* RDMA-write-from and -to local memory area */
-        conp->icom_mrlist = malloc(sizeof(struct ibv_mr*) * IBCOM_NBUF_SCRATCH_PAD);
-        memset(conp->icom_mrlist, 0, sizeof(struct ibv_mr*) * IBCOM_NBUF_SCRATCH_PAD);
+        conp->icom_mrlist = malloc(sizeof(struct ibv_mr *) * IBCOM_NBUF_SCRATCH_PAD);
+        memset(conp->icom_mrlist, 0, sizeof(struct ibv_mr *) * IBCOM_NBUF_SCRATCH_PAD);
         conp->icom_mrlen = IBCOM_NBUF_SCRATCH_PAD;
-        conp->icom_mem = (void **) malloc(sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
-        memset(conp->icom_mem, 0, sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
-        conp->icom_msize = (int*) malloc(sizeof(int*) * IBCOM_NBUF_SCRATCH_PAD);
-        memset(conp->icom_msize, 0, sizeof(int*) * IBCOM_NBUF_SCRATCH_PAD);
-        mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE ;
-        
+        conp->icom_mem = (void **) malloc(sizeof(void **) * IBCOM_NBUF_SCRATCH_PAD);
+        memset(conp->icom_mem, 0, sizeof(void **) * IBCOM_NBUF_SCRATCH_PAD);
+        conp->icom_msize = (int *) malloc(sizeof(int *) * IBCOM_NBUF_SCRATCH_PAD);
+        memset(conp->icom_msize, 0, sizeof(int *) * IBCOM_NBUF_SCRATCH_PAD);
+        mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
+
         /* RDMA-write-to remote memory area */
-        conp->icom_rmem = (void**)malloc(sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
+        conp->icom_rmem = (void **) malloc(sizeof(void **) * IBCOM_NBUF_SCRATCH_PAD);
         IBCOM_ERR_CHKANDJUMP(conp->icom_rmem == 0, -1, dprintf("malloc failed\n"));
-        memset(conp->icom_rmem, 0, sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
-        
-        conp->icom_rsize = (size_t*)malloc(sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
+        memset(conp->icom_rmem, 0, sizeof(void **) * IBCOM_NBUF_SCRATCH_PAD);
+
+        conp->icom_rsize = (size_t *) malloc(sizeof(void **) * IBCOM_NBUF_SCRATCH_PAD);
         IBCOM_ERR_CHKANDJUMP(conp->icom_rsize == 0, -1, dprintf("malloc failed\n"));
-        memset(conp->icom_rsize, 0, sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
+        memset(conp->icom_rsize, 0, sizeof(void **) * IBCOM_NBUF_SCRATCH_PAD);
 
-        conp->icom_rkey = (int*)malloc(sizeof(int) * IBCOM_NBUF_SCRATCH_PAD);
+        conp->icom_rkey = (int *) malloc(sizeof(int) * IBCOM_NBUF_SCRATCH_PAD);
         IBCOM_ERR_CHKANDJUMP(conp->icom_rkey == 0, -1, dprintf("malloc failed\n"));
-        memset(conp->icom_rkey, 0, sizeof(int) *  IBCOM_NBUF_SCRATCH_PAD);
+        memset(conp->icom_rkey, 0, sizeof(int) * IBCOM_NBUF_SCRATCH_PAD);
         break;
 
     case IBCOM_OPEN_UD:
         /* UD-write-from and -to local memory area */
-        conp->icom_mrlist = malloc(sizeof(struct ibv_mr*) * IBCOM_NBUF_UD);
-        memset(conp->icom_mrlist, 0, sizeof(struct ibv_mr*) * IBCOM_NBUF_UD);
+        conp->icom_mrlist = malloc(sizeof(struct ibv_mr *) * IBCOM_NBUF_UD);
+        memset(conp->icom_mrlist, 0, sizeof(struct ibv_mr *) * IBCOM_NBUF_UD);
         conp->icom_mrlen = IBCOM_NBUF_UD;
-        conp->icom_mem = (void **) malloc(sizeof(void**) * IBCOM_NBUF_UD);
-        memset(conp->icom_mem, 0, sizeof(void**) * IBCOM_NBUF_UD);
-        conp->icom_msize = (int*) malloc(sizeof(int*)* IBCOM_NBUF_UD);
-        memset(conp->icom_msize, 0, sizeof(int*) * IBCOM_NBUF_UD);
-        mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE ;
-        
+        conp->icom_mem = (void **) malloc(sizeof(void **) * IBCOM_NBUF_UD);
+        memset(conp->icom_mem, 0, sizeof(void **) * IBCOM_NBUF_UD);
+        conp->icom_msize = (int *) malloc(sizeof(int *) * IBCOM_NBUF_UD);
+        memset(conp->icom_msize, 0, sizeof(int *) * IBCOM_NBUF_UD);
+        mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
+
         /* UD-write-from local memory area */
-        IBCOM_ERR_CHKANDJUMP(IBCOM_UDBUF_SZ <= 40, -1, dprintf("buf_size too short\n")); 
+        IBCOM_ERR_CHKANDJUMP(IBCOM_UDBUF_SZ <= 40, -1, dprintf("buf_size too short\n"));
         conp->icom_msize[IBCOM_UDWR_FROM] = IBCOM_UDBUF_SZ;
-        conp->icom_mem[IBCOM_UDWR_FROM] = mmap(0, IBCOM_UDBUF_SZ, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        conp->icom_mem[IBCOM_UDWR_FROM] =
+            mmap(0, IBCOM_UDBUF_SZ, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
         dprintf("IbcomOpen,mmap=%p,len=%d\n", conp->icom_mem[IBCOM_UDWR_FROM], IBCOM_UDBUF_SZ);
-        IBCOM_ERR_CHKANDJUMP(conp->icom_mem[IBCOM_UDWR_FROM] == (void*)-1, -1, dprintf("failed to allocate buffer\n"));
-        memset(conp->icom_mem[IBCOM_UDWR_FROM], 0 , conp->icom_msize[IBCOM_UDWR_FROM]);
-        
-        conp->icom_mrlist[IBCOM_UDWR_FROM] = ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_UDWR_FROM], conp->icom_msize[IBCOM_UDWR_FROM]);
-        IBCOM_ERR_CHKANDJUMP(!conp->icom_mrlist[IBCOM_UDWR_FROM], -1, dprintf("ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags));
+        IBCOM_ERR_CHKANDJUMP(conp->icom_mem[IBCOM_UDWR_FROM] == (void *) -1, -1,
+                             dprintf("failed to allocate buffer\n"));
+        memset(conp->icom_mem[IBCOM_UDWR_FROM], 0, conp->icom_msize[IBCOM_UDWR_FROM]);
+
+        conp->icom_mrlist[IBCOM_UDWR_FROM] =
+            ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_UDWR_FROM], conp->icom_msize[IBCOM_UDWR_FROM]);
+        IBCOM_ERR_CHKANDJUMP(!conp->icom_mrlist[IBCOM_UDWR_FROM], -1,
+                             dprintf("ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags));
 
         /* UD-write-to local memory area */
         /* addr to addr+39 are not filled, addr+40 to addr+length-1 are filled with payload */
-        IBCOM_ERR_CHKANDJUMP(IBCOM_UDBUF_SZ <= 40, -1, dprintf("buf_size too short\n")); 
+        IBCOM_ERR_CHKANDJUMP(IBCOM_UDBUF_SZ <= 40, -1, dprintf("buf_size too short\n"));
         conp->icom_msize[IBCOM_UDWR_TO] = IBCOM_UDBUF_SZ;
-        conp->icom_mem[IBCOM_UDWR_TO] = mmap(0, IBCOM_UDBUF_SZ, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        conp->icom_mem[IBCOM_UDWR_TO] =
+            mmap(0, IBCOM_UDBUF_SZ, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
         dprintf("IbcomOpen,mmap=%p,len=%d\n", conp->icom_mem[IBCOM_UDWR_TO], IBCOM_UDBUF_SZ);
-        IBCOM_ERR_CHKANDJUMP(conp->icom_mem[IBCOM_UDWR_TO] == (void*)-1, -1, dprintf("failed to allocate buffer\n"));
-        memset(conp->icom_mem[IBCOM_UDWR_TO], 0 , conp->icom_msize[IBCOM_UDWR_TO]);
-        
-        conp->icom_mrlist[IBCOM_UDWR_TO] = ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_UDWR_TO], conp->icom_msize[IBCOM_UDWR_TO]);
-        IBCOM_ERR_CHKANDJUMP(!conp->icom_mrlist[IBCOM_UDWR_TO], -1, dprintf("ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags));
+        IBCOM_ERR_CHKANDJUMP(conp->icom_mem[IBCOM_UDWR_TO] == (void *) -1, -1,
+                             dprintf("failed to allocate buffer\n"));
+        memset(conp->icom_mem[IBCOM_UDWR_TO], 0, conp->icom_msize[IBCOM_UDWR_TO]);
+
+        conp->icom_mrlist[IBCOM_UDWR_TO] =
+            ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_UDWR_TO], conp->icom_msize[IBCOM_UDWR_TO]);
+        IBCOM_ERR_CHKANDJUMP(!conp->icom_mrlist[IBCOM_UDWR_TO], -1,
+                             dprintf("ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags));
 
         /* initialize arena allocator for IBCOM_UDWR_TO */
         //ibcom_udbuf_init(conp->icom_mem[IBCOM_UDWR_TO]);
@@ -636,38 +686,48 @@ ok_cont:
     }
 
     /* command templates */
-    switch(ibcom_open_flag) {
-    case IBCOM_OPEN_RC: 
-        
+    switch (ibcom_open_flag) {
+    case IBCOM_OPEN_RC:
+
         /* SR (send request) template */
-        conp->icom_sr = (struct ibv_send_wr*)malloc(sizeof(struct ibv_send_wr) * IBCOM_RC_SR_NTEMPLATE);
+        conp->icom_sr =
+            (struct ibv_send_wr *) malloc(sizeof(struct ibv_send_wr) * IBCOM_RC_SR_NTEMPLATE);
         memset(conp->icom_sr, 0, sizeof(struct ibv_send_wr) * IBCOM_RC_SR_NTEMPLATE);
-        
+
         int i;
-        for(i = 0; i < IBCOM_SMT_INLINE_NCHAIN; i++) {
+        for (i = 0; i < IBCOM_SMT_INLINE_NCHAIN; i++) {
             /* SGE (RDMA-send-from memory) template */
 #ifdef DCFA
-            memset(&(conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+            memset(&(conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[0]), 0,
+                   sizeof(struct ibv_sge) * WR_SG_NUM);
 #else
-            struct ibv_sge *sge = (struct ibv_sge*)malloc(sizeof(struct ibv_sge) * IBCOM_SMT_INLINE_INITIATOR_NSGE);
+            struct ibv_sge *sge =
+                (struct ibv_sge *) malloc(sizeof(struct ibv_sge) * IBCOM_SMT_INLINE_INITIATOR_NSGE);
             memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_SMT_INLINE_INITIATOR_NSGE);
-#endif            
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].next = (i == IBCOM_SMT_INLINE_NCHAIN - 1) ? NULL : &conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i + 1];
+#endif
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].next =
+                (i ==
+                 IBCOM_SMT_INLINE_NCHAIN - 1) ? NULL : &conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 +
+                                                                      i + 1];
 #ifdef DCFA
 #else
             conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list = sge;
 #endif
             conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].opcode = IBV_WR_RDMA_WRITE;
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].send_flags =
+                IBV_SEND_SIGNALED | IBV_SEND_INLINE;
         }
 
         {
 #ifdef DCFA
-            memset(&(conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+            memset(&(conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[0]), 0,
+                   sizeof(struct ibv_sge) * WR_SG_NUM);
 #else
-            struct ibv_sge *sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_SMT_NOINLINE_INITIATOR_NSGE);
+            struct ibv_sge *sge =
+                (struct ibv_sge *) malloc(sizeof(struct ibv_sge) *
+                                          IBCOM_SMT_NOINLINE_INITIATOR_NSGE);
             memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_SMT_NOINLINE_INITIATOR_NSGE);
-#endif            
+#endif
             conp->icom_sr[IBCOM_SMT_NOINLINE].next = NULL;
 #ifdef DCFA
 #else
@@ -679,9 +739,11 @@ ok_cont:
         {
             /* SR (send request) template for IBCOM_LMT_INITIATOR */
 #ifdef DCFA
-            memset(&(conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+            memset(&(conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[0]), 0,
+                   sizeof(struct ibv_sge) * WR_SG_NUM);
 #else
-            struct ibv_sge *sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_LMT_INITIATOR_NSGE);
+            struct ibv_sge *sge =
+                (struct ibv_sge *) malloc(sizeof(struct ibv_sge) * IBCOM_LMT_INITIATOR_NSGE);
             memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_LMT_INITIATOR_NSGE);
 #endif
             conp->icom_sr[IBCOM_LMT_INITIATOR].next = NULL;
@@ -691,13 +753,13 @@ ok_cont:
 #endif
             conp->icom_sr[IBCOM_LMT_INITIATOR].opcode = IBV_WR_RDMA_READ;
             conp->icom_sr[IBCOM_LMT_INITIATOR].send_flags = IBV_SEND_SIGNALED;
-        }            
+        }
 
-        /* SR (send request) template for IBCOM_LMT_PUT */ /* for lmt-put-done */
+        /* SR (send request) template for IBCOM_LMT_PUT *//* for lmt-put-done */
 #ifdef DCFA
         memset(&(conp->icom_sr[IBCOM_LMT_PUT].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
 #else
-        sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
+        sge = (struct ibv_sge *) malloc(sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
         memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
 #endif
         conp->icom_sr[IBCOM_LMT_PUT].next = NULL;
@@ -710,14 +772,15 @@ ok_cont:
 
         /* SR (send request) template for IBCOM_RDMAWR_FRMFIXED */
         /* not implemented */
-        
+
         /* SGE (scatter gather element) template for recv */
         /* nothing is required for RDMA-write */
-        
+
         /* RR (receive request) template for IBCOM_RDMAWR_RESPONDER */
-        conp->icom_rr = (struct ibv_recv_wr*)malloc(sizeof(struct ibv_recv_wr) * IBCOM_RC_RR_NTEMPLATE);
+        conp->icom_rr =
+            (struct ibv_recv_wr *) malloc(sizeof(struct ibv_recv_wr) * IBCOM_RC_RR_NTEMPLATE);
         memset(conp->icom_rr, 0, sizeof(struct ibv_recv_wr) * IBCOM_RC_RR_NTEMPLATE);
-        
+
         /* create one dummy RR to ibv_post_recv */
         conp->icom_rr[IBCOM_RDMAWR_RESPONDER].next = NULL;
 #ifdef DCFA
@@ -727,37 +790,45 @@ ok_cont:
         conp->icom_rr[IBCOM_RDMAWR_RESPONDER].num_sge = 0;
         break;
 
-    case IBCOM_OPEN_SCRATCH_PAD: {
-        /* SR (send request) template */
-        conp->icom_sr = (struct ibv_send_wr*)malloc(sizeof(struct ibv_send_wr) * IBCOM_SCRATCH_PAD_SR_NTEMPLATE);
-        memset(conp->icom_sr, 0, sizeof(struct ibv_send_wr) * IBCOM_SCRATCH_PAD_SR_NTEMPLATE);
+    case IBCOM_OPEN_SCRATCH_PAD:{
+            /* SR (send request) template */
+            conp->icom_sr =
+                (struct ibv_send_wr *) malloc(sizeof(struct ibv_send_wr) *
+                                              IBCOM_SCRATCH_PAD_SR_NTEMPLATE);
+            memset(conp->icom_sr, 0, sizeof(struct ibv_send_wr) * IBCOM_SCRATCH_PAD_SR_NTEMPLATE);
 
-        /* SR (send request) template for IBCOM_SCRATCH_PAD_INITIATOR */
+            /* SR (send request) template for IBCOM_SCRATCH_PAD_INITIATOR */
 #ifdef DCFA
-        memset(&(conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+            memset(&(conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0]), 0,
+                   sizeof(struct ibv_sge) * WR_SG_NUM);
 #else
-        struct ibv_sge *sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_SCRATCH_PAD_INITIATOR_NSGE);
-        memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_SCRATCH_PAD_INITIATOR_NSGE);
+            struct ibv_sge *sge =
+                (struct ibv_sge *) malloc(sizeof(struct ibv_sge) *
+                                          IBCOM_SCRATCH_PAD_INITIATOR_NSGE);
+            memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_SCRATCH_PAD_INITIATOR_NSGE);
 #endif
-        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].next = NULL;
+            conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].next = NULL;
 #ifdef DCFA
 #else
-        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list = sge;
+            conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list = sge;
 #endif
-        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].num_sge = 1;
-        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].opcode = IBV_WR_RDMA_WRITE;
-        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].send_flags = IBV_SEND_SIGNALED;
-        break; }
+            conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].num_sge = 1;
+            conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].opcode = IBV_WR_RDMA_WRITE;
+            conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].send_flags = IBV_SEND_SIGNALED;
+            break;
+        }
 
     case IBCOM_OPEN_RC_LMT_PUT:
         /* SR (send request) template */
-        conp->icom_sr = (struct ibv_send_wr*)malloc(sizeof(struct ibv_send_wr) * IBCOM_RC_SR_LMT_PUT_NTEMPLATE);
+        conp->icom_sr =
+            (struct ibv_send_wr *) malloc(sizeof(struct ibv_send_wr) *
+                                          IBCOM_RC_SR_LMT_PUT_NTEMPLATE);
         memset(conp->icom_sr, 0, sizeof(struct ibv_send_wr) * IBCOM_RC_SR_LMT_PUT_NTEMPLATE);
         /* SR (send request) template for IBCOM_LMT_PUT */
 #ifdef DCFA
         memset(&(conp->icom_sr[IBCOM_LMT_PUT].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
 #else
-        sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
+        sge = (struct ibv_sge *) malloc(sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
         memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
 #endif
         conp->icom_sr[IBCOM_LMT_PUT].next = NULL;
@@ -775,33 +846,35 @@ ok_cont:
         sge = &(conp->icom_sr[IBCOM_UD_INITIATOR].sg_list[0]);
         memset(sge, 0, sizeof(struct ibv_sge) * WR_SG_NUM);
 #else
-        sge = (struct ibv_sge*)calloc(1, sizeof(struct ibv_sge));
+        sge = (struct ibv_sge *) calloc(1, sizeof(struct ibv_sge));
 #endif
         /* addr to addr + length - 1 will be on the payload, but search backword for "<= 40" */
-        sge[0].addr = (uint64_t)conp->icom_mem[IBCOM_UDWR_FROM] + 40; 
+        sge[0].addr = (uint64_t) conp->icom_mem[IBCOM_UDWR_FROM] + 40;
         sge[0].length = IBCOM_UDBUF_SZSEG - 40;
         sge[0].lkey = conp->icom_mrlist[IBCOM_UDWR_FROM]->lkey;
-        
 
-        conp->icom_ah_attr = (struct ibv_ah_attr*)calloc(IBCOM_UD_SR_NTEMPLATE, sizeof(struct ibv_ah_attr));
+
+        conp->icom_ah_attr =
+            (struct ibv_ah_attr *) calloc(IBCOM_UD_SR_NTEMPLATE, sizeof(struct ibv_ah_attr));
 
         conp->icom_ah_attr[IBCOM_UD_INITIATOR].sl = 0;
         conp->icom_ah_attr[IBCOM_UD_INITIATOR].src_path_bits = 0;
         conp->icom_ah_attr[IBCOM_UD_INITIATOR].static_rate = 0; /* not limit on static rate (100% port speed) */
         conp->icom_ah_attr[IBCOM_UD_INITIATOR].is_global = 0;
         conp->icom_ah_attr[IBCOM_UD_INITIATOR].port_num = conp->icom_port;
-        
+
 #if 0
         conp->icom_ah_attr[IBCOM_UD_INITIATOR].is_global = 1;
         conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.flow_label = 0;
-        conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.sgid_index = 0; /* what is this? */
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.sgid_index = 0;      /* what is this? */
         conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.hop_limit = 1;
         conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.traffic_class = 0;
 #endif
 
         /* SR (send request) template for IBCOM_UD_INITIATOR */
-        conp->icom_sr = (struct ibv_send_wr*)calloc(IBCOM_UD_SR_NTEMPLATE, sizeof(struct ibv_send_wr));
-        
+        conp->icom_sr =
+            (struct ibv_send_wr *) calloc(IBCOM_UD_SR_NTEMPLATE, sizeof(struct ibv_send_wr));
+
         conp->icom_sr[IBCOM_UD_INITIATOR].next = NULL;
 #ifdef DCFA
 #else
@@ -812,21 +885,22 @@ ok_cont:
         conp->icom_sr[IBCOM_UD_INITIATOR].send_flags = IBV_SEND_SIGNALED;
 
         conp->icom_sr[IBCOM_UD_INITIATOR].wr.ud.remote_qkey = IBCOM_QKEY;
-        
+
         /* SGE (scatter gather element) template for recv */
 #ifdef DCFA
         sge = &(conp->icom_rr[IBCOM_UD_RESPONDER].sg_list[0]);
         memset(sge, 0, sizeof(struct ibv_sge) * WR_SG_NUM);
 #else
-        sge = (struct ibv_sge*)calloc(1, sizeof(struct ibv_sge));
+        sge = (struct ibv_sge *) calloc(1, sizeof(struct ibv_sge));
 #endif
-        sge[0].addr = (uint64_t)conp->icom_mem[IBCOM_UDWR_TO];
+        sge[0].addr = (uint64_t) conp->icom_mem[IBCOM_UDWR_TO];
         sge[0].length = IBCOM_UDBUF_SZ;
-        sge[0].lkey =  conp->icom_mrlist[IBCOM_UDWR_TO]->lkey;
-        
+        sge[0].lkey = conp->icom_mrlist[IBCOM_UDWR_TO]->lkey;
+
         /* RR (receive request) template for IBCOM_UD_RESPONDER */
-        conp->icom_rr = (struct ibv_recv_wr*)calloc(IBCOM_UD_RR_NTEMPLATE, sizeof(struct ibv_recv_wr));
-        
+        conp->icom_rr =
+            (struct ibv_recv_wr *) calloc(IBCOM_UD_RR_NTEMPLATE, sizeof(struct ibv_recv_wr));
+
         /* create one dummy RR to ibv_post_recv */
         conp->icom_rr[IBCOM_UD_RESPONDER].next = NULL;
 #ifdef DCFA
@@ -836,15 +910,15 @@ ok_cont:
         conp->icom_rr[IBCOM_UD_RESPONDER].num_sge = 1;
         break;
     }
-    
+
     maxcon++;
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- err_exit:
+  err_exit:
     ibcomClean(conp);
     return -1;
- fn_fail:
+  fn_fail:
     ibcomClean(conp);
     goto fn_exit;
 }
@@ -852,71 +926,82 @@ ok_cont:
 /* 1. allocate memory area if it's not allocated or reuse it if it's allocated
    2. ibv_reg_mr it and store rkey to conp->icom_mrlist
    buf is output */
-int ibcom_alloc(int condesc, int sz) {
-    IbCom	*conp;
+int ibcom_alloc(int condesc, int sz)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
     int mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
 
-    switch(conp->open_flag) {
-        
+    switch (conp->open_flag) {
+
     case IBCOM_OPEN_SCRATCH_PAD:
         /* RDMA-write-to local memory area */
-        if(!scratch_pad) {
-            scratch_pad = mmap(0, sz, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        if (!scratch_pad) {
+            scratch_pad = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
             dprintf("ibcom_alloc,mmap=%p,len=%d\n", scratch_pad, sz);
-            IBCOM_ERR_CHKANDJUMP(scratch_pad == (void*)-1, -1, dprintf("failed to allocate buffer\n")); 
+            IBCOM_ERR_CHKANDJUMP(scratch_pad == (void *) -1, -1,
+                                 dprintf("failed to allocate buffer\n"));
             dprintf("ibcom_alloc,scratch_pad=%p\n", scratch_pad);
-            memset(scratch_pad, 0 , sz);
+            memset(scratch_pad, 0, sz);
         }
         conp->icom_mem[IBCOM_SCRATCH_PAD_TO] = scratch_pad;
         conp->icom_msize[IBCOM_SCRATCH_PAD_TO] = sz;
-        
-        conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO] = ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_SCRATCH_PAD_TO], conp->icom_msize[IBCOM_SCRATCH_PAD_TO]);
-        IBCOM_ERR_CHKANDJUMP(!conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO], -1, dprintf("ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags));
-        
+
+        conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO] =
+            ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_SCRATCH_PAD_TO],
+                               conp->icom_msize[IBCOM_SCRATCH_PAD_TO]);
+        IBCOM_ERR_CHKANDJUMP(!conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO], -1,
+                             dprintf("ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags));
+
 #ifdef DCFA
-        dprintf("ibcom_alloc,fd=%d,rmem=%p\n", condesc, conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO]->buf);
+        dprintf("ibcom_alloc,fd=%d,rmem=%p\n", condesc,
+                conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO]->buf);
 #else
-        dprintf("ibcom_alloc,fd=%d,rmem=%p\n", condesc, conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO]->addr);
+        dprintf("ibcom_alloc,fd=%d,rmem=%p\n", condesc,
+                conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO]->addr);
 #endif
-        dprintf("ibcom_alloc,fd=%d,rkey=%08x\n", condesc, conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO]->rkey);
+        dprintf("ibcom_alloc,fd=%d,rkey=%08x\n", condesc,
+                conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO]->rkey);
         break;
     default:
-        IBCOM_ERR_CHKANDJUMP(1, -1, dprintf("ibcom_alloc, invalid open_flag=%d\n", conp->open_flag));
+        IBCOM_ERR_CHKANDJUMP(1, -1,
+                             dprintf("ibcom_alloc, invalid open_flag=%d\n", conp->open_flag));
         break;
     }
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_close(int condesc) {
-    IbCom	*conp;
+int ibcom_close(int condesc)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
-    
+
     dprintf("ibcom_close,condesc=%d\n", condesc);
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     ibcomClean(conp);
     --maxcon;
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_rts(int condesc, int remote_qpnum, uint16_t remote_lid, union ibv_gid *remote_gid) {
+int ibcom_rts(int condesc, int remote_qpnum, uint16_t remote_lid, union ibv_gid *remote_gid)
+{
     IbCom *conp;
     int ibcom_errno = 0;
     int ib_errno;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
-    if(conp->icom_connected == 1) {
+    if (conp->icom_connected == 1) {
         ibcom_errno = -1;
         goto fn_fail;
     }
@@ -924,28 +1009,30 @@ int ibcom_rts(int condesc, int remote_qpnum, uint16_t remote_lid, union ibv_gid
     struct ibv_qp_attr attr;
     int flags;
 
-    switch(conp->open_flag) {
+    switch (conp->open_flag) {
     case IBCOM_OPEN_RC:
     case IBCOM_OPEN_RC_LMT_PUT:
     case IBCOM_OPEN_SCRATCH_PAD:
         /* Init QP  */
         ib_errno = modify_qp_to_init(conp->icom_qp, conp->icom_port);
-        if(ib_errno) {
+        if (ib_errno) {
             fprintf(stderr, "change QP state to INIT failed\n");
             ibcom_errno = ib_errno;
             goto fn_fail;
         }
         /* Modify QP TO RTR status */
-        ib_errno = modify_qp_to_rtr(conp->icom_qp, remote_qpnum, remote_lid, remote_gid, conp->icom_port, 0);
-        conp->remote_lid = remote_lid; /* for debug */
-        if(ib_errno){
+        ib_errno =
+            modify_qp_to_rtr(conp->icom_qp, remote_qpnum, remote_lid, remote_gid, conp->icom_port,
+                             0);
+        conp->remote_lid = remote_lid;  /* for debug */
+        if (ib_errno) {
             fprintf(stderr, "failed to modify QP state to RTR\n");
             ibcom_errno = ib_errno;
             goto fn_fail;
         }
         /* Modify QP TO RTS status */
         ib_errno = modify_qp_to_rts(conp->icom_qp);
-        if(ib_errno) {
+        if (ib_errno) {
             fprintf(stderr, "failed to modify QP state to RTS\n");
             ibcom_errno = ib_errno;
             goto fn_fail;
@@ -968,7 +1055,7 @@ int ibcom_rts(int condesc, int remote_qpnum, uint16_t remote_lid, union ibv_gid
         flags = IBV_QP_STATE;
         ib_errno = ibv_modify_qp(conp->icom_qp, &attr, flags);
         IBCOM_ERR_CHKANDJUMP(ib_errno, -1, perror("ibv_modify_qp"));
-      
+
         /* RTS */
         memset(&attr, 0, sizeof(attr));
         attr.qp_state = IBV_QPS_RTS;
@@ -980,63 +1067,76 @@ int ibcom_rts(int condesc, int remote_qpnum, uint16_t remote_lid, union ibv_gid
     }
     conp->icom_connected = 1;
 
-fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
 #define MPID_NEM_DCFA_ENABLE_INLINE
-int ibcom_isend(int condesc, uint64_t wr_id, void* prefix, int sz_prefix, void* hdr, int sz_hdr, void* data, int sz_data, int* copied) {
-    IbCom	*conp;
+int ibcom_isend(int condesc, uint64_t wr_id, void *prefix, int sz_prefix, void *hdr, int sz_hdr,
+                void *data, int sz_data, int *copied)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
-    struct ibv_send_wr	*bad_wr;
-    int	ib_errno;
+    struct ibv_send_wr *bad_wr;
+    int ib_errno;
     int num_sge;
 
-    dprintf("ibcom_isend,prefix=%p,sz_prefix=%d,hdr=%p,sz_hdr=%d,data=%p,sz_data=%d\n", prefix, sz_prefix, hdr, sz_hdr, data, sz_data);
+    dprintf("ibcom_isend,prefix=%p,sz_prefix=%d,hdr=%p,sz_hdr=%d,data=%p,sz_data=%d\n", prefix,
+            sz_prefix, hdr, sz_hdr, data, sz_data);
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
-    if (conp->icom_connected == 0) { return -1; }
+    if (conp->icom_connected == 0) {
+        return -1;
+    }
     int sz_data_pow2;
     DCFA_NEM_SZ_DATA_POW2(sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr + sz_data);
     uint32_t sumsz = sz_data_pow2 + sizeof(tailmagic_t);
-    if(sz_data>16000) {
+    if (sz_data > 16000) {
         //dprintf("ibcom_isend,sz_data=%d,sz_data_pow2=%d,sz_max=%ld\n", sz_data, sz_data_pow2, DCFA_NEM_MAX_DATA_POW2);
     }
-    
+
     num_sge = 0;
 
-    void* buf_from = conp->icom_mem[IBCOM_RDMAWR_FROM] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
+    void *buf_from =
+        conp->icom_mem[IBCOM_RDMAWR_FROM] +
+        IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
 
-    sz_hdrmagic_t* sz_hdrmagic = (sz_hdrmagic_t*)buf_from;
+    sz_hdrmagic_t *sz_hdrmagic = (sz_hdrmagic_t *) buf_from;
     sz_hdrmagic->sz = sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr + sz_data + sizeof(tailmagic_t);
     sz_hdrmagic->magic = IBCOM_MAGIC;
 
     /* hdr must be memcpy'ed because it resides on the stack when sending a close-VC command */
     /* the memcpy is performed into the IBCOM_RDMAWR_FROM buffer */
-        void* hdr_copy = buf_from + sizeof(sz_hdrmagic_t);
-        memcpy(hdr_copy, prefix, sz_prefix);
-        memcpy(hdr_copy + sz_prefix, hdr, sz_hdr);
+    void *hdr_copy = buf_from + sizeof(sz_hdrmagic_t);
+    memcpy(hdr_copy, prefix, sz_prefix);
+    memcpy(hdr_copy + sz_prefix, hdr, sz_hdr);
 #ifdef DCFA
-        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].mic_addr = (uint64_t)sz_hdrmagic;
-        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)sz_hdrmagic - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
-#else
-        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = (uint64_t)sz_hdrmagic;
-#endif
-        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].length = sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr;
-        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
-        num_sge += 1;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].mic_addr = (uint64_t) sz_hdrmagic;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr =
+        conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t) sz_hdrmagic -
+                                                           (uint64_t) conp->
+                                                           icom_mem[IBCOM_RDMAWR_FROM]);
+#else
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = (uint64_t) sz_hdrmagic;
+#endif
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].length =
+        sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].lkey =
+        conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+    num_sge += 1;
 
-    if(sz_data) {
+    if (sz_data) {
         //dprintf("ibcom_isend,data=%p,sz_data=%d\n", data, sz_data);
         struct ibv_mr *mr_data = ibcom_reg_mr_fetch(data, sz_data);
         IBCOM_ERR_CHKANDJUMP(!mr_data, -1, printf("ibcom_isend,ibv_reg_mr_fetch failed\n"));
 #ifdef DCFA
-        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].mic_addr = (uint64_t)data;
-        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = mr_data->host_addr + ((uint64_t)data - (uint64_t)data);
+        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].mic_addr = (uint64_t) data;
+        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr =
+            mr_data->host_addr + ((uint64_t) data - (uint64_t) data);
 #else
-        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = (uint64_t)data;
+        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = (uint64_t) data;
 #endif
         conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].length = sz_data;
         conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].lkey = mr_data->lkey;
@@ -1044,55 +1144,70 @@ int ibcom_isend(int condesc, uint64_t wr_id, void* prefix, int sz_prefix, void*
     }
 
     int sz_pad = sz_data_pow2 - (sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr + sz_data);
-    tailmagic_t* tailmagic = (tailmagic_t*)(buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr + sz_pad);
-    tailmagic->magic = IBCOM_MAGIC; 
+    tailmagic_t *tailmagic =
+        (tailmagic_t *) (buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr + sz_pad);
+    tailmagic->magic = IBCOM_MAGIC;
 #ifdef DCFA
-    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].mic_addr = (uint64_t)buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr;
-    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
-#else
-    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = (uint64_t)buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].mic_addr =
+        (uint64_t) buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr =
+        conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t) buf_from +
+                                                           sizeof(sz_hdrmagic_t) + sz_prefix +
+                                                           sz_hdr -
+                                                           (uint64_t) conp->
+                                                           icom_mem[IBCOM_RDMAWR_FROM]);
+#else
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr =
+        (uint64_t) buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr;
 #endif
     conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].length = sz_pad + sizeof(tailmagic_t);
-    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].lkey =
+        conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
     num_sge += 1;
-    dprintf("ibcom_isend,sz_data=%d,pow2=%d,sz_pad=%d,num_sge=%d\n", sz_data, sz_data_pow2, sz_pad, num_sge);
+    dprintf("ibcom_isend,sz_data=%d,pow2=%d,sz_pad=%d,num_sge=%d\n", sz_data, sz_data_pow2, sz_pad,
+            num_sge);
 
     conp->icom_sr[IBCOM_SMT_NOINLINE].num_sge = num_sge;
     conp->icom_sr[IBCOM_SMT_NOINLINE].wr_id = wr_id;
-    conp->icom_sr[IBCOM_SMT_NOINLINE].wr.rdma.remote_addr = (uint64_t) conp->icom_rmem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
+    conp->icom_sr[IBCOM_SMT_NOINLINE].wr.rdma.remote_addr =
+        (uint64_t) conp->icom_rmem[IBCOM_RDMAWR_TO] +
+        IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
     /* rkey is defined in ibcom_reg_mr_connect */
 
     //dprintf("ibcom_isend,condesc=%d,num_sge=%d,opcode=%08x,imm_data=%08x,wr_id=%016lx, raddr=%p, rkey=%08x\n", condesc, conp->icom_sr[IBCOM_SMT_NOINLINE].num_sge, conp->icom_sr[IBCOM_SMT_NOINLINE].opcode, conp->icom_sr[IBCOM_SMT_NOINLINE].imm_data, conp->icom_sr[IBCOM_SMT_NOINLINE].wr_id, conp->icom_sr[IBCOM_SMT_NOINLINE].wr.rdma.remote_addr, conp->icom_sr[IBCOM_SMT_NOINLINE].wr.rdma.rkey);
-    
+
     /* other commands can be executed before the RDMA-rd command */
     /* see the "Ordering and the Fence Indicator" section in "InfiniBand Architecture" by William T. Futral */
 #if 0
-    if(conp->after_rdma_rd) {
+    if (conp->after_rdma_rd) {
         conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags |= IBV_SEND_FENCE;
     }
 #endif
 #ifdef MPID_NEM_DCFA_ENABLE_INLINE
-	if(sumsz <= conp->max_inline_data) {
-		conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags |= IBV_SEND_INLINE;
+    if (sumsz <= conp->max_inline_data) {
+        conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags |= IBV_SEND_INLINE;
         *copied = 1;
-	} else {
+    }
+    else {
         *copied = 0;
     }
-#endif    
+#endif
 #ifdef DCFA
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SMT_NOINLINE]);
     IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_isend, ibv_post_send, rc=%d\n", ib_errno));
 #else
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SMT_NOINLINE], &bad_wr);
-    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_isend, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1,
+                         dprintf("ibcom_isend, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno,
+                                 bad_wr));
 #endif
 #ifdef MPID_NEM_DCFA_ENABLE_INLINE
-	if(sumsz <= conp->max_inline_data) {
-		conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags &= ~IBV_SEND_INLINE;
-	}
-#endif    
+    if (sumsz <= conp->max_inline_data) {
+        conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags &= ~IBV_SEND_INLINE;
+    }
+#endif
 #if 0
-    if(conp->after_rdma_rd) {
+    if (conp->after_rdma_rd) {
         conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags &= ~IBV_SEND_FENCE;
         conp->after_rdma_rd = 0;
     }
@@ -1101,17 +1216,18 @@ int ibcom_isend(int condesc, uint64_t wr_id, void* prefix, int sz_prefix, void*
     conp->sseq_num += 1;
     assert(conp->sseq_num > 0);
     conp->ncom += 1;
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
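
ibcom_isend above gathers the copied header and the registered payload into a single IBV_WR_RDMA_WRITE_WITH_IMM work request, so one RDMA write deposits size/header, data, padding and tail magic into the receiver's ring-buffer segment. A minimal sketch of that gather-and-post pattern, assuming already-registered memory regions mr_hdr and mr_data plus an out-of-band exchanged remote address and rkey (all names here are illustrative):

    #include <string.h>
    #include <infiniband/verbs.h>

    /* Post one RDMA-write-with-immediate carrying two scatter/gather entries. */
    static int example_rdma_write_gather(struct ibv_qp *qp,
                                         struct ibv_mr *mr_hdr, void *hdr, uint32_t sz_hdr,
                                         struct ibv_mr *mr_data, void *data, uint32_t sz_data,
                                         uint64_t remote_addr, uint32_t rkey, uint32_t seq)
    {
        struct ibv_sge sge[2];
        struct ibv_send_wr wr, *bad_wr = NULL;

        sge[0].addr = (uint64_t) hdr;   /* header segment */
        sge[0].length = sz_hdr;
        sge[0].lkey = mr_hdr->lkey;
        sge[1].addr = (uint64_t) data;  /* payload segment */
        sge[1].length = sz_data;
        sge[1].lkey = mr_data->lkey;

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = 1;                   /* echoed back in the completion */
        wr.sg_list = sge;
        wr.num_sge = 2;
        wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
        wr.send_flags = IBV_SEND_SIGNALED;
        wr.imm_data = seq;              /* e.g. the sender's sequence number */
        wr.wr.rdma.remote_addr = remote_addr;
        wr.wr.rdma.rkey = rkey;

        return ibv_post_send(qp, &wr, &bad_wr);
    }

When the whole message fits within max_inline_data, ibcom_isend additionally sets IBV_SEND_INLINE so the HCA copies the data at post time (hence *copied = 1), and clears the flag again right after ibv_post_send.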
 
-int ibcom_isend_chain(int condesc, uint64_t wr_id, void* hdr, int sz_hdr, void* data, int sz_data) {
-    IbCom	*conp;
+int ibcom_isend_chain(int condesc, uint64_t wr_id, void *hdr, int sz_hdr, void *data, int sz_data)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
-    struct ibv_send_wr	*bad_wr;
-    int	ib_errno;
+    struct ibv_send_wr *bad_wr;
+    int ib_errno;
     int sz_data_rem = sz_data;
     int i;
     struct ibv_mr *mr_data;
@@ -1120,120 +1236,164 @@ int ibcom_isend_chain(int condesc, uint64_t wr_id, void* hdr, int sz_hdr, void*
 
     dprintf("ibcom_isend_chain,enter\n");
     RANGE_CHECK_WITH_ERROR(condesc, conp);
-    IBCOM_ERR_CHKANDJUMP(conp->icom_connected == 0, -1, printf("ibcom_isend_chain,icom_connected==0\n"));
+    IBCOM_ERR_CHKANDJUMP(conp->icom_connected == 0, -1,
+                         printf("ibcom_isend_chain,icom_connected==0\n"));
 
-    void* buf_from = conp->icom_mem[IBCOM_RDMAWR_FROM] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
+    void *buf_from =
+        conp->icom_mem[IBCOM_RDMAWR_FROM] +
+        IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
 
     /* ensure the tail-magic position falls within a fixed set of offsets */
     int sz_data_pow2;
     DCFA_NEM_SZ_DATA_POW2(sizeof(sz_hdrmagic_t) + sz_hdr + sz_data);
 
     /* make the last command, icom_sr[IBCOM_SMT_INLINE_CHAIN-1], the one that carries IBV_WR_RDMA_WRITE_WITH_IMM */
-    int s = IBCOM_SMT_INLINE_NCHAIN - (sizeof(sz_hdrmagic_t) + sz_hdr + sz_data_pow2 + sizeof(tailmagic_t) + IBCOM_INLINE_DATA - 1) / IBCOM_INLINE_DATA;
-    IBCOM_ERR_CHKANDJUMP((sizeof(sz_hdrmagic_t) + sz_hdr + sz_data_pow2) % 4 != 0, -1, printf("ibcom_isend_chain,tail-magic gets over packet-boundary\n"));
-    IBCOM_ERR_CHKANDJUMP(s < 0 || s >= IBCOM_SMT_INLINE_NCHAIN, -1, printf("ibcom_isend_chain,s\n"));
+    int s =
+        IBCOM_SMT_INLINE_NCHAIN - (sizeof(sz_hdrmagic_t) + sz_hdr + sz_data_pow2 +
+                                   sizeof(tailmagic_t) + IBCOM_INLINE_DATA - 1) / IBCOM_INLINE_DATA;
+    IBCOM_ERR_CHKANDJUMP((sizeof(sz_hdrmagic_t) + sz_hdr + sz_data_pow2) % 4 != 0, -1,
+                         printf("ibcom_isend_chain,tail-magic gets over packet-boundary\n"));
+    IBCOM_ERR_CHKANDJUMP(s < 0 ||
+                         s >= IBCOM_SMT_INLINE_NCHAIN, -1, printf("ibcom_isend_chain,s\n"));
     dprintf("ibcom_isend_chain,sz_hdr=%d,sz_data=%d,s=%d\n", sz_hdr, sz_data, s);
 
-    for(i = s; i < IBCOM_SMT_INLINE_NCHAIN; i++) {
+    for (i = s; i < IBCOM_SMT_INLINE_NCHAIN; i++) {
 
         //tscs = MPID_nem_dcfa_rdtsc();
-        int sz_used = 0; /* how much of the payload of a IB packet is used? */
+        int sz_used = 0;        /* how much of the payload of an IB packet is used? */
         int num_sge = 0;
-        if(i == s) { 
-            sz_hdrmagic_t* sz_hdrmagic = (sz_hdrmagic_t*)buf_from;
+        if (i == s) {
+            sz_hdrmagic_t *sz_hdrmagic = (sz_hdrmagic_t *) buf_from;
             sz_hdrmagic->sz = sumsz;
             sz_hdrmagic->magic = IBCOM_MAGIC;
             memcpy(buf_from + sizeof(sz_hdrmagic_t), hdr, sz_hdr);
 #ifdef DCFA
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr = (uint64_t)buf_from;
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)buf_from - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr =
+                (uint64_t) buf_from;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr =
+                conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t) buf_from -
+                                                                   (uint64_t) conp->
+                                                                   icom_mem[IBCOM_RDMAWR_FROM]);
 #else
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = (uint64_t)buf_from;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr =
+                (uint64_t) buf_from;
 #endif
             buf_from += sizeof(sz_hdrmagic_t) + sz_hdr;
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length = sizeof(sz_hdrmagic_t) + sz_hdr;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length =
+                sizeof(sz_hdrmagic_t) + sz_hdr;
             sz_used += sizeof(sz_hdrmagic_t) + sz_hdr;
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey =
+                conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
             num_sge += 1;
             dprintf("ibcom_isend_chain,i=%d,sz_used=%d\n", i, sz_used);
         }
         //tsce = MPID_nem_dcfa_rdtsc(); printf("0,%ld\n", tsce-tscs);
 
         //tscs = MPID_nem_dcfa_rdtsc();
-        if(sz_data_rem > 0) { 
+        if (sz_data_rem > 0) {
 #ifdef DCFA
 #else
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = (uint64_t)data + sz_data - sz_data_rem;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr =
+                (uint64_t) data + sz_data - sz_data_rem;
 #endif
-            int sz_data_red = sz_used + sz_data_rem + sizeof(tailmagic_t) <= IBCOM_INLINE_DATA ? sz_data_rem : sz_data_rem <= IBCOM_INLINE_DATA - sz_used ? sz_data_rem : IBCOM_INLINE_DATA - sz_used;
+            int sz_data_red =
+                sz_used + sz_data_rem + sizeof(tailmagic_t) <=
+                IBCOM_INLINE_DATA ? sz_data_rem : sz_data_rem <=
+                IBCOM_INLINE_DATA - sz_used ? sz_data_rem : IBCOM_INLINE_DATA - sz_used;
             conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length = sz_data_red;
             sz_used += sz_data_red;
             sz_data_rem -= sz_data_red;
             IBCOM_ERR_CHKANDJUMP(sz_data_rem < 0, -1, printf("ibcom_isend_chain,sz_data_rem\n"));
-    
-            if(i == s) {
+
+            if (i == s) {
                 IBCOM_ERR_CHKANDJUMP(!sz_data, -1, printf("ibcom_isend_chain,sz_data==0\n"));
                 mr_data = ibcom_reg_mr_fetch(data, sz_data);
                 IBCOM_ERR_CHKANDJUMP(!mr_data, -1, printf("ibcom_isend,ibv_reg_mr_fetch failed\n"));
             }
 #ifdef DCFA
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr = (uint64_t)data + sz_data - sz_data_rem;
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = mr_data->host_addr + ((uint64_t)data + sz_data - sz_data_rem - (uint64_t)data);
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr =
+                (uint64_t) data + sz_data - sz_data_rem;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr =
+                mr_data->host_addr + ((uint64_t) data + sz_data - sz_data_rem - (uint64_t) data);
 #endif
             conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey = mr_data->lkey;
             num_sge += 1;
             dprintf("ibcom_isend_chain,i=%d,sz_used=%d,sz_data_rem=%d\n", i, sz_used, sz_data_rem);
-        } else { /* tailmagic only packet is being generated */
+        }
+        else {  /* tailmagic only packet is being generated */
 
         }
         //tsce = MPID_nem_dcfa_rdtsc(); printf("1,%ld\n", tsce-tscs);
 
         //tscs = MPID_nem_dcfa_rdtsc();
-        if(i == IBCOM_SMT_INLINE_NCHAIN - 1) { /* append tailmagic */
+        if (i == IBCOM_SMT_INLINE_NCHAIN - 1) { /* append tailmagic */
             int sz_pad = sz_data_pow2 - sz_data;
-            tailmagic_t* tailmagic = (tailmagic_t*)(buf_from + sz_pad);
-            tailmagic->magic = IBCOM_MAGIC; 
+            tailmagic_t *tailmagic = (tailmagic_t *) (buf_from + sz_pad);
+            tailmagic->magic = IBCOM_MAGIC;
 #ifdef DCFA
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr = (uint64_t)buf_from;
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)buf_from - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
-#else
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = (uint64_t)buf_from;
-#endif
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length = sz_pad + sizeof(tailmagic_t);
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr =
+                (uint64_t) buf_from;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr =
+                conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t) buf_from -
+                                                                   (uint64_t) conp->
+                                                                   icom_mem[IBCOM_RDMAWR_FROM]);
+#else
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr =
+                (uint64_t) buf_from;
+#endif
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length =
+                sz_pad + sizeof(tailmagic_t);
             sz_used += sz_pad + sizeof(tailmagic_t);
             IBCOM_ERR_CHKANDJUMP(sz_data_rem != 0, -1, printf("ibcom_isend_chain, sz_data_rem\n"));
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey =
+                conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
             num_sge += 1;
 
             conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].imm_data = conp->sseq_num;
-            dprintf("ibcom_isend_chain,i=%d,sz_pad=%d,sz_used=%d,num_sge=%d\n", i, sz_pad, sz_used, num_sge);
-        } else if(IBCOM_INLINE_DATA - sz_used > 0) { /* data fell short of the packet, so pad */
-            IBCOM_ERR_CHKANDJUMP(1, -1, printf("ibcom_isend_chain,tail-magic gets over packet-boundary\n"));
+            dprintf("ibcom_isend_chain,i=%d,sz_pad=%d,sz_used=%d,num_sge=%d\n", i, sz_pad, sz_used,
+                    num_sge);
+        }
+        else if (IBCOM_INLINE_DATA - sz_used > 0) {     /* data fell short of the packet, so pad */
+            IBCOM_ERR_CHKANDJUMP(1, -1,
+                                 printf
+                                 ("ibcom_isend_chain,tail-magic gets over packet-boundary\n"));
             int sz_pad = IBCOM_INLINE_DATA - sz_used;
 #ifdef DCFA
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr = (uint64_t)buf_from;
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)buf_from - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr =
+                (uint64_t) buf_from;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr =
+                conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t) buf_from -
+                                                                   (uint64_t) conp->
+                                                                   icom_mem[IBCOM_RDMAWR_FROM]);
 #else
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = (uint64_t)buf_from;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr =
+                (uint64_t) buf_from;
 #endif
             buf_from += sz_pad;
             conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length = sz_pad;
             sz_used += sz_pad;
-            IBCOM_ERR_CHKANDJUMP(sz_used != IBCOM_INLINE_DATA, -1, printf("ibcom_isend_chain, sz_used\n"));
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+            IBCOM_ERR_CHKANDJUMP(sz_used != IBCOM_INLINE_DATA, -1,
+                                 printf("ibcom_isend_chain, sz_used\n"));
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey =
+                conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
             num_sge += 1;
             dprintf("ibcom_isend_chain,i=%d,sz_pad=%d,sz_used=%d\n", i, sz_pad, sz_used);
-        } else { /* packet is full with data */
-            IBCOM_ERR_CHKANDJUMP(sz_used != IBCOM_INLINE_DATA, -1, printf("ibcom_isend_chain, sz_used\n"));
+        }
+        else {  /* packet is full with data */
+            IBCOM_ERR_CHKANDJUMP(sz_used != IBCOM_INLINE_DATA, -1,
+                                 printf("ibcom_isend_chain, sz_used\n"));
         }
         //tsce = MPID_nem_dcfa_rdtsc(); printf("2,%ld\n", tsce-tscs);
 
         conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].num_sge = num_sge;
         conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].wr_id = wr_id;
-        conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].wr.rdma.remote_addr = (uint64_t) conp->icom_rmem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG) + IBCOM_INLINE_DATA * (i - s);
+        conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].wr.rdma.remote_addr =
+            (uint64_t) conp->icom_rmem[IBCOM_RDMAWR_TO] +
+            IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG) + IBCOM_INLINE_DATA * (i -
+                                                                                               s);
     }
 #if 0
-    if(conp->after_rdma_rd) {
+    if (conp->after_rdma_rd) {
         conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + s].send_flags |= IBV_SEND_FENCE;
     }
 #endif
@@ -1243,7 +1403,7 @@ int ibcom_isend_chain(int condesc, uint64_t wr_id, void* hdr, int sz_hdr, void*
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + s], &bad_wr);
 #endif
 #if 0
-    if(i == 0 && conp->after_rdma_rd) {
+    if (i == 0 && conp->after_rdma_rd) {
         conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + s].send_flags &= ~IBV_SEND_FENCE;
         conp->after_rdma_rd = 0;
     }
@@ -1251,23 +1411,26 @@ int ibcom_isend_chain(int condesc, uint64_t wr_id, void* hdr, int sz_hdr, void*
 #ifdef DCFA
     IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_isend, ibv_post_send, rc=%d\n", ib_errno));
 #else
-    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_isend, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1,
+                         dprintf("ibcom_isend, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno,
+                                 bad_wr));
 #endif
     conp->ncom += (IBCOM_SMT_INLINE_NCHAIN - s);
     conp->sseq_num += 1;
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
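
ibcom_isend_chain splits one logical message across IBCOM_SMT_INLINE_NCHAIN chained work requests of IBCOM_INLINE_DATA bytes each, choosing the start index s so that the last element of the chain, the one issued with IBV_WR_RDMA_WRITE_WITH_IMM, always carries the tail magic. A small worked sketch of that start-index arithmetic, with illustrative constants standing in for the real macros:

    /* Illustrative stand-ins for IBCOM_SMT_INLINE_NCHAIN / IBCOM_INLINE_DATA. */
    enum { NCHAIN = 4, INLINE = 448 };

    static int chain_start(int sz_hdrmagic, int sz_hdr, int sz_data_pow2, int sz_tailmagic)
    {
        int total = sz_hdrmagic + sz_hdr + sz_data_pow2 + sz_tailmagic;
        int npackets = (total + INLINE - 1) / INLINE;   /* ceiling division */
        return NCHAIN - npackets;   /* e.g. total = 900 -> npackets = 3 -> start at index 1 */
    }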
 
-int ibcom_irecv(int condesc, uint64_t wr_id) {
+int ibcom_irecv(int condesc, uint64_t wr_id)
+{
 
-    IbCom	*conp;
+    IbCom *conp;
     int ib_errno;
     int ibcom_errno = 0;
-    struct ibv_recv_wr	*bad_wr;
-    
+    struct ibv_recv_wr *bad_wr;
+
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     //    if (conp->icom_connected == 0) { return -1; }
 
@@ -1283,140 +1446,149 @@ int ibcom_irecv(int condesc, uint64_t wr_id) {
 #ifdef DCFA
         fprintf(stderr, "ibcom_irecv: failed to post receive, ib_errno=%d\n", ib_errno);
 #else
-        fprintf(stderr, "ibcom_irecv: failed to post receive, ib_errno=%d,bad_wr=%p\n", ib_errno, bad_wr);
+        fprintf(stderr, "ibcom_irecv: failed to post receive, ib_errno=%d,bad_wr=%p\n", ib_errno,
+                bad_wr);
 #endif
         ibcom_errno = ib_errno;
         goto fn_fail;
     }
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
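
ibcom_irecv re-posts a pre-built receive work request to the receive queue. With plain libibverbs, posting a receive for a registered buffer looks like the sketch below; buf, len and mr are assumed to have been set up elsewhere:

    #include <string.h>
    #include <infiniband/verbs.h>

    static int example_post_recv(struct ibv_qp *qp, struct ibv_mr *mr,
                                 void *buf, uint32_t len, uint64_t wr_id)
    {
        struct ibv_sge sge;
        struct ibv_recv_wr rr, *bad_wr = NULL;

        sge.addr = (uint64_t) buf;
        sge.length = len;
        sge.lkey = mr->lkey;

        memset(&rr, 0, sizeof(rr));
        rr.wr_id = wr_id;       /* echoed back in the completion */
        rr.sg_list = &sge;
        rr.num_sge = 1;

        return ibv_post_recv(qp, &rr, &bad_wr);
    }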
- 
-int ibcom_udsend(int condesc, union ibv_gid* remote_gid, uint16_t remote_lid, uint32_t remote_qpn, uint32_t imm_data, uint64_t wr_id) {
-    IbCom	*conp;
-	struct ibv_send_wr *bad_wr;
+
+int ibcom_udsend(int condesc, union ibv_gid *remote_gid, uint16_t remote_lid, uint32_t remote_qpn,
+                 uint32_t imm_data, uint64_t wr_id)
+{
+    IbCom *conp;
+    struct ibv_send_wr *bad_wr;
     int ibcom_errno = 0, ib_errno;
-    
+
     RANGE_CHECK_WITH_ERROR(condesc, conp);
 
 #ifdef DCFA
-    IBCOM_ERR_CHKANDJUMP(1, -1, dprintf("ibcom_udsend not supported by DCFA because DCFA doesn't have ibv_create_ah\n"));
+    IBCOM_ERR_CHKANDJUMP(1, -1,
+                         dprintf
+                         ("ibcom_udsend not supported by DCFA because DCFA doesn't have ibv_create_ah\n"));
 #else
     /* prepare ibv_ah_attr */
     conp->icom_ah_attr[IBCOM_UD_INITIATOR].dlid = remote_lid;
 #if 0
     conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.dgid = *remote_gid;
 #endif
-    
+
     /* prepare ibv_ah */
     struct ibv_ah *ah;
     ah = ibv_create_ah(ib_pd, &conp->icom_ah_attr[IBCOM_UD_INITIATOR]);
     IBCOM_ERR_CHKANDJUMP(!ah, -1, dprintf("ibv_create_ah\n"));
-    
+
     conp->icom_sr[IBCOM_UD_INITIATOR].wr.ud.ah = ah;
     conp->icom_sr[IBCOM_UD_INITIATOR].wr.ud.remote_qpn = remote_qpn;
     /* qkey is defined in open */
 
     //dprintf("lid=%04x\n", conp->icom_ah_attr[IBCOM_UD_INITIATOR].dlid);
     //dprintf("qpn=%08x\n", conp->icom_sr[IBCOM_UD_INITIATOR].wr.ud.remote_qpn);
-    
+
     /* recv doesn't know the length, so we can't optimize it */
     //    conp->icom_sr[IBCOM_UD_INITIATOR].sg_list[0].length = length;
 
-	conp->icom_sr[IBCOM_UD_INITIATOR].wr_id = wr_id;
+    conp->icom_sr[IBCOM_UD_INITIATOR].wr_id = wr_id;
     conp->icom_sr[IBCOM_UD_INITIATOR].imm_data = imm_data;
 
 #if 0
-	if(length <= qpinfo->max_inline_data){
-		conp->icom_sr[IBCOM_UD_INITIATOR].send_flags |= IBV_SEND_INLINE;
-	}
-#endif    
+    if (length <= qpinfo->max_inline_data) {
+        conp->icom_sr[IBCOM_UD_INITIATOR].send_flags |= IBV_SEND_INLINE;
+    }
+#endif
 
 #ifdef DCFA
-	ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_UD_INITIATOR]);
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_UD_INITIATOR]);
 #else
-	ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_UD_INITIATOR], &bad_wr);
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_UD_INITIATOR], &bad_wr);
 #endif
-	IBCOM_ERR_CHKANDJUMP(ib_errno, -1, perror("ibv_post_send"));
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, perror("ibv_post_send"));
 #endif /* DCFA */
 
     conp->ncom += 1;
 
- fn_exit:
-	return ibcom_errno;
- fn_fail:
+  fn_exit:
+    return ibcom_errno;
+  fn_fail:
     goto fn_exit;
 }
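
ibcom_udsend creates an address handle for the peer's LID with ibv_create_ah and attaches it, together with the remote QP number, to a UD send request; as the code notes, this path is unavailable under DCFA because DCFA has no ibv_create_ah. A minimal plain-verbs sketch of a zero-payload UD send whose information rides in imm_data, with pd, qp and the peer's lid/qpn/qkey assumed known:

    #include <string.h>
    #include <infiniband/verbs.h>

    static int example_ud_send_imm(struct ibv_pd *pd, struct ibv_qp *qp, uint8_t port,
                                   uint16_t dlid, uint32_t remote_qpn, uint32_t qkey, uint32_t imm)
    {
        struct ibv_ah_attr ah_attr;
        struct ibv_ah *ah;
        struct ibv_send_wr wr, *bad_wr = NULL;

        memset(&ah_attr, 0, sizeof(ah_attr));
        ah_attr.dlid = dlid;            /* LID-routed, no GRH */
        ah_attr.port_num = port;
        ah = ibv_create_ah(pd, &ah_attr);
        if (!ah)
            return -1;

        memset(&wr, 0, sizeof(wr));
        wr.opcode = IBV_WR_SEND_WITH_IMM;       /* no SGE: the payload is the immediate value */
        wr.send_flags = IBV_SEND_SIGNALED;
        wr.imm_data = imm;
        wr.wr.ud.ah = ah;
        wr.wr.ud.remote_qpn = remote_qpn;
        wr.wr.ud.remote_qkey = qkey;

        /* the address handle should later be released with ibv_destroy_ah(); omitted here */
        return ibv_post_send(qp, &wr, &bad_wr);
    }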
 
-int ibcom_udrecv(int condesc){
-    IbCom	*conp;
-	struct ibv_recv_wr *bad_wr;
-	int ibcom_errno = 0, ib_errno;
+int ibcom_udrecv(int condesc)
+{
+    IbCom *conp;
+    struct ibv_recv_wr *bad_wr;
+    int ibcom_errno = 0, ib_errno;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
 
-	/* Create RR */
-	conp->icom_rr[IBCOM_UD_RESPONDER].wr_id = 0;
+    /* Create RR */
+    conp->icom_rr[IBCOM_UD_RESPONDER].wr_id = 0;
 
-	/* Post RR to RQ */
+    /* Post RR to RQ */
 #ifdef DCFA
-	ib_errno = ibv_post_recv(conp->icom_qp, &conp->icom_rr[IBCOM_UD_RESPONDER]);
+    ib_errno = ibv_post_recv(conp->icom_qp, &conp->icom_rr[IBCOM_UD_RESPONDER]);
 #else
-	ib_errno = ibv_post_recv(conp->icom_qp, &conp->icom_rr[IBCOM_UD_RESPONDER],
- &bad_wr);
+    ib_errno = ibv_post_recv(conp->icom_qp, &conp->icom_rr[IBCOM_UD_RESPONDER], &bad_wr);
 #endif
-	IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibv_post_recv ib_errno=%d\n", ib_errno));
-	
- fn_exit:
-	return ibcom_errno;
- fn_fail:
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibv_post_recv ib_errno=%d\n", ib_errno));
+
+  fn_exit:
+    return ibcom_errno;
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_lrecv(int condesc, uint64_t wr_id, void* raddr, int sz_data, uint32_t rkey, void* laddr) {
-    IbCom	*conp;
+int ibcom_lrecv(int condesc, uint64_t wr_id, void *raddr, int sz_data, uint32_t rkey, void *laddr)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
-    struct ibv_send_wr	*bad_wr;
-    int	ib_errno;
+    struct ibv_send_wr *bad_wr;
+    int ib_errno;
     int num_sge;
-    
+
     dprintf("ibcom_lrecv,enter,raddr=%p,sz_data=%d,laddr=%p\n", raddr, sz_data, laddr);
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1, dprintf("ibcom_lrecv,not connected\n"));
     IBCOM_ERR_CHKANDJUMP(!sz_data, -1, dprintf("ibcom_lrecv,sz_data==0\n"));
-    
+
     num_sge = 0;
-    
+
     /* register memory area containing data */
     struct ibv_mr *mr_data = ibcom_reg_mr_fetch(laddr, sz_data);
     IBCOM_ERR_CHKANDJUMP(!mr_data, -1, dprintf("ibcom_lrecv,ibv_reg_mr_fetch failed\n"));
-    
+
     /* Erase magic, super bug!! */
     //((tailmagic_t*)(laddr + sz_data - sizeof(tailmagic_t)))->magic = 0;
 #ifdef DCFA
-    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].mic_addr = (uint64_t)laddr;
-    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].addr = mr_data->host_addr + ((uint64_t)laddr - (uint64_t)laddr);
+    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].mic_addr = (uint64_t) laddr;
+    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].addr =
+        mr_data->host_addr + ((uint64_t) laddr - (uint64_t) laddr);
 #else
-    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].addr = (uint64_t)laddr;
+    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].addr = (uint64_t) laddr;
 #endif
     conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].length = sz_data;
     conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].lkey = mr_data->lkey;
     num_sge += 1;
-    
+
     conp->icom_sr[IBCOM_LMT_INITIATOR].num_sge = num_sge;
     conp->icom_sr[IBCOM_LMT_INITIATOR].wr_id = wr_id;
-    conp->icom_sr[IBCOM_LMT_INITIATOR].wr.rdma.remote_addr = (uint64_t)raddr;
+    conp->icom_sr[IBCOM_LMT_INITIATOR].wr.rdma.remote_addr = (uint64_t) raddr;
     conp->icom_sr[IBCOM_LMT_INITIATOR].wr.rdma.rkey = rkey;
-    
+
 #ifdef DCFA
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_LMT_INITIATOR]);
     IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_lrecv, ibv_post_send, rc=%d\n", ib_errno));
 #else
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_LMT_INITIATOR], &bad_wr);
-    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_lrecv, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1,
+                         dprintf("ibcom_lrecv, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno,
+                                 bad_wr));
 #endif
 
     /* other commands can be executed before RDMA-rd command */
@@ -1426,175 +1598,201 @@ int ibcom_lrecv(int condesc, uint64_t wr_id, void* raddr, int sz_data, uint32_t
 #endif
     conp->ncom += 1;
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
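
ibcom_lrecv is the initiator side of the LMT path: it registers the local destination buffer (via ibcom_reg_mr_fetch) and issues an RDMA read against the peer's exposed buffer, so the bulk data is pulled without a matching send on the other side. A minimal sketch of such a read, with the remote address and rkey assumed to have been exchanged beforehand:

    #include <string.h>
    #include <infiniband/verbs.h>

    static int example_rdma_read(struct ibv_qp *qp, struct ibv_mr *mr_local, void *laddr,
                                 uint32_t len, uint64_t raddr, uint32_t rkey, uint64_t wr_id)
    {
        struct ibv_sge sge;
        struct ibv_send_wr wr, *bad_wr = NULL;

        sge.addr = (uint64_t) laddr;    /* local registered destination */
        sge.length = len;
        sge.lkey = mr_local->lkey;

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = wr_id;
        wr.sg_list = &sge;
        wr.num_sge = 1;
        wr.opcode = IBV_WR_RDMA_READ;
        wr.send_flags = IBV_SEND_SIGNALED;
        wr.wr.rdma.remote_addr = raddr; /* peer's buffer, exposed through rkey */
        wr.wr.rdma.rkey = rkey;

        return ibv_post_send(qp, &wr, &bad_wr);
    }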
 
 /* use the same QP as isend */
-int ibcom_put_lmt(int condesc, uint64_t wr_id, void* raddr, int sz_data, uint32_t rkey, void* laddr) {
-    IbCom	*conp;
+int ibcom_put_lmt(int condesc, uint64_t wr_id, void *raddr, int sz_data, uint32_t rkey, void *laddr)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
-    struct ibv_send_wr	*bad_wr;
-    int	ib_errno;
+    struct ibv_send_wr *bad_wr;
+    int ib_errno;
     int num_sge;
-    
+
     dprintf("ibcom_put_lmt,enter,sz_data=%d,laddr=%p\n", sz_data, laddr);
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1, dprintf("ibcom_put_lmt,not connected\n"));
     IBCOM_ERR_CHKANDJUMP(!sz_data, -1, dprintf("ibcom_put_lmt,sz_data==0\n"));
-    
+
     num_sge = 0;
-    
+
     /* register memory area containing data */
     struct ibv_mr *mr_data = ibcom_reg_mr_fetch(laddr, sz_data);
     IBCOM_ERR_CHKANDJUMP(!mr_data, -1, dprintf("ibcom_put_lmt,ibv_reg_mr_fetch failed\n"));
-    
+
 #ifdef DCFA
-    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].mic_addr = (uint64_t)laddr;
-    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].addr = mr_data->host_addr + ((uint64_t)laddr - (uint64_t)laddr);
+    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].mic_addr = (uint64_t) laddr;
+    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].addr =
+        mr_data->host_addr + ((uint64_t) laddr - (uint64_t) laddr);
 #else
-    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].addr = (uint64_t)laddr;
+    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].addr = (uint64_t) laddr;
 #endif
     conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].length = sz_data;
     conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].lkey = mr_data->lkey;
     num_sge += 1;
-    
+
     conp->icom_sr[IBCOM_LMT_PUT].num_sge = num_sge;
     conp->icom_sr[IBCOM_LMT_PUT].wr_id = wr_id;
-    conp->icom_sr[IBCOM_LMT_PUT].wr.rdma.remote_addr = (uint64_t)raddr;
+    conp->icom_sr[IBCOM_LMT_PUT].wr.rdma.remote_addr = (uint64_t) raddr;
     conp->icom_sr[IBCOM_LMT_PUT].wr.rdma.rkey = rkey;
-    
+
 #ifdef DCFA
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_LMT_PUT]);
     IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_lmt, ibv_post_send, rc=%d\n", ib_errno));
 #else
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_LMT_PUT], &bad_wr);
-    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_lmt, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1,
+                         dprintf("ibcom_put_lmt, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno,
+                                 bad_wr));
 #endif
 
     conp->ncom += 1;
     dprintf("ibcom_put_lmt,exit\n");
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_put_scratch_pad(int condesc, uint64_t wr_id, uint64_t offset, int sz, void* laddr) {
-    IbCom	*conp;
+int ibcom_put_scratch_pad(int condesc, uint64_t wr_id, uint64_t offset, int sz, void *laddr)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
-    struct ibv_send_wr	*bad_wr;
-    int	ib_errno;
-    
-    dprintf("ibcom_put_scratch_pad,enter,wr_id=%llx,offset=%llx,sz=%d,laddr=%p\n", (unsigned long long)wr_id, (unsigned long long)offset, sz, laddr);
-    dprintf("ibcom_put_scratch_pad,data=%08x\n", *((uint32_t*)laddr));
+    struct ibv_send_wr *bad_wr;
+    int ib_errno;
+
+    dprintf("ibcom_put_scratch_pad,enter,wr_id=%llx,offset=%llx,sz=%d,laddr=%p\n",
+            (unsigned long long) wr_id, (unsigned long long) offset, sz, laddr);
+    dprintf("ibcom_put_scratch_pad,data=%08x\n", *((uint32_t *) laddr));
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
-    IBCOM_ERR_CHKANDJUMP(conp->open_flag != IBCOM_OPEN_SCRATCH_PAD, -1, dprintf("ibcom_put_scratch_pad,invalid open_flag=%d\n", conp->open_flag));
-    IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1, dprintf("ibcom_put_scratch_pad,not connected\n"));
+    IBCOM_ERR_CHKANDJUMP(conp->open_flag != IBCOM_OPEN_SCRATCH_PAD, -1,
+                         dprintf("ibcom_put_scratch_pad,invalid open_flag=%d\n", conp->open_flag));
+    IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1,
+                         dprintf("ibcom_put_scratch_pad,not connected\n"));
     IBCOM_ERR_CHKANDJUMP(!sz, -1, dprintf("ibcom_put_scratch_pad,sz==0\n"));
-    
+
     /* register memory area containing data */
     struct ibv_mr *mr_data = ibcom_reg_mr_fetch(laddr, sz);
     IBCOM_ERR_CHKANDJUMP(!mr_data, -1, dprintf("ibcom_put_scratch_pad,ibv_reg_mr_fetch failed\n"));
     dprintf("ibcom_put_scratch_pad,");
-    
+
 #ifdef DCFA
-    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].mic_addr = (uint64_t)laddr;
-    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = mr_data->host_addr + ((uint64_t)laddr - (uint64_t)laddr);
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].mic_addr = (uint64_t) laddr;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr =
+        mr_data->host_addr + ((uint64_t) laddr - (uint64_t) laddr);
 #else
-    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = (uint64_t)laddr;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = (uint64_t) laddr;
 #endif
     conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].length = sz;
     conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].lkey = mr_data->lkey;
-    
+
     /* num_sge is defined in ibcomOpen */
     conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr_id = wr_id;
-    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.remote_addr = (uint64_t)conp->icom_rmem[IBCOM_SCRATCH_PAD_TO] + offset;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.remote_addr =
+        (uint64_t) conp->icom_rmem[IBCOM_SCRATCH_PAD_TO] + offset;
     /* rkey is defined in ibcom_reg_mr_connect */
 
-    dprintf("ibcom_put_scratch_pad,wr.rdma.remote_addr=%llx\n", (unsigned long long)conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.remote_addr);
-    
+    dprintf("ibcom_put_scratch_pad,wr.rdma.remote_addr=%llx\n",
+            (unsigned long long) conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.remote_addr);
+
 #ifdef DCFA
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR]);
-    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d\n", ib_errno));
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1,
+                         dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d\n", ib_errno));
 #else
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR], &bad_wr);
-    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1,
+                         dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d, bad_wr=%p\n",
+                                 ib_errno, bad_wr));
 #endif
 
     conp->ncom_scratch_pad += 1;
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
 #ifdef DCFA_ONDEMAND
-int ibcom_cas_scratch_pad(int condesc, uint64_t wr_id, uint64_t offset, uint64_t compare, uint64_t swap) {
-    IbCom	*conp;
+int ibcom_cas_scratch_pad(int condesc, uint64_t wr_id, uint64_t offset, uint64_t compare,
+                          uint64_t swap)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
-    struct ibv_send_wr	*bad_wr;
-    int	ib_errno;
-    
-    dprintf("ibcom_put_scratch_pad,enter,wr_id=%llx,offset=%llx,sz=%d,laddr=%p\n", (unsigned long long)wr_id, (unsigned long long)offset, sz, laddr);
-    dprintf("ibcom_put_scratch_pad,data=%08x\n", *((uint32_t*)laddr));
+    struct ibv_send_wr *bad_wr;
+    int ib_errno;
+
+    dprintf("ibcom_put_scratch_pad,enter,wr_id=%llx,offset=%llx,sz=%d,laddr=%p\n",
+            (unsigned long long) wr_id, (unsigned long long) offset, sz, laddr);
+    dprintf("ibcom_put_scratch_pad,data=%08x\n", *((uint32_t *) laddr));
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
-    IBCOM_ERR_CHKANDJUMP(conp->open_flag != IBCOM_OPEN_SCRATCH_PAD, -1, dprintf("ibcom_put_scratch_pad,invalid open_flag=%d\n", conp->open_flag));
-    IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1, dprintf("ibcom_put_scratch_pad,not connected\n"));
+    IBCOM_ERR_CHKANDJUMP(conp->open_flag != IBCOM_OPEN_SCRATCH_PAD, -1,
+                         dprintf("ibcom_put_scratch_pad,invalid open_flag=%d\n", conp->open_flag));
+    IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1,
+                         dprintf("ibcom_put_scratch_pad,not connected\n"));
     IBCOM_ERR_CHKANDJUMP(!sz, -1, dprintf("ibcom_put_scratch_pad,sz==0\n"));
-    
+
     /* register memory area containing data */
     struct ibv_mr *mr_data = ibcom_reg_mr_fetch(laddr, sz);
     IBCOM_ERR_CHKANDJUMP(!mr_data, -1, dprintf("ibcom_put_scratch_pad,ibv_reg_mr_fetch failed\n"));
     dprintf("ibcom_put_scratch_pad,");
-    
+
 #ifdef DCFA
-    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].mic_addr = (uint64_t)laddr;
-    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = mr_data->host_addr + ((uint64_t)laddr - (uint64_t)laddr);
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].mic_addr = (uint64_t) laddr;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr =
+        mr_data->host_addr + ((uint64_t) laddr - (uint64_t) laddr);
 #else
-    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = (uint64_t)laddr;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = (uint64_t) laddr;
 #endif
     conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].length = sz;
     conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].lkey = mr_data->lkey;
-    
+
     /* num_sge is defined in ibcomOpen */
     conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr_id = wr_id;
-    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.atomic.remote_addr = (uint64_t)conp->icom_rmem[IBCOM_SCRATCH_PAD_TO] + offset;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.atomic.remote_addr =
+        (uint64_t) conp->icom_rmem[IBCOM_SCRATCH_PAD_TO] + offset;
     /* rkey is defined in ibcom_reg_mr_connect */
     conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.atomic.compare_add = compare;
     conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.atomic.swap = swap;
 
-    dprintf("ibcom_put_scratch_pad,wr.rdma.remote_addr=%llx\n", (unsigned long long)conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.remote_addr);
-    
+    dprintf("ibcom_put_scratch_pad,wr.rdma.remote_addr=%llx\n",
+            (unsigned long long) conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.remote_addr);
+
 #ifdef DCFA
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR]);
-    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d\n", ib_errno));
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1,
+                         dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d\n", ib_errno));
 #else
     ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR], &bad_wr);
-    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1,
+                         dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d, bad_wr=%p\n",
+                                 ib_errno, bad_wr));
 #endif
 
     conp->ncom_scratch_pad += 1;
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 #endif
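
The DCFA_ONDEMAND variant above issues a compare-and-swap against the remote scratch-pad area. In plain verbs, a remote atomic targets an 8-byte, 8-byte-aligned location (whose memory region must have been registered with IBV_ACCESS_REMOTE_ATOMIC) and returns the pre-swap value into a local 8-byte registered buffer; a minimal sketch, with the result buffer and the remote address/rkey assumed to exist:

    #include <string.h>
    #include <infiniband/verbs.h>

    static int example_atomic_cas(struct ibv_qp *qp, struct ibv_mr *mr_result, uint64_t *result,
                                  uint64_t remote_addr, uint32_t rkey,
                                  uint64_t compare, uint64_t swap)
    {
        struct ibv_sge sge;
        struct ibv_send_wr wr, *bad_wr = NULL;

        sge.addr = (uint64_t) result;           /* receives the old remote value */
        sge.length = sizeof(uint64_t);          /* atomics always move 8 bytes */
        sge.lkey = mr_result->lkey;

        memset(&wr, 0, sizeof(wr));
        wr.sg_list = &sge;
        wr.num_sge = 1;
        wr.opcode = IBV_WR_ATOMIC_CMP_AND_SWP;
        wr.send_flags = IBV_SEND_SIGNALED;
        wr.wr.atomic.remote_addr = remote_addr; /* must be 8-byte aligned */
        wr.wr.atomic.rkey = rkey;
        wr.wr.atomic.compare_add = compare;     /* swap is applied only if *remote == compare */
        wr.wr.atomic.swap = swap;

        return ibv_post_send(qp, &wr, &bad_wr);
    }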
 
 /* poll completion queue */
-int ibcom_poll_cq(int which_cq, struct ibv_wc* wc, int* result) {
+int ibcom_poll_cq(int which_cq, struct ibv_wc *wc, int *result)
+{
     int ibcom_errno = 0;
 
-    switch(which_cq) {
+    switch (which_cq) {
     case IBCOM_RC_SHARED_RCQ:
         *result = ibv_poll_cq(rc_shared_rcq, 1, wc);
         break;
@@ -1610,38 +1808,42 @@ int ibcom_poll_cq(int which_cq, struct ibv_wc* wc, int* result) {
     }
 
     if (*result < 0) {
-        dprintf("ibcom_poll_cq,status=%08x,vendor_err=%08x,len=%d,opcode=%08x,wr_id=%016lx\n", wc->status, wc->vendor_err, wc->byte_len, wc->opcode, wc->wr_id);
+        dprintf("ibcom_poll_cq,status=%08x,vendor_err=%08x,len=%d,opcode=%08x,wr_id=%016lx\n",
+                wc->status, wc->vendor_err, wc->byte_len, wc->opcode, wc->wr_id);
         ibcom_errno = *result;
         goto fn_fail;
     }
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
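
ibcom_poll_cq selects one of the shared completion queues and hands back a single work completion. The usual consumption pattern around ibv_poll_cq checks both the return count and the per-entry status, roughly as in this sketch:

    #include <stdio.h>
    #include <infiniband/verbs.h>

    /* Drain up to max completions from cq; returns the number consumed, or -1 on error. */
    static int example_drain_cq(struct ibv_cq *cq, int max)
    {
        struct ibv_wc wc;
        int n = 0;

        while (n < max) {
            int rc = ibv_poll_cq(cq, 1, &wc);   /* non-blocking; 0 means the CQ is empty */
            if (rc == 0)
                break;
            if (rc < 0)
                return -1;
            if (wc.status != IBV_WC_SUCCESS) {
                fprintf(stderr, "wc error: status=%d opcode=%d wr_id=%lu\n",
                        wc.status, wc.opcode, (unsigned long) wc.wr_id);
                return -1;
            }
            n += 1;     /* wc.wr_id identifies which posted request completed */
        }
        return n;
    }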
 
-int ibcom_reg_mr_connect(int condesc, void *rmem, int rkey) {
+int ibcom_reg_mr_connect(int condesc, void *rmem, int rkey)
+{
     int ibcom_errno = 0;
-    IbCom	*conp;
+    IbCom *conp;
     int i;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
-    switch(conp->open_flag) {
+    switch (conp->open_flag) {
     case IBCOM_OPEN_RC:
     case IBCOM_OPEN_RC_LMT_PUT:
         conp->icom_rmem[IBCOM_RDMAWR_TO] = rmem;
         conp->icom_rkey[IBCOM_RDMAWR_TO] = rkey;
         conp->icom_sr[IBCOM_SMT_NOINLINE].wr.rdma.rkey = conp->icom_rkey[IBCOM_RDMAWR_TO];
-        for(i = 0; i < IBCOM_SMT_INLINE_NCHAIN; i++) {
-            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].wr.rdma.rkey = conp->icom_rkey[IBCOM_RDMAWR_TO];
+        for (i = 0; i < IBCOM_SMT_INLINE_NCHAIN; i++) {
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].wr.rdma.rkey =
+                conp->icom_rkey[IBCOM_RDMAWR_TO];
         }
         break;
 
     case IBCOM_OPEN_SCRATCH_PAD:
         conp->icom_rmem[IBCOM_SCRATCH_PAD_TO] = rmem;
         conp->icom_rkey[IBCOM_SCRATCH_PAD_TO] = rkey;
-        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.rkey = conp->icom_rkey[IBCOM_SCRATCH_PAD_TO];
+        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.rkey =
+            conp->icom_rkey[IBCOM_SCRATCH_PAD_TO];
         break;
 
     default:
@@ -1649,19 +1851,20 @@ int ibcom_reg_mr_connect(int condesc, void *rmem, int rkey) {
         break;
     }
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_get_info_conn(int condesc, int key, void *out, uint32_t out_len) {
+int ibcom_get_info_conn(int condesc, int key, void *out, uint32_t out_len)
+{
     int ibcom_errno = 0;
-    IbCom	*conp;
+    IbCom *conp;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
 
-    switch(key) {
+    switch (key) {
     case IBCOM_INFOKEY_QP_QPN:
         memcpy(out, &conp->icom_qp->qp_num, out_len);
         break;
@@ -1677,52 +1880,56 @@ int ibcom_get_info_conn(int condesc, int key, void *out, uint32_t out_len) {
     case IBCOM_INFOKEY_PORT_GID:
         memcpy(out, &conp->icom_gid, out_len);
         break;
-    case IBCOM_INFOKEY_PATTR_MAX_MSG_SZ: {
+    case IBCOM_INFOKEY_PATTR_MAX_MSG_SZ:{
 #ifdef DCFA
-        uint32_t max_msg_sz = 1073741824; /* ConnectX-3 */
-        memcpy(out, &max_msg_sz, out_len);
+            uint32_t max_msg_sz = 1073741824;   /* ConnectX-3 */
+            memcpy(out, &max_msg_sz, out_len);
 #else
-        memcpy(out, &conp->icom_pattr.max_msg_sz, out_len);
+            memcpy(out, &conp->icom_pattr.max_msg_sz, out_len);
 #endif
-        break; }
+            break;
+        }
     default:
         ibcom_errno = -1;
         break;
     }
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_get_info_mr(int condesc, int memid, int key, void *out, int out_len) {
+int ibcom_get_info_mr(int condesc, int memid, int key, void *out, int out_len)
+{
     int ibcom_errno = 0;
-    IbCom	*conp;
-    struct ibv_mr	*mr;
+    IbCom *conp;
+    struct ibv_mr *mr;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
-    IBCOM_ERR_CHKANDJUMP(memid >= conp->icom_mrlen, -1, dprintf("ibcom_get_info_mr,wrong mem_id=%d\n", memid));
+    IBCOM_ERR_CHKANDJUMP(memid >= conp->icom_mrlen, -1,
+                         dprintf("ibcom_get_info_mr,wrong mem_id=%d\n", memid));
     mr = conp->icom_mrlist[memid];
 
-    switch(key) {
+    switch (key) {
     case IBCOM_INFOKEY_MR_ADDR:
 #ifdef DCFA
         /* host_addr is created by ibv_reg_mr in ibcomOpen, */
-        /* dcfa_init read this host-addr, put it into KVS, the counter-party read it through KVS*/
+        /* dcfa_init reads this host-addr and puts it into the KVS; the counterpart reads it through the KVS */
         memcpy(out, &mr->host_addr, out_len);
 #else
         memcpy(out, &mr->addr, out_len);
 #endif
         break;
-    case IBCOM_INFOKEY_MR_LENGTH: {
+    case IBCOM_INFOKEY_MR_LENGTH:{
 #ifdef DCFA
-        assert(out_len == sizeof(size_t));
-        size_t length = mr->size; /* type of mr->size is int */
-        memcpy(out, &length, out_len);
+            assert(out_len == sizeof(size_t));
+            size_t length = mr->size;   /* type of mr->size is int */
+            memcpy(out, &length, out_len);
 #else
-        memcpy(out, &mr->length, out_len);
+            memcpy(out, &mr->length, out_len);
 #endif
-        break; }
+            break;
+        }
     case IBCOM_INFOKEY_MR_RKEY:
         memcpy(out, &mr->rkey, out_len);
         break;
@@ -1731,123 +1938,134 @@ int ibcom_get_info_mr(int condesc, int memid, int key, void *out, int out_len) {
         ibcom_errno = -1;
         break;
     }
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_mem_rdmawr_from(int condesc, void** out) {
-    IbCom	*conp;
+int ibcom_mem_rdmawr_from(int condesc, void **out)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
-    *out = conp->icom_mem[IBCOM_RDMAWR_FROM] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
+    *out =
+        conp->icom_mem[IBCOM_RDMAWR_FROM] +
+        IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_mem_rdmawr_to(int condesc, int seq_num, void** out) {
-    IbCom	*conp;
+int ibcom_mem_rdmawr_to(int condesc, int seq_num, void **out)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *out = conp->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * (seq_num % IBCOM_RDMABUF_NSEG);
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_mem_udwr_from(int condesc, void** out) {
-    IbCom	*conp;
+int ibcom_mem_udwr_from(int condesc, void **out)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *out = conp->icom_mem[IBCOM_UDWR_FROM];
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_mem_udwr_to(int condesc, void** out) {
-    IbCom	*conp;
+int ibcom_mem_udwr_to(int condesc, void **out)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *out = conp->icom_mem[IBCOM_UDWR_TO];
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_sseq_num_get(int condesc, int* seq_num) {
-    IbCom	*conp;
+int ibcom_sseq_num_get(int condesc, int *seq_num)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *seq_num = conp->sseq_num;
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_lsr_seq_num_tail_get(int condesc, int** seq_num) {
-    IbCom	*conp;
+int ibcom_lsr_seq_num_tail_get(int condesc, int **seq_num)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *seq_num = &(conp->lsr_seq_num_tail);
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_rsr_seq_num_tail_get(int condesc, int** seq_num) {
-    IbCom	*conp;
+int ibcom_rsr_seq_num_tail_get(int condesc, int **seq_num)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *seq_num = &(conp->rsr_seq_num_tail);
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_rsr_seq_num_tail_last_sent_get(int condesc, int** seq_num) {
-    IbCom	*conp;
+int ibcom_rsr_seq_num_tail_last_sent_get(int condesc, int **seq_num)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *seq_num = &(conp->rsr_seq_num_tail_last_sent);
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_rdmabuf_occupancy_notify_rate_get(int condesc, int* notify_rate) {
-    IbCom	*conp;
+int ibcom_rdmabuf_occupancy_notify_rate_get(int condesc, int *notify_rate)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
 
-    switch(conp->rdmabuf_occupancy_notify_lstate) {
+    switch (conp->rdmabuf_occupancy_notify_lstate) {
     case IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW:
         *notify_rate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_HW;
         break;
@@ -1855,59 +2073,64 @@ int ibcom_rdmabuf_occupancy_notify_rate_get(int condesc, int* notify_rate) {
         *notify_rate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_LW;
         break;
     default:
-        ibcom_errno = -1; goto fn_fail;
+        ibcom_errno = -1;
+        goto fn_fail;
         break;
     }
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_rdmabuf_occupancy_notify_rstate_get(int condesc, int** rstate) {
-    IbCom	*conp;
+int ibcom_rdmabuf_occupancy_notify_rstate_get(int condesc, int **rstate)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *rstate = &(conp->rdmabuf_occupancy_notify_rstate);
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_rdmabuf_occupancy_notify_lstate_get(int condesc, int** lstate) {
-    IbCom	*conp;
+int ibcom_rdmabuf_occupancy_notify_lstate_get(int condesc, int **lstate)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *lstate = &(conp->rdmabuf_occupancy_notify_lstate);
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_obtain_pointer(int condesc, IbCom** ibcom) {
-    IbCom	*conp;
+int ibcom_obtain_pointer(int condesc, IbCom ** ibcom)
+{
+    IbCom *conp;
     int ibcom_errno = 0;
 
     RANGE_CHECK_WITH_ERROR(condesc, conp);
     *ibcom = conp;
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-void ibcomShow(int condesc) {
-    IbCom	*conp;
-    uint8_t	*p;
-    int		i;
+void ibcomShow(int condesc)
+{
+    IbCom *conp;
+    uint8_t *p;
+    int i;
 
     RANGE_CHECK(condesc, conp);
     fprintf(stdout, "qp_num = %d\n", conp->icom_qp->qp_num);
@@ -1916,7 +2139,7 @@ void ibcomShow(int condesc) {
 #else
     fprintf(stdout, "lid    = %d\n", conp->icom_pattr.lid);
 #endif
-    p = (uint8_t*) &conp->icom_gid;
+    p = (uint8_t *) & conp->icom_gid;
     fprintf(stdout, "gid    = %02x", p[0]);
     for (i = 1; i < 16; i++) {
         fprintf(stdout, ":%02x", p[i]);
@@ -1925,47 +2148,55 @@ void ibcomShow(int condesc) {
 }
 
 static char *strerror_tbl[] = {
-	[0] = "zero",
-	[1] = "one",
-	[2] = "two",
-	[3] = "three",
+    [0] = "zero",
+    [1] = "one",
+    [2] = "two",
+    [3] = "three",
 };
 
-char* ibcom_strerror(int errno) {
-    char* r;
-    if(-errno > 3) {
+char *ibcom_strerror(int errno)
+{
+    char *r;
+    if (-errno > 3) {
         r = malloc(256);
         sprintf(r, "%d", -errno);
         goto fn_exit;
-    } else {
+    }
+    else {
         r = strerror_tbl[-errno];
     }
- fn_exit:
+  fn_exit:
     return r;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_reg_mr(void *addr, int len, struct ibv_mr **mr) {
+int ibcom_reg_mr(void *addr, int len, struct ibv_mr **mr)
+{
     int ibcom_errno = 0;
-	dprintf("ibcom_reg_mr,addr=%p,len=%d,mr=%p\n", addr, len, mr);
+    dprintf("ibcom_reg_mr,addr=%p,len=%d,mr=%p\n", addr, len, mr);
 
-    *mr = ibv_reg_mr(ib_pd, addr, len, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
+    *mr =
+        ibv_reg_mr(ib_pd, addr, len,
+                   IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
 
     IBCOM_ERR_CHKANDJUMP(*mr == 0, -1, dprintf("ibcom_reg_mr,cannot register memory\n"));
 
- fn_exit:
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
-int ibcom_dereg_mr(struct ibv_mr *mr) {
-	int i;
-	int ib_errno;
+int ibcom_dereg_mr(struct ibv_mr *mr)
+{
+    int i;
+    int ib_errno;
     int ibcom_errno = 0;
 
-    if (!mr) { goto fn_exit; }
+    if (!mr) {
+        goto fn_exit;
+    }
 
     ib_errno = ibv_dereg_mr(mr);
     if (ib_errno < 0) {
@@ -1977,10 +2208,9 @@ int ibcom_dereg_mr(struct ibv_mr *mr) {
 #else
     dprintf("ibcom_dereg_mr, addr=%p\n", mr->addr);
 #endif
-    
- fn_exit:
+
+  fn_exit:
     return ibcom_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
-
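
Several of the send paths above go through ibcom_reg_mr_fetch(), which is not part of this hunk; it acts as a registration cache, reusing an existing struct ibv_mr when one already covers the buffer and falling back to ibv_reg_mr otherwise (registration is expensive, so avoiding it on the eager path matters). The real implementation may differ; a deliberately tiny sketch of such a cache, using a fixed-size linear table and an already-created protection domain, could look like this:

    #include <stddef.h>
    #include <infiniband/verbs.h>

    #define CACHE_SLOTS 64

    struct mr_cache_entry { void *addr; size_t len; struct ibv_mr *mr; };
    static struct mr_cache_entry mr_cache[CACHE_SLOTS];
    static int mr_cache_used;

    /* Return a registration covering [addr, addr+len); register on a cache miss. */
    static struct ibv_mr *example_reg_mr_fetch(struct ibv_pd *pd, void *addr, size_t len)
    {
        int i;
        struct ibv_mr *mr;

        for (i = 0; i < mr_cache_used; i++) {
            char *base = (char *) mr_cache[i].addr;
            if ((char *) addr >= base && (char *) addr + len <= base + mr_cache[i].len)
                return mr_cache[i].mr;          /* hit: reuse the existing MR */
        }
        if (mr_cache_used == CACHE_SLOTS)
            return NULL;                        /* sketch only: no eviction policy */
        mr = ibv_reg_mr(pd, addr, len,
                        IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
        if (!mr)
            return NULL;
        mr_cache[mr_cache_used].addr = addr;
        mr_cache[mr_cache_used].len = len;
        mr_cache[mr_cache_used].mr = mr;
        mr_cache_used += 1;
        return mr;
    }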
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.h b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.h
index 3da5c2d..3a3f5b4 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.h
@@ -14,17 +14,17 @@
 #include <sys/types.h>
 #include <sys/mman.h>
 
-//#define DEBUG_ON	1
+//#define DEBUG_ON      1
 #ifdef DEBUG_ON
-#define DEBUG	if(dflag)
+#define DEBUG	if (dflag)
 #else
-#define DEBUG	if(0)
+#define DEBUG	if (0)
 #endif
 
 #ifdef DCFA
 #include "dcfa.h"
 
-/* 
+/*
 *** diff -p verbs.h dcfa.h (structures)
 same name, same fields
    struct ibv_device { };
@@ -46,7 +46,7 @@ same name, different fields
 +  int size;
 -  uint32_t handle;
 +  uint64_t handle;
-+  int flag;  1: offload 
++  int flag;  1: offload
 -  uint32_t lkey;
 +  int lkey;
 -  uint32_t rkey;
@@ -147,10 +147,10 @@ same name, same arguments
 same name, different arguments
 -  int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr)
 +  int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr);
-   
+
 -  int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr)
 +  int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr);
-   
+
 -  struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int comp_vector);
 +  struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe_max);
 
@@ -167,37 +167,35 @@ struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
 #include <infiniband/verbs.h>
 #endif
 
-static inline unsigned long long
-getCPUCounter(void)
+static inline unsigned long long getCPUCounter(void)
 {
     unsigned int lo, hi;
-    __asm__ __volatile__ (      // serialize
-	"xorl %%eax,%%eax \n        cpuid"
-	::: "%rax", "%rbx", "%rcx", "%rdx");
-    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
-    return (unsigned long long)hi << 32 | lo;
+    __asm__ __volatile__(// serialize
+                            "xorl %%eax,%%eax \n        cpuid":::"%rax", "%rbx", "%rcx", "%rdx");
+    __asm__ __volatile__("rdtsc":"=a"(lo), "=d"(hi));
+    return (unsigned long long) hi << 32 | lo;
 }
 
-extern struct ibv_cq		*rc_shared_scq;
-extern struct ibv_cq		*rc_shared_scq_lmt_put;
-extern struct ibv_cq		*rc_shared_scq_scratch_pad;
+extern struct ibv_cq *rc_shared_scq;
+extern struct ibv_cq *rc_shared_scq_lmt_put;
+extern struct ibv_cq *rc_shared_scq_scratch_pad;
 
-#define IBCOM_SIZE		2048 /* one process uses 2-4 fds */
-#define IBCOM_INLINE_DATA (512-64) /* experimented max is 884 */ /* this is lower bound and more than this value is set. the more this value is, the more the actual value set is. you need to check it */
+#define IBCOM_SIZE		2048    /* one process uses 2-4 fds */
+#define IBCOM_INLINE_DATA (512-64) /* experimented max is 884 */        /* this is a lower bound; the actual value chosen after ibv_create_qp is at least this. The larger this value, the larger the actual value set; you need to check it. */
 
 #define IBCOM_MAX_SQ_CAPACITY (256/1)
-#define IBCOM_MAX_RQ_CAPACITY ((IBCOM_MAX_SQ_CAPACITY)+16) /* We pre-post_recv IBCOM_MAX_SQ_CAPACITY of commands */
-#define IBCOM_MAX_SGE_CAPACITY (32/2) /* maximum for ConnectX-3 looks like 32 */
+#define IBCOM_MAX_RQ_CAPACITY ((IBCOM_MAX_SQ_CAPACITY)+16)      /* We pre-post_recv IBCOM_MAX_SQ_CAPACITY of commands */
+#define IBCOM_MAX_SGE_CAPACITY (32/2)   /* maximum for ConnectX-3 looks like 32 */
 #define IBCOM_MAX_CQ_CAPACITY IBCOM_MAX_RQ_CAPACITY
-#define IBCOM_MAX_CQ_HEIGHT_DRAIN (((IBCOM_MAX_CQ_CAPACITY)>>2)+((IBCOM_MAX_CQ_CAPACITY)>>1)) /* drain when reaching this amount */
-#define IBCOM_MAX_SQ_HEIGHT_DRAIN (((IBCOM_MAX_SQ_CAPACITY)>>2)+((IBCOM_MAX_SQ_CAPACITY)>>1)) /* drain when reaching this amount */
+#define IBCOM_MAX_CQ_HEIGHT_DRAIN (((IBCOM_MAX_CQ_CAPACITY)>>2)+((IBCOM_MAX_CQ_CAPACITY)>>1))   /* drain when reaching this amount */
+#define IBCOM_MAX_SQ_HEIGHT_DRAIN (((IBCOM_MAX_SQ_CAPACITY)>>2)+((IBCOM_MAX_SQ_CAPACITY)>>1))   /* drain when reaching this amount */
 #define IBCOM_AMT_CQ_DRAIN ((IBCOM_MAX_CQ_CAPACITY)>>2) /* drain this amount */
 #define IBCOM_MAX_RD_ATOMIC 4
 
 #define IBCOM_MAX_TRIES		 1
 #define IBCOM_SCQ_FLG		 1
 #define IBCOM_RCQ_FLG		 2
-    
+
 #define IBCOM_INFOKEY_PATTR_MAX_MSG_SZ 100
 #define IBCOM_INFOKEY_MR_ADDR 200
 #define IBCOM_INFOKEY_MR_LENGTH 201
@@ -208,126 +206,129 @@ extern struct ibv_cq		*rc_shared_scq_scratch_pad;
 
 
 /* buffers */
-#define IBCOM_NBUF_RDMA 2 /* number of <addr, sz, lkey, rkey> */
-#define IBCOM_RDMAWR_FROM 0 /* index to RDMA-write-from buffer */
-#define IBCOM_RDMAWR_TO 1 /* index to RDMA-write-to buffer */
-/* assuming that the unit (32768) is equals to eager-RDMA-write threashold 
-   assuming that the multiplier (256) is 
+#define IBCOM_NBUF_RDMA 2       /* number of <addr, sz, lkey, rkey> */
+#define IBCOM_RDMAWR_FROM 0     /* index to RDMA-write-from buffer */
+#define IBCOM_RDMAWR_TO 1       /* index to RDMA-write-to buffer */
+/* assuming that the unit (32768) is equal to the eager-RDMA-write threshold
+   assuming that the multiplier (256) is
    equals to max number of outstanding eager-RDMA-write transactions */
-#define IBCOM_RDMABUF_SZSEG (16384/4)//(16384+8+40+1) /* this size minus magics and headers must be 2^n because data might grow to the next 2^m boundary, see dcfa_impl.h, dcfa_ibcom.c, src/mpid/ch3/src/mpid_isend.c */
-#define IBCOM_RDMABUF_SZ ((IBCOM_RDMABUF_SZSEG) * 16) /* (32768 * 256) */  
-#define IBCOM_RDMABUF_NSEG ((IBCOM_RDMABUF_SZ) / (IBCOM_RDMABUF_SZSEG)) 
-#define IBCOM_SMT_INLINE_NCHAIN 8 /* maximum number of chained inline-send commands */
+#define IBCOM_RDMABUF_SZSEG (16384/4)   //(16384+8+40+1) /* this size minus magics and headers must be 2^n because data might grow to the next 2^m boundary, see dcfa_impl.h, dcfa_ibcom.c, src/mpid/ch3/src/mpid_isend.c */
+#define IBCOM_RDMABUF_SZ ((IBCOM_RDMABUF_SZSEG) * 16)   /* (32768 * 256) */
+#define IBCOM_RDMABUF_NSEG ((IBCOM_RDMABUF_SZ) / (IBCOM_RDMABUF_SZSEG))
+#define IBCOM_SMT_INLINE_NCHAIN 8       /* maximum number of chained inline-send commands */
 #define IBCOM_RDMABUF_HIGH_WATER_MARK (((IBCOM_RDMABUF_NSEG)>>1)+((IBCOM_RDMABUF_NSEG)>>2))
 #define IBCOM_RDMABUF_LOW_WATER_MARK (((IBCOM_RDMABUF_NSEG)>>2))
 #define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW 1
 #define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW 2
 #define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_HW /*1*/(((IBCOM_RDMABUF_NSEG)>>4) == 0 ? 1 : ((IBCOM_RDMABUF_NSEG)>>4))
-#define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_LW (((IBCOM_RDMABUF_NSEG)>>2)) /*12*/ /* receiver tries to notify sender the number of releases when receiver find not-noticed releases of more than this number */
-#define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_DELAY_MULTIPLIER(notify_rate) /*(notify_rate + (notify_rate>>1))*/(notify_rate) /* send seq_num to the sender side if there is no chance to embed seq_num into a packet bound for the sender side for this number of release events */
+#define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_LW (((IBCOM_RDMABUF_NSEG)>>2)) /*12*/       /* the receiver tries to notify the sender of the number of releases when it finds more unnotified releases than this number */
+#define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_DELAY_MULTIPLIER(notify_rate) /*(notify_rate + (notify_rate>>1))*/(notify_rate)     /* send seq_num to the sender side if there is no chance to embed seq_num into a packet bound for the sender side for this number of release events */
 
 #define IBCOM_NBUF_UD 2 /* number of <addr, sz, lkey, rkey> */
-#define IBCOM_UDWR_FROM 0 /* index to UD-write-from buffer */
+#define IBCOM_UDWR_FROM 0       /* index to UD-write-from buffer */
 #define IBCOM_UDWR_TO 1 /* index to UD-write-to buffer */
-#define IBCOM_UDBUF_SZ (128 * 8192) /* supporting 100K ranks with 10 rounds */ 
+#define IBCOM_UDBUF_SZ (128 * 8192)     /* supporting 100K ranks with 10 rounds */
 #define IBCOM_UDBUF_SZSEG (128)
-#define IBCOM_UDBUF_NSEG (IBCOM_UDBUF_SZ / IBCOM_UDBUF_SZSEG) 
+#define IBCOM_UDBUF_NSEG (IBCOM_UDBUF_SZ / IBCOM_UDBUF_SZSEG)
 
-#define IBCOM_NBUF_SCRATCH_PAD 1 /* number of <addr, sz, lkey, rkey> */
-#define IBCOM_SCRATCH_PAD_TO 0 /* index to RDMA-write-to buffer */
+#define IBCOM_NBUF_SCRATCH_PAD 1        /* number of <addr, sz, lkey, rkey> */
+#define IBCOM_SCRATCH_PAD_TO 0  /* index to RDMA-write-to buffer */
 
 /* send command templates */
-#define IBCOM_RC_SR_NTEMPLATE (8+1+2) /* number of request templates, 8 for inline-chained-smt, 1 for smt, 1 for lmt */
-#define IBCOM_SMT_INLINE_CHAINED0 0 /* index to it */
-#define IBCOM_SMT_INLINE_CHAINED7 7 
-#define IBCOM_SMT_NOINLINE 8 
-#define IBCOM_LMT_INITIATOR 9 /* FIXME: bad naming */
+#define IBCOM_RC_SR_NTEMPLATE (8+1+2)   /* number of request templates, 8 for inline-chained-smt, 1 for smt, 1 for lmt */
+#define IBCOM_SMT_INLINE_CHAINED0 0     /* index to it */
+#define IBCOM_SMT_INLINE_CHAINED7 7
+#define IBCOM_SMT_NOINLINE 8
+#define IBCOM_LMT_INITIATOR 9   /* FIXME: bad naming */
 
-#define IBCOM_RC_SR_LMT_PUT_NTEMPLATE IBCOM_RC_SR_NTEMPLATE /* FIXME: TEMPLATE named IBCOM_RC_SR shares IBCOM_LMT_PUT */
+#define IBCOM_RC_SR_LMT_PUT_NTEMPLATE IBCOM_RC_SR_NTEMPLATE     /* FIXME: TEMPLATE named IBCOM_RC_SR shares IBCOM_LMT_PUT */
 #define IBCOM_LMT_PUT 10
 
 /* recv command templates */
-#define IBCOM_RC_RR_NTEMPLATE 1  /* 1 for smt, */
-#define IBCOM_RDMAWR_RESPONDER  0 /* index to recv request template */
+#define IBCOM_RC_RR_NTEMPLATE 1 /* 1 for smt, */
+#define IBCOM_RDMAWR_RESPONDER  0       /* index to recv request template */
 
 /* sge template */
-#define IBCOM_SMT_INLINE_INITIATOR_NSGE 4 /* MPI header, (sz;magic), data x1, magic */
-#define IBCOM_SMT_NOINLINE_INITIATOR_NSGE 4 /* MPI header, (sz;magic), data x1, magic */
-#define IBCOM_LMT_INITIATOR_NSGE 1 /* data x1 */
-#define IBCOM_LMT_PUT_NSGE 1 /* data x1 */
-#define IBCOM_SCRATCH_PAD_INITIATOR_NSGE 1 /* QP state */
+#define IBCOM_SMT_INLINE_INITIATOR_NSGE 4       /* MPI header, (sz;magic), data x1, magic */
+#define IBCOM_SMT_NOINLINE_INITIATOR_NSGE 4     /* MPI header, (sz;magic), data x1, magic */
+#define IBCOM_LMT_INITIATOR_NSGE 1      /* data x1 */
+#define IBCOM_LMT_PUT_NSGE 1    /* data x1 */
+#define IBCOM_SCRATCH_PAD_INITIATOR_NSGE 1      /* QP state */
 
-#define IBCOM_UD_SR_NTEMPLATE 1 
-#define IBCOM_UD_RR_NTEMPLATE 1 
-#define IBCOM_UD_INITIATOR 0 /* index to send request template */
-#define IBCOM_UD_RESPONDER 0 /* index to recv request template */
+#define IBCOM_UD_SR_NTEMPLATE 1
+#define IBCOM_UD_RR_NTEMPLATE 1
+#define IBCOM_UD_INITIATOR 0    /* index to send request template */
+#define IBCOM_UD_RESPONDER 0    /* index to recv request template */
 
-#define IBCOM_SCRATCH_PAD_SR_NTEMPLATE 2 
-#define IBCOM_SCRATCH_PAD_RR_NTEMPLATE 1 
-#define IBCOM_SCRATCH_PAD_INITIATOR 0 /* index to send request template */
-#define IBCOM_SCRATCH_PAD_CAS       1 
-#define IBCOM_SCRATCH_PAD_RESPONDER 0 /* index to recv request template */
+#define IBCOM_SCRATCH_PAD_SR_NTEMPLATE 2
+#define IBCOM_SCRATCH_PAD_RR_NTEMPLATE 1
+#define IBCOM_SCRATCH_PAD_INITIATOR 0   /* index to send request template */
+#define IBCOM_SCRATCH_PAD_CAS       1
+#define IBCOM_SCRATCH_PAD_RESPONDER 0   /* index to recv request template */
 
 
 typedef struct IbCom {
-    short			icom_used;
-    short			icom_connected;
-    int				icom_port;
+    short icom_used;
+    short icom_connected;
+    int icom_port;
 #ifdef DCFA
 #else
-    struct ibv_port_attr	icom_pattr;	/* IB port attributes */
+    struct ibv_port_attr icom_pattr;    /* IB port attributes */
 #endif
-    struct ibv_qp		*icom_qp;
-    struct ibv_cq		*icom_scq;
-    struct ibv_cq		*icom_rcq;
-    struct ibv_mr		**icom_mrlist;
-    int				icom_mrlen;
-    union  ibv_gid		icom_gid;
-    void			**icom_mem;	/* 0: send 1: recv 2..: rdma */
-    int				*icom_msize;	/* 0: send 1: recv 2..: rdma */
+    struct ibv_qp *icom_qp;
+    struct ibv_cq *icom_scq;
+    struct ibv_cq *icom_rcq;
+    struct ibv_mr **icom_mrlist;
+    int icom_mrlen;
+    union ibv_gid icom_gid;
+    void **icom_mem;            /* 0: send 1: recv 2..: rdma */
+    int *icom_msize;            /* 0: send 1: recv 2..: rdma */
     struct ibv_send_wr *icom_sr;
     struct ibv_ah_attr *icom_ah_attr;
     struct ibv_recv_wr *icom_rr;
-    void			**icom_rmem;
-    int				*icom_rkey;
-    size_t			*icom_rsize;
+    void **icom_rmem;
+    int *icom_rkey;
+    size_t *icom_rsize;
     int sseq_num;
     int rsr_seq_num_poll;
-    int rsr_seq_num_tail; /* occupation status of remote Send Request (SR) queue (it covers occupation status of local RDMA-wr-to buffer) */
-    int rsr_seq_num_tail_last_sent; /* latest one sent to remote rank */
-    int lsr_seq_num_tail; /* occupation status of local Send Request (SR) queue */
-    int lsr_seq_num_tail_last_requested;  /* value when lmt_start_send issued req_seq_num */
+    int rsr_seq_num_tail;       /* occupation status of remote Send Request (SR) queue (it covers occupation status of local RDMA-wr-to buffer) */
+    int rsr_seq_num_tail_last_sent;     /* latest one sent to remote rank */
+    int lsr_seq_num_tail;       /* occupation status of local Send Request (SR) queue */
+    int lsr_seq_num_tail_last_requested;        /* value when lmt_start_send issued req_seq_num */
     int rdmabuf_occupancy_notify_rstate, rdmabuf_occupancy_notify_lstate;
-    int ncom, ncom_lmt_put, ncom_scratch_pad; /* number of entries in the command queue */
+    int ncom, ncom_lmt_put, ncom_scratch_pad;   /* number of entries in the command queue */
 
-    uint32_t max_inline_data; /* actual value obtained after ibv_create_qp */
+    uint32_t max_inline_data;   /* actual value obtained after ibv_create_qp */
     uint32_t max_send_wr;
     uint32_t max_recv_wr;
 
-    uint32_t open_flag; /* IBCOM_OPEN_UD, ... */
-    uint16_t remote_lid; /* for debug */
-    
+    uint32_t open_flag;         /* IBCOM_OPEN_UD, ... */
+    uint16_t remote_lid;        /* for debug */
+
     /* other commands can be executed before RDMA-rd command */
     /* see the "Ordering and the Fence Indicator" section in "InfiniBand Architecture" by William T. Futral */
     uint16_t after_rdma_rd;
-    
-    uint64_t rsr_seq_num_released[(IBCOM_RDMABUF_NSEG+63)/64];
+
+    uint64_t rsr_seq_num_released[(IBCOM_RDMABUF_NSEG + 63) / 64];
 
 } IbCom;
 
-extern int ibcomOpen(int ib_port, int ibcom_open_flag, int* condesc);
+extern int ibcomOpen(int ib_port, int ibcom_open_flag, int *condesc);
 extern int ibcom_alloc(int condesc, int sz);
 extern int ibcom_close(int);
 extern int ibcom_rts(int condesc, int remote_qpnum, uint16_t remote_lid, union ibv_gid *remote_gid);
 
 extern int ibcom_reg_mr_connect(int condesc, void *rmem, int rkey);
-extern int ibcom_isend(int condesc, uint64_t wr_id, void* prefix, int sz_prefix, void* hdr, int sz_hdr, void* data, int sz_data, int* copied);
+extern int ibcom_isend(int condesc, uint64_t wr_id, void *prefix, int sz_prefix, void *hdr,
+                       int sz_hdr, void *data, int sz_data, int *copied);
 //extern int ibcom_isend(int condesc, uint64_t wr_id, void* hdr, int sz_hdr, void* data, int sz_data);
 extern int ibcom_irecv(int condesc, uint64_t wr_id);
-extern int ibcom_udsend(int condesc, union ibv_gid* remote_gid, uint16_t remote_lid, uint32_t remote_qpn, uint32_t imm_data, uint64_t wr_id);
+extern int ibcom_udsend(int condesc, union ibv_gid *remote_gid, uint16_t remote_lid,
+                        uint32_t remote_qpn, uint32_t imm_data, uint64_t wr_id);
 extern int ibcom_udrecv(int condesc);
-extern int ibcom_lrecv(int condesc, uint64_t wr_id, void* raddr, int sz_data, uint32_t rkey, void* laddr);
-extern int ibcom_poll_cq(int which_cq, struct ibv_wc* wc, int* result);
+extern int ibcom_lrecv(int condesc, uint64_t wr_id, void *raddr, int sz_data, uint32_t rkey,
+                       void *laddr);
+extern int ibcom_poll_cq(int which_cq, struct ibv_wc *wc, int *result);
 
 /* for dcfa_reg_mr.c */
 extern int ibcom_reg_mr(void *addr, int len, struct ibv_mr **mr);
@@ -335,28 +336,28 @@ extern int ibcom_reg_mr(void *addr, int len, struct ibv_mr **mr);
 extern int ibcom_get_info_conn(int condesc, int key, void *out, uint32_t out_len);
 extern int ibcom_get_info_mr(int condesc, int memid, int key, void *out, int out_len);
 
-extern int ibcom_lsr_seq_num_tail_get(int condesc, int** seq_num);
-extern int ibcom_rsr_seq_num_tail_get(int condesc, int** seq_num);
-extern int ibcom_rsr_seq_num_tail_last_sent_get(int condesc, int** seq_num);
-extern int ibcom_rdmabuf_occupancy_notify_rate_get(int condesc, int* notify_rate);
-extern int ibcom_rdmabuf_occupancy_notify_rstate_get(int condesc, int** rstate);
-extern int ibcom_rdmabuf_occupancy_notify_lstate_get(int condesc, int** lstate);
+extern int ibcom_lsr_seq_num_tail_get(int condesc, int **seq_num);
+extern int ibcom_rsr_seq_num_tail_get(int condesc, int **seq_num);
+extern int ibcom_rsr_seq_num_tail_last_sent_get(int condesc, int **seq_num);
+extern int ibcom_rdmabuf_occupancy_notify_rate_get(int condesc, int *notify_rate);
+extern int ibcom_rdmabuf_occupancy_notify_rstate_get(int condesc, int **rstate);
+extern int ibcom_rdmabuf_occupancy_notify_lstate_get(int condesc, int **lstate);
 
-extern int ibcomMemInfo(int, int, void**, size_t*, int*);
-extern char* ibcom_strerror(int);
+extern int ibcomMemInfo(int, int, void **, size_t *, int *);
+extern char *ibcom_strerror(int);
 extern int dflag;
 
-extern int ibcom_mem_rdmawr_from(int condesc, void** out);
-extern int ibcom_mem_rdmawr_to(int condesc, int seq_num, void** out);
-extern int ibcom_mem_udwr_from(int condesc, void** out);
-extern int ibcom_mem_udwr_to(int condesc, void** out);
+extern int ibcom_mem_rdmawr_from(int condesc, void **out);
+extern int ibcom_mem_rdmawr_to(int condesc, int seq_num, void **out);
+extern int ibcom_mem_udwr_from(int condesc, void **out);
+extern int ibcom_mem_udwr_to(int condesc, void **out);
 
 /* dcfa_reg_mr.c */
 extern void ibcom_RegisterCacheInit();
-extern struct ibv_mr *ibcom_reg_mr_fetch(void *addr, int len); 
+extern struct ibv_mr *ibcom_reg_mr_fetch(void *addr, int len);
 
 /* dcfa_ctlmsg.c */
-extern int ibcom_udbuf_init(void* q);
+extern int ibcom_udbuf_init(void *q);
 
 #define IBCOM_RC_SHARED_RCQ 0
 #define IBCOM_RC_SHARED_SCQ 1
@@ -365,7 +366,7 @@ extern int ibcom_udbuf_init(void* q);
 #define IBCOM_RC_SHARED_SCQ_LMT_PUT 4
 
 /* flag for open */
-#define IBCOM_OPEN_RC            0x01 
+#define IBCOM_OPEN_RC            0x01
 /* for MPI control message, eager send, rendezvous protocol,
    so via RC-send/recv or RDMA-write/RDMA-read */
 
@@ -381,7 +382,7 @@ extern int ibcom_udbuf_init(void* q);
    so via RDMA-write */
 
 #define IBCOM_ERR_SETANDJUMP(errno, stmt) { stmt; ibcom_errno = errno; goto fn_fail; }
-#define IBCOM_ERR_CHKANDJUMP(cond, errno, stmt) if(cond) { stmt; ibcom_errno = errno; goto fn_fail; }
+#define IBCOM_ERR_CHKANDJUMP(cond, errno, stmt) if (cond) { stmt; ibcom_errno = errno; goto fn_fail; }
 
 #define IBCOM_QKEY 0x1234
 #define IBCOM_MAGIC 0x55
@@ -398,8 +399,8 @@ typedef struct tailmagic_t {
 } tailmagic_t;
 
 #define DCFA_NEM_SZ_DATA_POW2(sz) \
-    for(sz_data_pow2 = 15; sz_data_pow2 < (sz); sz_data_pow2 = ( (((sz_data_pow2 + 1) << 1) - 1) > IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t) ) ? IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t) : (((sz_data_pow2 + 1) << 1) - 1) ) { } \
-        if(sz_data_pow2 > IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t)) { printf("assertion failed\n"); }; \
+    for(sz_data_pow2 = 15; sz_data_pow2 < (sz); sz_data_pow2 = ((((sz_data_pow2 + 1) << 1) - 1) > IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t)) ? IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t) : (((sz_data_pow2 + 1) << 1) - 1)) { } \
+        if (sz_data_pow2 > IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t)) { printf("assertion failed\n"); }; \
 
 #define DCFA_NEM_MAX_DATA_POW2 (IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t))
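
DCFA_NEM_SZ_DATA_POW2 above rounds a payload size up to the next (2^m - 1) boundary, capped at IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t), apparently so that the rounded payload plus the tail magic still fits in one segment of the RDMA-write-to ring buffer. A standalone sketch of that computation follows (not part of the patch; round_to_pow2_minus_1, seg_sz and tail_sz are illustrative names, and the early break stands in for the macro's trailing assertion):

#include <stddef.h>
#include <stdio.h>

/* Grow from 15 (2^4 - 1) through 31, 63, ... until the value covers sz,
 * but never past seg_sz - tail_sz. */
static size_t round_to_pow2_minus_1(size_t sz, size_t seg_sz, size_t tail_sz)
{
    size_t cap = seg_sz - tail_sz;
    size_t r = 15;

    while (r < sz) {
        size_t next = ((r + 1) << 1) - 1;       /* next 2^m - 1 */
        r = (next > cap) ? cap : next;
        if (r == cap)
            break;                              /* hit the per-segment limit */
    }
    return r;
}

int main(void)
{
    /* IBCOM_RDMABUF_SZSEG is (16384/4) = 4096 in this header; a tail size
     * of 1 byte is an assumption made only for this example. */
    printf("%zu\n", round_to_pow2_minus_1(100, 4096, 1));   /* 127 */
    printf("%zu\n", round_to_pow2_minus_1(3000, 4096, 1));  /* 4095 (capped) */
    return 0;
}
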
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
index d41a372..82fa93c 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
@@ -13,12 +13,12 @@
 #include <sys/types.h>
 #include <errno.h>
 
-#define LMT_GET_CQE /* detect RDMA completion by CQE */
+#define LMT_GET_CQE     /* detect RDMA completion by CQE */
 //#define LMT_PUT_DONE
 #define DISABLE_VAR_OCC_NOTIFY_RATE
-/* lmt-put: 
+/* lmt-put:
    (1) receiver sends cts to sender (2) sender RDMA-write to receiver
-   (3) sender fetch CQE (4) receiver polls on end-flag 
+   (3) sender fetch CQE (4) receiver polls on end-flag
 */
 //#define DCFA_ONDEMAND
 
@@ -41,11 +41,11 @@ typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_dcfa_sendq_t;
    on the network module, facilitating dynamic module loading. */
 typedef struct {
     dcfaconn_t *sc;
-    int pending_sends; /* number of send in flight */
+    int pending_sends;          /* number of sends in flight */
     IbCom *ibcom, *ibcom_lmt_put;
-    MPID_nem_dcfa_sendq_t sendq; /* overflow queue for IB commands */
+    MPID_nem_dcfa_sendq_t sendq;        /* overflow queue for IB commands */
     MPID_nem_dcfa_sendq_t sendq_lmt_put;
-    int is_connected; /* dynamic connection, checked in iSendContig, protocol processed there and in progress engine */
+    int is_connected;           /* dynamic connection, checked in iSendContig, protocol processed there and in progress engine */
 } MPID_nem_dcfa_vc_area;
 
 /* macro for secret area in vc */
@@ -57,16 +57,16 @@ typedef struct {
    private fields This removes all dependencies from the req structure
    on the network module, facilitating dynamic module loading. */
 typedef struct {
-    int seq_num; /* NOT USED, DELETE IT: sequence number of SR which RDMA-RD for lmt releases in dcfa_poll */
-    struct MPID_Request *lmt_next; /* for lmtq */
-    struct MPID_Request *sendq_next; /* for sendq */
-    void* lmt_raddr; /* remember this for sendq, it might be better to use sreq->dev.iov[0].MPID_IOV_BUF instead */
-    uint32_t lmt_rkey; /* remember this for sendq, survive over lrecv and referenced when dequeueing from sendq */
-    uint32_t lmt_szsend; /* remember this for sendq */
-    uint8_t lmt_tail, lmt_sender_tail, lmt_receiver_tail; /* survive over lrecv and referenced when polling */
-    MPI_Aint lmt_dt_true_lb; /* to locate the last byte of receive buffer */
-    void* lmt_write_to_buf; /* user buffer or temporary buffer for pack and remember it for lmt_orderq */
-    void* lmt_pack_buf; /* to pack non-contiguous data */
+    int seq_num;                /* NOT USED, DELETE IT: sequence number of SR which RDMA-RD for lmt releases in dcfa_poll */
+    struct MPID_Request *lmt_next;      /* for lmtq */
+    struct MPID_Request *sendq_next;    /* for sendq */
+    void *lmt_raddr;            /* remember this for sendq, it might be better to use sreq->dev.iov[0].MPID_IOV_BUF instead */
+    uint32_t lmt_rkey;          /* remember this for sendq, survive over lrecv and referenced when dequeueing from sendq */
+    uint32_t lmt_szsend;        /* remember this for sendq */
+    uint8_t lmt_tail, lmt_sender_tail, lmt_receiver_tail;       /* survive over lrecv and referenced when polling */
+    MPI_Aint lmt_dt_true_lb;    /* to locate the last byte of receive buffer */
+    void *lmt_write_to_buf;     /* user buffer or temporary buffer for pack and remember it for lmt_orderq */
+    void *lmt_pack_buf;         /* to pack non-contiguous data */
 } MPID_nem_dcfa_req_area;
 
 /* macro for secret area in req */
@@ -77,8 +77,8 @@ typedef struct {
 /* sreq is never enqueued into posted-queue nor unexpected-queue, so we can reuse sreq->dev.next */
 #define MPID_nem_dcfa_sendq_empty(q) GENERICM_Q_EMPTY (q)
 #define MPID_nem_dcfa_sendq_head(q) GENERICM_Q_HEAD (q)
-#define MPID_nem_dcfa_sendq_next_field(ep, next_field) REQ_FIELD(ep, next_field) 
-#define MPID_nem_dcfa_sendq_next(ep) REQ_FIELD(ep, sendq_next) 
+#define MPID_nem_dcfa_sendq_next_field(ep, next_field) REQ_FIELD(ep, next_field)
+#define MPID_nem_dcfa_sendq_next(ep) REQ_FIELD(ep, sendq_next)
 //#define MPID_nem_dcfa_sendq_next(ep) (ep->dev.next) /*takagi*/
 #define MPID_nem_dcfa_sendq_enqueue(qp, ep) GENERICM_Q_ENQUEUE (qp, ep, MPID_nem_dcfa_sendq_next_field, sendq_next);
 #define MPID_nem_dcfa_sendq_enqueue_at_head(qp, ep) GENERICM_Q_ENQUEUE_AT_HEAD(qp, ep, MPID_nem_dcfa_sendq_next_field, sendq_next);
@@ -89,19 +89,19 @@ typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_dcfa_lmtq_t;
 
 /* connection manager */
 typedef struct {
-    int remote_rank; 
-    uint32_t type; /* SYN */
-    uint32_t qpn; /* QPN for eager-send channel */
-    uint32_t rkey; /* key for RDMA-write-to buffer of eager-send channel */
-    void* rmem; /* address of RDMA-write-to buffer of eager-send channel */
+    int remote_rank;
+    uint32_t type;              /* SYN */
+    uint32_t qpn;               /* QPN for eager-send channel */
+    uint32_t rkey;              /* key for RDMA-write-to buffer of eager-send channel */
+    void *rmem;                 /* address of RDMA-write-to buffer of eager-send channel */
 } MPID_nem_dcfa_cm_pkt_syn_t;
 
 typedef struct {
-    int remote_rank; 
-    uint32_t type; /* SYNACK */
-    uint32_t qpn; /* QPN for eager-send channel */
-    uint32_t rkey; /* key for RDMA-write-to buffer of eager-send channel */
-    void* rmem; /* address of RDMA-write-to buffer of eager-send channel */
+    int remote_rank;
+    uint32_t type;              /* SYNACK */
+    uint32_t qpn;               /* QPN for eager-send channel */
+    uint32_t rkey;              /* key for RDMA-write-to buffer of eager-send channel */
+    void *rmem;                 /* address of RDMA-write-to buffer of eager-send channel */
 } MPID_nem_dcfa_cm_pkt_synack_t;
 
 typedef union {
@@ -111,7 +111,7 @@ typedef union {
 
 typedef struct MPID_nem_dcfa_cm_sendq_entry {
     MPID_nem_dcfa_cm_pkt_t pending_pkt;
-    struct MPID_nem_dcfa_cm_sendq_entry *sendq_next; /* for software command queue */
+    struct MPID_nem_dcfa_cm_sendq_entry *sendq_next;    /* for software command queue */
 } MPID_nem_dcfa_cm_sendq_entry_t;
 
 #ifdef DCFA_ONDEMAND
@@ -126,7 +126,7 @@ typedef struct {
     uint32_t qpnum;
     uint16_t lid;
     union ibv_gid gid;
-    void* rmem;
+    void *rmem;
     uint32_t rkey;
 } MPID_nem_dcfa_cm_cmd_t;
 #endif
@@ -135,8 +135,8 @@ typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_dcfa_cm_sendq_t;
 
 #define MPID_nem_dcfa_cm_sendq_empty(q) GENERICM_Q_EMPTY (q)
 #define MPID_nem_dcfa_cm_sendq_head(q) GENERICM_Q_HEAD (q)
-#define MPID_nem_dcfa_cm_sendq_next_field(ep, next_field) ((ep)->next_field) 
-#define MPID_nem_dcfa_cm_sendq_next(ep) ((ep)->sendq_next) 
+#define MPID_nem_dcfa_cm_sendq_next_field(ep, next_field) ((ep)->next_field)
+#define MPID_nem_dcfa_cm_sendq_next(ep) ((ep)->sendq_next)
 #define MPID_nem_dcfa_cm_sendq_enqueue(qp, ep) GENERICM_Q_ENQUEUE (qp, ep, MPID_nem_dcfa_cm_sendq_next_field, sendq_next);
 #define MPID_nem_dcfa_cm_sendq_enqueue_at_head(qp, ep) GENERICM_Q_ENQUEUE_AT_HEAD(qp, ep, MPID_nem_dcfa_cm_sendq_next_field, sendq_next);
 #define MPID_nem_dcfa_cm_sendq_dequeue(qp, ep) GENERICM_Q_DEQUEUE (qp, ep, MPID_nem_dcfa_cm_sendq_next_field, sendq_next);
@@ -146,8 +146,8 @@ typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_dcfa_cm_sendq_t;
    so we can reuse rreq->dev.next */
 #define MPID_nem_dcfa_lmtq_empty(q) GENERICM_Q_EMPTY(q)
 #define MPID_nem_dcfa_lmtq_head(q) GENERICM_Q_HEAD(q)
-#define MPID_nem_dcfa_lmtq_next_field(ep, next_field) REQ_FIELD(ep, next_field) 
-#define MPID_nem_dcfa_lmtq_next(ep) REQ_FIELD(ep, lmt_next) 
+#define MPID_nem_dcfa_lmtq_next_field(ep, next_field) REQ_FIELD(ep, next_field)
+#define MPID_nem_dcfa_lmtq_next(ep) REQ_FIELD(ep, lmt_next)
 #define MPID_nem_dcfa_lmtq_enqueue(qp, ep) GENERICM_Q_ENQUEUE(qp, ep, MPID_nem_dcfa_lmtq_next_field, lmt_next);
 
 #define MPID_nem_dcfa_diff32(a, b) ((uint32_t)((a + (1ULL<<32) - b) & ((1ULL<<32)-1)))
@@ -156,10 +156,10 @@ typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_dcfa_cm_sendq_t;
 
 /* counting bloom filter to detect multiple lmt-sends in one send-wait period to
    avoid overwriting the last byte in the receive buffer */
-#define MPID_nem_dcfa_cbf_nslot 16 /* slots */
-#define MPID_nem_dcfa_cbf_bitsperslot 4  /* one slot can accomodate multiple bits */
+#define MPID_nem_dcfa_cbf_nslot 16      /* slots */
+#define MPID_nem_dcfa_cbf_bitsperslot 4 /* one slot can accommodate multiple bits */
 #define MPID_nem_dcfa_cbf_lognslot 4
-#define MPID_nem_dcfa_cbf_nhash 3 /* number of hash functions */
+#define MPID_nem_dcfa_cbf_nhash 3       /* number of hash functions */
 #define MPID_nem_dcfa_getpos \
     int pos_8b = pos / (8 / MPID_nem_dcfa_cbf_bitsperslot);\
     assert(0 <= pos_8b && pos_8b < MPID_nem_dcfa_cbf_nslot * MPID_nem_dcfa_cbf_bitsperslot / 8);\
@@ -169,56 +169,68 @@ typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_dcfa_cm_sendq_t;
 #define MPID_nem_dcfa_maskset \
     array[pos_8b] &= ~(((1ULL<<MPID_nem_dcfa_cbf_bitsperslot) - 1) << (pos_bps * MPID_nem_dcfa_cbf_bitsperslot)); \
     array[pos_8b] |= (bits & ((1ULL<<MPID_nem_dcfa_cbf_bitsperslot)-1)) << (pos_bps * MPID_nem_dcfa_cbf_bitsperslot)
-static inline int MPID_nem_dcfa_cbf_get(uint8_t* array, int pos) {
+static inline int MPID_nem_dcfa_cbf_get(uint8_t * array, int pos)
+{
     MPID_nem_dcfa_getpos;
     return MPID_nem_dcfa_shift;
 }
-static inline void MPID_nem_dcfa_cbf_set(uint8_t* array, int pos, uint16_t bits) {
+
+static inline void MPID_nem_dcfa_cbf_set(uint8_t * array, int pos, uint16_t bits)
+{
     MPID_nem_dcfa_getpos;
     MPID_nem_dcfa_maskset;
 }
-static inline void MPID_nem_dcfa_cbf_inc(uint8_t* array, int pos) {
+
+static inline void MPID_nem_dcfa_cbf_inc(uint8_t * array, int pos)
+{
     MPID_nem_dcfa_getpos;
     int16_t bits = MPID_nem_dcfa_shift;
     assert(bits != (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1);
     bits++;
     MPID_nem_dcfa_maskset;
 }
-static inline void MPID_nem_dcfa_cbf_dec(uint8_t* array, int pos) {
+
+static inline void MPID_nem_dcfa_cbf_dec(uint8_t * array, int pos)
+{
     MPID_nem_dcfa_getpos;
     int16_t bits = MPID_nem_dcfa_shift;
     assert(bits != 0);
     bits--;
     MPID_nem_dcfa_maskset;
 }
-static inline int MPID_nem_dcfa_cbf_hash1(uint64_t addr) {
+
+static inline int MPID_nem_dcfa_cbf_hash1(uint64_t addr)
+{
     return
-        (
-         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 0)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^ 
+        (((addr >> (MPID_nem_dcfa_cbf_lognslot * 0)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
          ((addr >> (MPID_nem_dcfa_cbf_lognslot * 3)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
          ((addr >> (MPID_nem_dcfa_cbf_lognslot * 6)) & (MPID_nem_dcfa_cbf_nslot - 1))
          + 1) & (MPID_nem_dcfa_cbf_nslot - 1);
 }
-static inline int MPID_nem_dcfa_cbf_hash2(uint64_t addr) {
+
+static inline int MPID_nem_dcfa_cbf_hash2(uint64_t addr)
+{
     /* adding one because addr tends to have a postfix of "fff" */
-    return 
-        (
-         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 1)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
+    return
+        (((addr >> (MPID_nem_dcfa_cbf_lognslot * 1)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
          ((addr >> (MPID_nem_dcfa_cbf_lognslot * 4)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
-         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 7)) & (MPID_nem_dcfa_cbf_nslot - 1)) 
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 7)) & (MPID_nem_dcfa_cbf_nslot - 1))
          + 1) & (MPID_nem_dcfa_cbf_nslot - 1);
 }
-static inline int MPID_nem_dcfa_cbf_hash3(uint64_t addr) {
+
+static inline int MPID_nem_dcfa_cbf_hash3(uint64_t addr)
+{
     /* adding two because addr tends to have a postfix of "fff" */
-    return 
-        (
-         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 2)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
+    return
+        (((addr >> (MPID_nem_dcfa_cbf_lognslot * 2)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
          ((addr >> (MPID_nem_dcfa_cbf_lognslot * 5)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
          ((addr >> (MPID_nem_dcfa_cbf_lognslot * 8)) & (MPID_nem_dcfa_cbf_nslot - 1))
          + 2) & (MPID_nem_dcfa_cbf_nslot - 1);
 
 }
-static inline void MPID_nem_dcfa_cbf_add(uint64_t addr, uint8_t* array) {
+
+static inline void MPID_nem_dcfa_cbf_add(uint64_t addr, uint8_t * array)
+{
     //dprintf("cbf_add,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));
     //dprintf("cbf_add,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
     MPID_nem_dcfa_cbf_inc(array, MPID_nem_dcfa_cbf_hash1(addr));
@@ -226,7 +238,9 @@ static inline void MPID_nem_dcfa_cbf_add(uint64_t addr, uint8_t* array) {
     MPID_nem_dcfa_cbf_inc(array, MPID_nem_dcfa_cbf_hash3(addr));
     //dprintf("cbf_add,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
 }
-static inline void MPID_nem_dcfa_cbf_delete(uint64_t addr, uint8_t* array) {
+
+static inline void MPID_nem_dcfa_cbf_delete(uint64_t addr, uint8_t * array)
+{
     //dprintf("cbf_delete,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));
     //dprintf("cbf_delete,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
     MPID_nem_dcfa_cbf_dec(array, MPID_nem_dcfa_cbf_hash1(addr));
@@ -234,21 +248,31 @@ static inline void MPID_nem_dcfa_cbf_delete(uint64_t addr, uint8_t* array) {
     MPID_nem_dcfa_cbf_dec(array, MPID_nem_dcfa_cbf_hash3(addr));
     //dprintf("cbf_delete,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
 }
-static inline int MPID_nem_dcfa_cbf_query(uint64_t addr, uint8_t* array) {
+
+static inline int MPID_nem_dcfa_cbf_query(uint64_t addr, uint8_t * array)
+{
     //dprintf("cbf_query,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));
     //dprintf("cbf_query,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
-    return 
+    return
         MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)) > 0 &&
         MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)) > 0 &&
         MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)) > 0;
 }
-static inline int MPID_nem_dcfa_cbf_would_overflow(uint64_t addr, uint8_t* array) {
+
+static inline int MPID_nem_dcfa_cbf_would_overflow(uint64_t addr, uint8_t * array)
+{
     //dprintf("cbf_would_overflow,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));
     //dprintf("cbf_would_overflow,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
-    return 
-        MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)) == (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1 ||
-        MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)) == (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1 ||
-        MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)) == (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1;
+    return
+        MPID_nem_dcfa_cbf_get(array,
+                              MPID_nem_dcfa_cbf_hash1(addr)) ==
+        (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1 ||
+        MPID_nem_dcfa_cbf_get(array,
+                              MPID_nem_dcfa_cbf_hash2(addr)) ==
+        (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1 ||
+        MPID_nem_dcfa_cbf_get(array,
+                              MPID_nem_dcfa_cbf_hash3(addr)) ==
+        (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1;
 }
 
 /* functions */
@@ -259,38 +283,42 @@ int MPID_nem_dcfa_drain_scq();
 int MPID_nem_dcfa_drain_scq_lmt_put();
 int MPID_nem_dcfa_drain_scq_scratch_pad();
 int MPID_nem_dcfa_poll(int in_blocking_poll);
-int MPID_nem_dcfa_poll_eager(MPIDI_VC_t *vc);
+int MPID_nem_dcfa_poll_eager(MPIDI_VC_t * vc);
 
 int MPID_nem_dcfa_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p);
 int MPID_nem_dcfa_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc);
 int MPID_nem_dcfa_vc_init(MPIDI_VC_t * vc);
 int MPID_nem_dcfa_vc_destroy(MPIDI_VC_t * vc);
 int MPID_nem_dcfa_vc_terminate(MPIDI_VC_t * vc);
-int MPID_nem_dcfa_pkthandler_init(MPIDI_CH3_PktHandler_Fcn *pktArray[], int arraySize);
+int MPID_nem_dcfa_pkthandler_init(MPIDI_CH3_PktHandler_Fcn * pktArray[], int arraySize);
 
-int MPID_nem_dcfa_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *header, MPIDI_msg_sz_t hdr_sz);
+int MPID_nem_dcfa_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *header,
+                                MPIDI_msg_sz_t hdr_sz);
 
 /* CH3 send/recv functions */
-int MPID_nem_dcfa_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz);
-int MPID_nem_dcfa_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr);
+int MPID_nem_dcfa_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
+                              MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz);
+int MPID_nem_dcfa_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data,
+                                  MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr);
 
 /* used by dcfa_poll.c */
-int MPID_nem_dcfa_send_progress(MPID_nem_dcfa_vc_area *vc_dcfa);
+int MPID_nem_dcfa_send_progress(MPID_nem_dcfa_vc_area * vc_dcfa);
 
 /* CH3--lmt send/recv functions */
-int MPID_nem_dcfa_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts_pkt, struct MPID_Request *req);
+int MPID_nem_dcfa_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts_pkt,
+                                   struct MPID_Request *req);
 int MPID_nem_dcfa_lmt_start_recv(struct MPIDI_VC *vc, struct MPID_Request *req, MPID_IOV s_cookie);
 int MPID_nem_dcfa_lmt_handle_cookie(struct MPIDI_VC *vc, struct MPID_Request *req, MPID_IOV cookie);
 int MPID_nem_dcfa_lmt_done_send(struct MPIDI_VC *vc, struct MPID_Request *req);
 int MPID_nem_dcfa_lmt_done_recv(struct MPIDI_VC *vc, struct MPID_Request *req);
 int MPID_nem_dcfa_lmt_vc_terminated(struct MPIDI_VC *vc);
-/* overriding functions 
-   initialize the value of a member named "recv_posted" 
+/* overriding functions
+   initialize the value of a member named "recv_posted"
    in BSS-variable named "comm_ops" with type of MPIDI_Comm_ops_t
    to "MPID_nem_dcfa_recv_posted" in dcfa_init.c
    MPIDI_Comm_ops_t is defined in src/mpid/ch3/include/mpidimpl.h */
 int MPID_nem_dcfa_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req);
-int MPID_nem_dcfa_recv_buf_released(struct MPIDI_VC *vc, void* user_data);
+int MPID_nem_dcfa_recv_buf_released(struct MPIDI_VC *vc, void *user_data);
 
 void pht_update(uint64_t vpc, uint32_t hist, int32_t taken);
 int pht_pred(uint64_t vpc, uint32_t hist);
@@ -311,27 +339,28 @@ extern dcfaconn_t *MPID_nem_dcfa_conns;
 extern MPIDI_VC_t **MPID_nem_dcfa_pollingset;
 extern int *MPID_nem_dcfa_scratch_pad_fds;
 extern int MPID_nem_dcfa_npollingset;
-extern void* MPID_nem_dcfa_fl[18];
+extern void *MPID_nem_dcfa_fl[18];
 extern int MPID_nem_dcfa_nranks;
 //extern char *MPID_nem_dcfa_recv_buf;
 extern int MPID_nem_dcfa_myrank;
 extern uint64_t MPID_nem_dcfa_tsc_poll; /* to throttle dcfa_poll in recv_posted (in dcfa_poll.c) */
-extern int MPID_nem_dcfa_ncqe; /* for lazy poll scq */
-extern int MPID_nem_dcfa_ncqe_lmt_put; /* lmt-put uses another QP, SQ, CQ to speed-up fetching CQE */
+extern int MPID_nem_dcfa_ncqe;  /* for lazy poll scq */
+extern int MPID_nem_dcfa_ncqe_lmt_put;  /* lmt-put uses another QP, SQ, CQ to speed-up fetching CQE */
 #ifdef DCFA_ONDEMAND
 extern MPID_nem_dcfa_cm_map_t MPID_nem_dcfa_cm_state;
-extern int MPID_nem_dcfa_ncqe_connect; /* couting outstanding connection requests */
+extern int MPID_nem_dcfa_ncqe_connect;  /* counting outstanding connection requests */
 #endif
 extern int MPID_nem_dcfa_ncqe_scratch_pad;
 extern int MPID_nem_dcfa_ncqe_to_drain; /* count put in lmt-put-done protocol */
-extern int MPID_nem_dcfa_ncqe_nces; /* counting non-copied eager-send */
+extern int MPID_nem_dcfa_ncqe_nces;     /* counting non-copied eager-send */
 extern MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmtq; /* poll queue for lmt */
-extern MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmt_orderq; /* force order when two or more rts_to_sender randomizes the last byte of receive buffer */
-extern MPID_nem_dcfa_vc_area* MPID_nem_dcfa_debug_current_vc_dcfa;
+extern MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmt_orderq;   /* force order when two or more rts_to_sender messages randomize the last byte of the receive buffer */
+extern MPID_nem_dcfa_vc_area *MPID_nem_dcfa_debug_current_vc_dcfa;
 
 /* to detect multiple lmt-sends in one send-wait period to
    avoid overwriting the last byte in the receive buffer */
-extern uint8_t MPID_nem_dcfa_lmt_tail_addr_cbf[MPID_nem_dcfa_cbf_nslot * MPID_nem_dcfa_cbf_bitsperslot / 8];
+extern uint8_t MPID_nem_dcfa_lmt_tail_addr_cbf[MPID_nem_dcfa_cbf_nslot *
+                                               MPID_nem_dcfa_cbf_bitsperslot / 8];
 
 #define MPID_NEM_DCFA_MAX_POLLINGSET 64
 
@@ -339,19 +368,19 @@ extern uint8_t MPID_nem_dcfa_lmt_tail_addr_cbf[MPID_nem_dcfa_cbf_nslot * MPID_ne
 #define MPID_NEM_DCFA_MEMID_RDMA 0
 
 /* command using IB UD */
-#define MPID_NEM_DCFA_SYNC_SYN 0 
-#define MPID_NEM_DCFA_SYNC_SYNACK 1 
-#define MPID_NEM_DCFA_SYNC_NACK 2 
+#define MPID_NEM_DCFA_SYNC_SYN 0
+#define MPID_NEM_DCFA_SYNC_SYNACK 1
+#define MPID_NEM_DCFA_SYNC_NACK 2
 
-#define MPID_NEM_DCFA_EAGER_MAX_MSG_SZ (IBCOM_RDMABUF_SZSEG/*1024*/-sizeof(MPIDI_CH3_Pkt_t)+sizeof(MPIDI_CH3_Pkt_eager_send_t)-sizeof(sz_hdrmagic_t)-sizeof(MPID_nem_dcfa_pkt_prefix_t)-sizeof(tailmagic_t)) /* when > this size, lmt is used. see src/mpid/ch3/src/mpid_isend.c */
-#define MPID_NEM_DCFA_POLL_PERIOD_RECV_POSTED 2000 /* minimum period from previous dcfa_poll to dcfa_poll in recv_posted */
+#define MPID_NEM_DCFA_EAGER_MAX_MSG_SZ (IBCOM_RDMABUF_SZSEG/*1024*/-sizeof(MPIDI_CH3_Pkt_t)+sizeof(MPIDI_CH3_Pkt_eager_send_t)-sizeof(sz_hdrmagic_t)-sizeof(MPID_nem_dcfa_pkt_prefix_t)-sizeof(tailmagic_t))    /* when > this size, lmt is used. see src/mpid/ch3/src/mpid_isend.c */
+#define MPID_NEM_DCFA_POLL_PERIOD_RECV_POSTED 2000      /* minimum period from previous dcfa_poll to dcfa_poll in recv_posted */
 #define MPID_NEM_DCFA_POLL_PERIOD_SEND_POSTED 2000
 
 typedef struct {
-    void* addr;
+    void *addr;
     uint32_t rkey;
-    int seq_num_tail; /* notify RDMA-write-to buffer occupation */
-    uint8_t tail; /* last word of payload */
+    int seq_num_tail;           /* notify RDMA-write-to buffer occupation */
+    uint8_t tail;               /* last word of payload */
 } MPID_nem_dcfa_lmt_cookie_t;
 
 typedef enum MPID_nem_dcfa_pkt_subtype {
@@ -408,20 +437,38 @@ typedef struct MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t {
     int state;
 } MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t;
 
-int MPID_nem_dcfa_PktHandler_EagerSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
-int MPID_nem_dcfa_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
-int MPID_nem_dcfa_PktHandler_Accumulate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
-int MPID_nem_dcfa_PktHandler_Get( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
-int MPID_nem_dcfa_PktHandler_GetResp( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
-int MPID_nem_dcfa_PktHandler_lmt_done(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
-int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
-int MPID_nem_dcfa_PktHandler_req_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
-int MPID_nem_dcfa_PktHandler_reply_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
-int MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
+int MPID_nem_dcfa_PktHandler_EagerSend(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                       MPIDI_msg_sz_t * buflen /* out */ ,
+                                       MPID_Request ** rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                 MPIDI_msg_sz_t * buflen /* out */ ,
+                                 MPID_Request ** rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                        MPIDI_msg_sz_t * buflen /* out */ ,
+                                        MPID_Request ** rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                 MPIDI_msg_sz_t * buflen /* out */ ,
+                                 MPID_Request ** rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_GetResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                     MPIDI_msg_sz_t * buflen /* out */ ,
+                                     MPID_Request ** rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_lmt_done(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                      MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp);
+int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                       MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp);
+int MPID_nem_dcfa_PktHandler_req_seq_num(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                         MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp);
+int MPID_nem_dcfa_PktHandler_reply_seq_num(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                           MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp);
+int MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state(MPIDI_VC_t * vc,
+                                                                   MPIDI_CH3_Pkt_t * pkt,
+                                                                   MPIDI_msg_sz_t * buflen,
+                                                                   MPID_Request ** rreqp);
 
 /* MPID_nem_dcfa_PktHandler_lmt_done is a wrapper of pkt_DONE_handler and calls it */
 /* pkt_DONE_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) is not exported */
-int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
+int pkt_DONE_handler(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt, MPIDI_msg_sz_t * buflen,
+                     MPID_Request ** rreqp);
 
 
 #define MPID_nem_dcfa_send_req_seq_num(vc) do { \
@@ -442,7 +489,7 @@ int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *bufle
             MPIU_ERR_CHKANDJUMP(_req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_send_req_seq_num"); \
             MPID_Request_release(_req); \
         } \
-    } while (0)   
+    } while (0)
 
 #define MPID_nem_dcfa_send_reply_seq_num(vc) do {                                                             \
         MPID_PKT_DECL_CAST(_upkt, MPID_nem_dcfa_pkt_reply_seq_num_t, _pkt);                                   \
@@ -469,7 +516,7 @@ int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *bufle
             MPIU_ERR_CHKANDJUMP(_req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_send_reply_seq_num");         \
             MPID_Request_release(_req);                                                                       \
         }                                                                                                     \
-    } while (0)   
+    } while (0)
 
 #define MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc, _state) do {                             \
         MPID_PKT_DECL_CAST(_upkt, MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t, _pkt);           \
@@ -486,7 +533,7 @@ int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *bufle
             MPIU_ERR_CHKANDJUMP(_req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state");         \
             MPID_Request_release(_req);                                                                       \
         }                                                                                                     \
-    } while (0)   
+    } while (0)
 
 #define MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, lsr_seq_num_tail) \
     do { \
@@ -501,7 +548,7 @@ int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *bufle
         \
         /*dprintf("notify_policy_lw,head=%d,tail=%d,lw=%d\n", lsr_seq_num_head, *lsr_seq_num_tail, IBCOM_RDMABUF_LOW_WATER_MARK);*/ \
         /* if the number of occupied slot of RDMA-write-to buffer have got below the low water-mark */ \
-        if(*rdmabuf_occupancy_notify_rstate == IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW && \
+        if (*rdmabuf_occupancy_notify_rstate == IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW && \
            MPID_nem_dcfa_diff32(lsr_seq_num_head, *lsr_seq_num_tail) < IBCOM_RDMABUF_LOW_WATER_MARK) { \
             dprintf("changing notify_rstate\n"); \
             /* remember remote notifying policy so that local can know when to change remote policy back to HW */ \
@@ -509,8 +556,8 @@ int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *bufle
             /* change remote notifying policy of RDMA-write-to buf occupancy info */ \
             MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc, IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW); \
         } \
-    } while(0)
-   
+    } while (0)
+
 #define MPID_nem_dcfa_lmt_send_GET_DONE(vc, rreq) do {                                                                   \
         MPID_PKT_DECL_CAST(_upkt, MPID_nem_dcfa_pkt_lmt_get_done_t, _done_pkt);                                          \
         MPID_Request *_done_req;                                                                                \
@@ -533,43 +580,63 @@ int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *bufle
             MPIU_ERR_CHKANDJUMP(_done_req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**donepkt");            \
             MPID_Request_release(_done_req);                                                                    \
         }                                                                                                       \
-    } while (0)   
+    } while (0)
 
 #define DCFA_MAX(a, b) ((a) > (b) ? (a) : (b))
 
-static inline void* MPID_nem_dcfa_stmalloc(size_t _sz) {
+static inline void *MPID_nem_dcfa_stmalloc(size_t _sz)
+{
     size_t sz = _sz;
     int i = 0;
-    do { i++; sz >>= 1; } while(sz > 0);
-    if(i < 12) { return malloc(sz); }
-    if(i > 30) { return mmap(0, sz, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); }
+    do {
+        i++;
+        sz >>= 1;
+    } while (sz > 0);
+    if (i < 12) {
+        return malloc(sz);
+    }
+    if (i > 30) {
+        return mmap(0, sz, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    }
     int ndx = i - 12;
-    void* slot;
-    if(MPID_nem_dcfa_fl[ndx]) {
+    void *slot;
+    if (MPID_nem_dcfa_fl[ndx]) {
         slot = MPID_nem_dcfa_fl[ndx];
-        if(MPID_nem_dcfa_myrank == 1) {
-            //printf("stmalloc,reuse %p,%08x\n", slot, (int)_sz); 
+        if (MPID_nem_dcfa_myrank == 1) {
+            //printf("stmalloc,reuse %p,%08x\n", slot, (int)_sz);
         }
-        MPID_nem_dcfa_fl[ndx] = *((void**)MPID_nem_dcfa_fl[ndx]);
-    } else {
-        slot = mmap(0, 1<<i, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-        if(MPID_nem_dcfa_myrank == 1) {
-            //printf("stmalloc,new %p,%08x\n", slot, (int)_sz); 
+        MPID_nem_dcfa_fl[ndx] = *((void **) MPID_nem_dcfa_fl[ndx]);
+    }
+    else {
+        slot = mmap(0, 1 << i, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+        if (MPID_nem_dcfa_myrank == 1) {
+            //printf("stmalloc,new %p,%08x\n", slot, (int)_sz);
         }
     }
     return slot;
 }
-static inline void MPID_nem_dcfa_stfree(void* ptr, size_t sz) {
-    if(MPID_nem_dcfa_myrank == 1) {
-        //printf("stfree,%p,%08x\n", ptr, (int)sz); 
+
+static inline void MPID_nem_dcfa_stfree(void *ptr, size_t sz)
+{
+    if (MPID_nem_dcfa_myrank == 1) {
+        //printf("stfree,%p,%08x\n", ptr, (int)sz);
     }
     int i = 0;
-    do { i++; sz >>= 1; } while(sz > 0);
-    if(i < 12) { free(ptr); goto fn_exit; }
-    if(i > 30) { munmap(ptr, sz); goto fn_exit; }
+    do {
+        i++;
+        sz >>= 1;
+    } while (sz > 0);
+    if (i < 12) {
+        free(ptr);
+        goto fn_exit;
+    }
+    if (i > 30) {
+        munmap(ptr, sz);
+        goto fn_exit;
+    }
     int ndx = i - 12;
-    *((void**)ptr) = MPID_nem_dcfa_fl[ndx];
+    *((void **) ptr) = MPID_nem_dcfa_fl[ndx];
     MPID_nem_dcfa_fl[ndx] = ptr;
- fn_exit:;
+  fn_exit:;
 }
 #endif /* DCFA_IMPL_H */
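
The MPID_nem_dcfa_cbf_* inlines above implement a small counting Bloom filter: 16 slots of 4-bit counters addressed by three hash functions; add/delete increment/decrement all three counters, a query succeeds only if all three are non-zero, and would_overflow reports a counter saturated at 15. A simplified standalone sketch of the same idea follows (not part of the patch; it uses one byte per counter and a toy hash, so the names and the exact bit layout differ from the header):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CBF_NSLOT    16         /* cf. MPID_nem_dcfa_cbf_nslot */
#define CBF_MAXCOUNT 15         /* a 4-bit counter saturates here */

static uint8_t cbf[CBF_NSLOT];  /* one byte per counter, for clarity */

/* toy hash: a different shift per hash function, folded into the slot range */
static unsigned cbf_hash(uint64_t addr, int k)
{
    return (unsigned) ((addr >> (4 * (k + 1))) + k) & (CBF_NSLOT - 1);
}

static void cbf_add(uint64_t addr)
{
    int k;
    for (k = 0; k < 3; k++) {
        unsigned s = cbf_hash(addr, k);
        assert(cbf[s] < CBF_MAXCOUNT);  /* what cbf_would_overflow guards */
        cbf[s]++;
    }
}

static void cbf_delete(uint64_t addr)
{
    int k;
    for (k = 0; k < 3; k++) {
        unsigned s = cbf_hash(addr, k);
        assert(cbf[s] > 0);
        cbf[s]--;
    }
}

static int cbf_query(uint64_t addr)
{
    return cbf[cbf_hash(addr, 0)] > 0 &&
           cbf[cbf_hash(addr, 1)] > 0 && cbf[cbf_hash(addr, 2)] > 0;
}

int main(void)
{
    uint64_t tail_addr = 0x7fffdeadbeefULL;     /* e.g. last byte of a recv buffer */
    cbf_add(tail_addr);
    printf("present: %d\n", cbf_query(tail_addr));      /* 1 */
    cbf_delete(tail_addr);
    printf("present: %d\n", cbf_query(tail_addr));      /* 0 */
    return 0;
}
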
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_init.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_init.c
index efdc22e..5aebe8e 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_init.c
@@ -15,7 +15,7 @@
 #endif
 
 //#define DEBUG_DCFA_INIT
-#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#ifdef dprintf  /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
 #undef dprintf
 #endif
 #ifdef DEBUG_DCFA_INIT
@@ -33,37 +33,38 @@ MPID_nem_netmod_funcs_t MPIDI_nem_dcfa_funcs = {
     MPID_nem_dcfa_vc_init,
     MPID_nem_dcfa_vc_destroy,
     MPID_nem_dcfa_vc_terminate,
-    NULL, /*MPID_nem_dcfa_anysource_iprobe*/
-    NULL, /*MPID_nem_dcfa_anysource_improbe*/
+    NULL,       /*MPID_nem_dcfa_anysource_iprobe */
+    NULL,       /*MPID_nem_dcfa_anysource_improbe */
 };
 
 MPIDI_CH3_PktHandler_Fcn *MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_NUM_PKT_HANDLERS];
 
 static MPIDI_Comm_ops_t comm_ops = {
-    /*NULL,*/MPID_nem_dcfa_recv_posted, /* recv_posted */
-    
-    NULL, /* send */
-    NULL, /* rsend */
-    NULL, /* ssend */
-    NULL, /* isend */
-    NULL, /* irsend */
-    NULL, /* issend */
-    
-    NULL, /* send_init */
-    NULL, /* bsend_init */
-    NULL, /* rsend_init */
-    NULL, /* ssend_init */
-    NULL, /* startall */
-    
-    NULL,/* cancel_send */
-    NULL, /* cancel_recv */
-    
-    NULL, /* probe */
-    NULL, /* iprobe */
-    NULL, /* improbe */
+                                                /*NULL, */ MPID_nem_dcfa_recv_posted,
+                                                /* recv_posted */
+
+    NULL,       /* send */
+    NULL,       /* rsend */
+    NULL,       /* ssend */
+    NULL,       /* isend */
+    NULL,       /* irsend */
+    NULL,       /* issend */
+
+    NULL,       /* send_init */
+    NULL,       /* bsend_init */
+    NULL,       /* rsend_init */
+    NULL,       /* ssend_init */
+    NULL,       /* startall */
+
+    NULL,       /* cancel_send */
+    NULL,       /* cancel_recv */
+
+    NULL,       /* probe */
+    NULL,       /* iprobe */
+    NULL,       /* improbe */
 };
 
-void* MPID_nem_dcfa_fl[18];
+void *MPID_nem_dcfa_fl[18];
 int MPID_nem_dcfa_nranks;
 dcfa_conn_ud_t *MPID_nem_dcfa_conn_ud;
 dcfaconn_t *MPID_nem_dcfa_conns;
@@ -84,23 +85,26 @@ int MPID_nem_dcfa_ncqe_connect;
 int MPID_nem_dcfa_ncqe_scratch_pad;
 int MPID_nem_dcfa_ncqe_to_drain;
 int MPID_nem_dcfa_ncqe_nces;
-MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmtq = {NULL, NULL}; 
-MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmt_orderq = {NULL, NULL}; 
-uint8_t MPID_nem_dcfa_lmt_tail_addr_cbf[MPID_nem_dcfa_cbf_nslot * MPID_nem_dcfa_cbf_bitsperslot / 8] = {0};
+MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmtq = { NULL, NULL };
+MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmt_orderq = { NULL, NULL };
+uint8_t MPID_nem_dcfa_lmt_tail_addr_cbf[MPID_nem_dcfa_cbf_nslot * MPID_nem_dcfa_cbf_bitsperslot /
+                                        8] = { 0 };
 static uint32_t MPID_nem_dcfa_rand_next = 1;
-MPID_nem_dcfa_vc_area* MPID_nem_dcfa_debug_current_vc_dcfa;
+MPID_nem_dcfa_vc_area *MPID_nem_dcfa_debug_current_vc_dcfa;
 static int listen_fd;
 static int listen_port;
 
-uint8_t MPID_nem_dcfa_rand() {
+uint8_t MPID_nem_dcfa_rand()
+{
     //return 0xaa;
     MPID_nem_dcfa_rand_next = MPID_nem_dcfa_rand_next * 1103515245 + 12345;
-    return (MPID_nem_dcfa_rand_next/65536) % 256;
+    return (MPID_nem_dcfa_rand_next / 65536) % 256;
 }
 
-uint64_t MPID_nem_dcfa_rdtsc() {
+uint64_t MPID_nem_dcfa_rdtsc()
+{
     uint64_t x;
-    __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "%rdx", "memory"); /* rdtsc cannot be executed earlier than here */
+    __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax":"=a"(x)::"%rdx", "memory");   /* rdtsc cannot be executed earlier than here */
     return x;
 }
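
The two helpers reindented above are self-contained: MPID_nem_dcfa_rand() is a plain linear congruential generator (the familiar 1103515245 / 12345 constants), returning bits 8-15 of its state as one byte, and MPID_nem_dcfa_rdtsc() reads the x86-64 time-stamp counter, with the "memory" clobber keeping the rdtsc from being reordered upward. A minimal standalone sketch of the same pair; the names and the test main() are illustrative and not part of this patch:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t next_state = 1;

    /* LCG step; return bits 8..15 of the state, as the netmod helper does */
    static uint8_t lcg_rand_byte(void)
    {
        next_state = next_state * 1103515245u + 12345u;
        return (next_state / 65536u) % 256u;
    }

    #if defined(__x86_64__)
    /* read the time-stamp counter; rdx carries the high 32 bits */
    static uint64_t read_tsc(void)
    {
        uint64_t x;
        __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax"
                             : "=a" (x) : : "%rdx", "memory");
        return x;
    }
    #endif

    int main(void)
    {
        int i;
        for (i = 0; i < 4; i++)
            printf("%02x ", lcg_rand_byte());
    #if defined(__x86_64__)
        printf("\ntsc=%llu\n", (unsigned long long) read_tsc());
    #endif
        return 0;
    }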
 
@@ -108,33 +112,35 @@ uint64_t MPID_nem_dcfa_rdtsc() {
 #define FUNCNAME MPID_nem_dcfa_kvs_put_binary
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_kvs_put_binary(int from, const char *postfix, const uint8_t *buf, int length) {
+int MPID_nem_dcfa_kvs_put_binary(int from, const char *postfix, const uint8_t * buf, int length)
+{
     int mpi_errno = MPI_SUCCESS;
     int pmi_errno;
-    char* kvs_name;
+    char *kvs_name;
     char key[256], val[256], str[256];
     int j;
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_KVS_PUT_BINARY);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_KVS_PUT_BINARY);
 
-    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);      
+    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
     MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPIDI_PG_GetConnKVSname");
     dprintf("kvs_put_binary,kvs_name=%s\n", kvs_name);
 
     sprintf(key, "bc/%d/%s", from, postfix);
     val[0] = 0;
-    for(j = 0; j < length; j++) {
+    for (j = 0; j < length; j++) {
         sprintf(str, "%02x", buf[j]);
         strcat(val, str);
     }
-    dprintf("kvs_put_binary,rank=%d,from=%d,PMI_KVS_Put(%s, %s, %s)\n", MPID_nem_dcfa_myrank, from, kvs_name, key, val);
+    dprintf("kvs_put_binary,rank=%d,from=%d,PMI_KVS_Put(%s, %s, %s)\n", MPID_nem_dcfa_myrank, from,
+            kvs_name, key, val);
     pmi_errno = PMI_KVS_Put(kvs_name, key, val);
     MPIU_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMI_KVS_Put");
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_KVS_PUT_BINARY);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
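
MPID_nem_dcfa_kvs_put_binary() above works around the string-only PMI key/value store by hex-encoding the binary value, two characters per byte, before handing it to PMI_KVS_Put. A stripped-down sketch of just the encoding step, with a local buffer standing in for the PMI call (the helper name is hypothetical):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* hex-encode length bytes of buf into val; val needs 2 * length + 1 chars */
    static void encode_hex(char *val, const uint8_t *buf, int length)
    {
        int j;
        val[0] = 0;
        for (j = 0; j < length; j++) {
            char str[3];
            sprintf(str, "%02x", buf[j]);
            strcat(val, str);
        }
    }

    int main(void)
    {
        uint16_t lid = 0x1a2b;                 /* e.g. a 16-bit LID */
        char val[2 * sizeof(lid) + 1];
        encode_hex(val, (const uint8_t *) &lid, (int) sizeof(lid));
        printf("%s\n", val);                   /* byte order follows the host */
        return 0;
    }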
 
@@ -142,38 +148,40 @@ int MPID_nem_dcfa_kvs_put_binary(int from, const char *postfix, const uint8_t *b
 #define FUNCNAME MPID_nem_dcfa_kvs_get_binary
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_kvs_get_binary(int from, const char *postfix, char *buf, int length) {
+int MPID_nem_dcfa_kvs_get_binary(int from, const char *postfix, char *buf, int length)
+{
     int mpi_errno = MPI_SUCCESS;
     int pmi_errno;
-    char* kvs_name;
+    char *kvs_name;
     char key[256], val[256], str[256];
     int j;
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_KVS_GET_BINARY);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_KVS_GET_BINARY);
 
-    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);      
+    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
     dprintf("kvs_get_binary,kvs_name=%s\n", kvs_name);
     MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPIDI_PG_GetConnKVSname");
 
     sprintf(key, "bc/%d/%s", from, postfix);
     pmi_errno = PMI_KVS_Get(kvs_name, key, val, 256);
-    dprintf("kvs_put_binary,rank=%d,from=%d,PMI_KVS_Get(%s, %s, %s)\n", MPID_nem_dcfa_myrank, from, kvs_name, key, val);
+    dprintf("kvs_put_binary,rank=%d,from=%d,PMI_KVS_Get(%s, %s, %s)\n", MPID_nem_dcfa_myrank, from,
+            kvs_name, key, val);
     MPIU_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMS_KVS_Get");
 
     dprintf("rank=%d,obtained val=%s\n", MPID_nem_dcfa_myrank, val);
-    char* strp = val;
-    for(j = 0; j < length; j++) {
+    char *strp = val;
+    for (j = 0; j < length; j++) {
         memcpy(str, strp, 2);
         str[2] = 0;
         buf[j] = strtol(str, NULL, 16);
         strp += 2;
     }
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_KVS_GET_BINARY);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
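
The matching get path reverses the encoding: it copies two hex characters at a time into a three-byte scratch string and converts them with strtol(..., 16). The decode step in isolation, with a literal standing in for the PMI_KVS_Get result:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* decode 2 * length hex characters from val into buf */
    static void decode_hex(char *buf, const char *val, int length)
    {
        int j;
        const char *strp = val;
        for (j = 0; j < length; j++) {
            char str[3];
            memcpy(str, strp, 2);
            str[2] = 0;
            buf[j] = (char) strtol(str, NULL, 16);
            strp += 2;
        }
    }

    int main(void)
    {
        char buf[2];
        decode_hex(buf, "2b1a", 2);
        printf("%02x %02x\n", (unsigned char) buf[0], (unsigned char) buf[1]);
        return 0;
    }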
 
@@ -209,43 +217,50 @@ int MPID_nem_dcfa_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val
     MPID_nem_dcfa_ncqe_nces = 0;
     MPID_nem_dcfa_npollingset = 0;
 
-#ifdef DCFA_ONDEMAND    
+#ifdef DCFA_ONDEMAND
     /* prepare UD QPN for dynamic connection */
     ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_UD, &MPID_nem_dcfa_conn_ud_fd);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
-    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_conn_ud_fd, &MPID_nem_dcfa_conn_ud_ibcom); 
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_conn_ud_fd, &MPID_nem_dcfa_conn_ud_ibcom);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
     ibcom_errno = ibcom_rts(MPID_nem_dcfa_conn_ud_fd, 0, 0, 0);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
-    
-    for(i = 0; i < IBCOM_MAX_RQ_CAPACITY; i++) {
+
+    for (i = 0; i < IBCOM_MAX_RQ_CAPACITY; i++) {
         ibcom_errno = ibcom_udrecv(MPID_nem_dcfa_conn_ud_fd);
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_udrecv");
     }
-    
+
     /* obtain gid, lid, qpn using KVS */
-    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_conn_ud, dcfa_conn_ud_t *, MPID_nem_dcfa_nranks * sizeof(dcfa_conn_ud_t), mpi_errno, "ud connection table");
+    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_conn_ud, dcfa_conn_ud_t *,
+                        MPID_nem_dcfa_nranks * sizeof(dcfa_conn_ud_t), mpi_errno,
+                        "ud connection table");
     memset(MPID_nem_dcfa_conn_ud, 0, MPID_nem_dcfa_nranks * sizeof(dcfa_conn_ud_t));
 
     /* put bc/<my rank>/dcs/gid:lid:qpn */
-    uint32_t  my_qpnum;
+    uint32_t my_qpnum;
     uint16_t my_lid;
     union ibv_gid my_gid;
-    ibcom_get_info_conn(MPID_nem_dcfa_conn_ud_fd, IBCOM_INFOKEY_QP_QPN, &my_qpnum, sizeof(uint32_t));
-    ibcom_get_info_conn(MPID_nem_dcfa_conn_ud_fd, IBCOM_INFOKEY_PORT_LID, &my_lid, sizeof(uint16_t));
-    ibcom_get_info_conn(MPID_nem_dcfa_conn_ud_fd, IBCOM_INFOKEY_PORT_GID, &my_gid, sizeof(union ibv_gid));
+    ibcom_get_info_conn(MPID_nem_dcfa_conn_ud_fd, IBCOM_INFOKEY_QP_QPN, &my_qpnum,
+                        sizeof(uint32_t));
+    ibcom_get_info_conn(MPID_nem_dcfa_conn_ud_fd, IBCOM_INFOKEY_PORT_LID, &my_lid,
+                        sizeof(uint16_t));
+    ibcom_get_info_conn(MPID_nem_dcfa_conn_ud_fd, IBCOM_INFOKEY_PORT_GID, &my_gid,
+                        sizeof(union ibv_gid));
 
-    char* kvs_name;
-    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);      
-    char* key_dcs, val[2*sizeof(union ibv_gid)+1+4+1+8+1], str[9];
+    char *kvs_name;
+    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
+    char *key_dcs, val[2 * sizeof(union ibv_gid) + 1 + 4 + 1 + 8 + 1], str[9];
 
     /* count maximum length of the string representation of remote_rank */
-    for(i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) { }
-    MPIU_CHKPMEM_MALLOC(key_dcs, char*, strlen("bc/") + i + strlen("/dcs/gid_lid_qpn") + 1, mpi_errno, "connection table");
+    for (i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) {
+    }
+    MPIU_CHKPMEM_MALLOC(key_dcs, char *, strlen("bc/") + i + strlen("/dcs/gid_lid_qpn") + 1,
+                        mpi_errno, "connection table");
 
     sprintf(key, "bc/%d/dcs/gid_lid_qpn", MPID_nem_dcfa_myrank);
     val[0] = 0;
-    for(j = 0; j < sizeof(union ibv_gid); j++) {
+    for (j = 0; j < sizeof(union ibv_gid); j++) {
         sprintf(str, "%02x", my_gid.raw[j]);
         strcat(val, str);
     }
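
The empty-bodied for loop in this hunk, for (i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) { }, which recurs in later hunks as well, only counts the decimal digits of the rank count so the KVS key buffer can be sized before MPIU_CHKPMEM_MALLOC; since the keys format the rank with "/%x", the decimal count is a safe upper bound. The same idea in isolation (the function name is made up for the sketch):

    #include <stdio.h>

    /* number of decimal digits needed to print n, for n > 0 */
    static int decimal_digits(int n)
    {
        int i;
        for (i = 0; n > 0; n /= 10, i++) {
        }
        return i;
    }

    int main(void)
    {
        printf("%d %d %d\n", decimal_digits(7), decimal_digits(42), decimal_digits(1000));
        /* prints: 1 2 4 */
        return 0;
    }
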
@@ -262,35 +277,38 @@ int MPID_nem_dcfa_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val
     /* wait for key-value to propagate among all ranks */
     pmi_errno = PMI_Barrier();
     MPIU_ERR_CHKANDJUMP(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**PMI_Barrier");
-    
+
     /* obtain GID, LID, QP number for remote UD QP for dynamic connection */
     for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
-        if(i != MPID_nem_dcfa_myrank) { 
+        if (i != MPID_nem_dcfa_myrank) {
             sprintf(key_dcs, "bc/%d/dcs/gid_lid_qpn", i);
             pmi_errno = PMI_KVS_Get(kvs_name, key_dcs, val, 256);
             dprintf("pmi_errno=%d\n", pmi_errno);
             MPIU_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMI_KVS_Get");
             dprintf("rank=%d,obtained val=%s\n", MPID_nem_dcfa_myrank, val);
-            char* strp = val;
-            for(j = 0; j < sizeof(union ibv_gid); j++) {
+            char *strp = val;
+            for (j = 0; j < sizeof(union ibv_gid); j++) {
                 memcpy(str, strp, 2);
                 str[2] = 0;
                 MPID_nem_dcfa_conn_ud[i].gid.raw[j] = strtol(str, NULL, 16);
                 strp += 2;
             }
-            sscanf(strp, ":%04x:%08x", &MPID_nem_dcfa_conn_ud[i].lid, &MPID_nem_dcfa_conn_ud[i].qpn);
+            sscanf(strp, ":%04x:%08x", &MPID_nem_dcfa_conn_ud[i].lid,
+                   &MPID_nem_dcfa_conn_ud[i].qpn);
 
             dprintf("remote rank=%d,gid=", i);
-            for(j = 0; j < sizeof(union ibv_gid); j++) {
+            for (j = 0; j < sizeof(union ibv_gid); j++) {
                 dprintf("%02x", MPID_nem_dcfa_conn_ud[i].gid.raw[j]);
             }
-            dprintf(",lid=%04x,qpn=%08x\n", MPID_nem_dcfa_conn_ud[i].lid, MPID_nem_dcfa_conn_ud[i].qpn);
-       }
+            dprintf(",lid=%04x,qpn=%08x\n", MPID_nem_dcfa_conn_ud[i].lid,
+                    MPID_nem_dcfa_conn_ud[i].qpn);
+        }
     }
-#endif   
+#endif
 
     /* malloc scratch-pad fd */
-    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_scratch_pad_fds, int*, MPID_nem_dcfa_nranks * sizeof(int), mpi_errno, "connection table");
+    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_scratch_pad_fds, int *, MPID_nem_dcfa_nranks * sizeof(int),
+                        mpi_errno, "connection table");
     memset(MPID_nem_dcfa_scratch_pad_fds, 0, MPID_nem_dcfa_nranks * sizeof(int));
 
     /* prepare scratch-pad QP and malloc scratch-pad */
@@ -298,7 +316,9 @@ int MPID_nem_dcfa_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val
         ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_SCRATCH_PAD, &MPID_nem_dcfa_scratch_pad_fds[i]);
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
 
-        ibcom_errno = ibcom_alloc(MPID_nem_dcfa_scratch_pad_fds[i], MPID_nem_dcfa_nranks * sizeof(ibcom_qp_state_t));
+        ibcom_errno =
+            ibcom_alloc(MPID_nem_dcfa_scratch_pad_fds[i],
+                        MPID_nem_dcfa_nranks * sizeof(ibcom_qp_state_t));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_alloc");
     }
 
@@ -308,55 +328,80 @@ int MPID_nem_dcfa_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val
 #ifndef DCFA_ONDEMAND
     uint32_t my_qpnum;
     uint16_t my_lid;
-    union ibv_gid	my_gid;
+    union ibv_gid my_gid;
 #endif
-    void* my_rmem;
+    void *my_rmem;
     int my_rkey;
 
     int remote_qpnum;
     uint16_t remote_lid;
     union ibv_gid remote_gid;
-    void* remote_rmem;
+    void *remote_rmem;
     int remote_rkey;
 
     char *remote_rank_str;
-    char *key_str; 
+    char *key_str;
 
     /* count maximum length of the string representation of remote_rank */
-    for(i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) { }
-    MPIU_CHKPMEM_MALLOC(remote_rank_str, char*, 1 + i + 1, mpi_errno, "connection table");
-    MPIU_CHKPMEM_MALLOC(key_str, char*, strlen("sp/qpn") + 1 + i + 1, mpi_errno, "connection table");
+    for (i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) {
+    }
+    MPIU_CHKPMEM_MALLOC(remote_rank_str, char *, 1 + i + 1, mpi_errno, "connection table");
+    MPIU_CHKPMEM_MALLOC(key_str, char *, strlen("sp/qpn") + 1 + i + 1, mpi_errno,
+                        "connection table");
 
     for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
 
-        if(i == 0) {
-            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_INFOKEY_PORT_LID, &my_lid, sizeof(uint16_t));
+        if (i == 0) {
+            ibcom_errno =
+                ibcom_get_info_conn(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_INFOKEY_PORT_LID,
+                                    &my_lid, sizeof(uint16_t));
             dprintf("dcfa_init,scratch pad,lid=%04x\n", my_lid);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-            mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, "sp/lid", (uint8_t*)&my_lid, sizeof(uint16_t));
-            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
-            
-            { dprintf("dcfa_init,scratch pad,put <%d/sp/lid/,%04x>\n", MPID_nem_dcfa_myrank, (int)my_lid); }
+            mpi_errno =
+                MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, "sp/lid", (uint8_t *) & my_lid,
+                                             sizeof(uint16_t));
+            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,
+                                "**MPID_nem_dcfa_kvs_put_binary");
+
+            {
+                dprintf("dcfa_init,scratch pad,put <%d/sp/lid/,%04x>\n", MPID_nem_dcfa_myrank,
+                        (int) my_lid);
+            }
 
-            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_INFOKEY_PORT_GID, &my_gid, sizeof(union ibv_gid));
+            ibcom_errno =
+                ibcom_get_info_conn(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_INFOKEY_PORT_GID,
+                                    &my_gid, sizeof(union ibv_gid));
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-            mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, "sp/gid", (uint8_t*)&my_gid, sizeof(union ibv_gid));
-            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+            mpi_errno =
+                MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, "sp/gid", (uint8_t *) & my_gid,
+                                             sizeof(union ibv_gid));
+            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,
+                                "**MPID_nem_dcfa_kvs_put_binary");
 
-            { dprintf("dcfa_init,scratch pad,gid "); int i; for(i = 0; i < 16; i++) { dprintf("%02x", (int)my_gid.raw[i]); } dprintf("\n"); }
+            {
+                dprintf("dcfa_init,scratch pad,gid ");
+                int i;
+                for (i = 0; i < 16; i++) {
+                    dprintf("%02x", (int) my_gid.raw[i]);
+                } dprintf("\n");
+            }
         }
 
         /* put bc/me/sp/qpn/you  */
         strcpy(key_str, "sp/qpn");
         sprintf(remote_rank_str, "/%x", i);
         strcat(key_str, remote_rank_str);
-        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_INFOKEY_QP_QPN, &my_qpnum, sizeof(uint32_t));
+        ibcom_errno =
+            ibcom_get_info_conn(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_INFOKEY_QP_QPN, &my_qpnum,
+                                sizeof(uint32_t));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
         dprintf("dcfa_init,scratch pad,qpn=%08x\n", my_qpnum);
 
-        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_qpnum, sizeof(uint32_t));
+        mpi_errno =
+            MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t *) & my_qpnum,
+                                         sizeof(uint32_t));
         MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
         dprintf("dcfa_init,scratch pad,kvs put done\n");
 
@@ -364,69 +409,95 @@ int MPID_nem_dcfa_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val
         sprintf(remote_rank_str, "/%x", i);
         strcat(key_str, remote_rank_str);
 
-        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_SCRATCH_PAD_TO, IBCOM_INFOKEY_MR_ADDR, &my_rmem, sizeof(void*));
+        ibcom_errno =
+            ibcom_get_info_mr(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_SCRATCH_PAD_TO,
+                              IBCOM_INFOKEY_MR_ADDR, &my_rmem, sizeof(void *));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
 
         dprintf("dcfa_init,scratch_pad,rmem=%p\n", my_rmem);
-        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_rmem, sizeof(void*));
+        mpi_errno =
+            MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t *) & my_rmem,
+                                         sizeof(void *));
         MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
 
         strcpy(key_str, "sp/rkey");
         sprintf(remote_rank_str, "/%x", i);
         strcat(key_str, remote_rank_str);
 
-        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_SCRATCH_PAD_TO, IBCOM_INFOKEY_MR_RKEY, &my_rkey, sizeof(int));
+        ibcom_errno =
+            ibcom_get_info_mr(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_SCRATCH_PAD_TO,
+                              IBCOM_INFOKEY_MR_RKEY, &my_rkey, sizeof(int));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
         dprintf("dcfa_init,scratch_pad,rkey=%08x\n", my_rkey);
 
-        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_rkey, sizeof(uint32_t));
+        mpi_errno =
+            MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t *) & my_rkey,
+                                         sizeof(uint32_t));
         MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
     }
-    
+
     /* wait until key-value propagates among all ranks */
-    pmi_errno = PMI_Barrier(); 
+    pmi_errno = PMI_Barrier();
     MPIU_ERR_CHKANDJUMP(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**PMI_Barrier");
     dprintf("dcfa_init,put KVS;barrier;\n");
 
     /* make me-to-you scratch-pad QP RTS */
     for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
-        if(i != MPID_nem_dcfa_myrank) { 
+        if (i != MPID_nem_dcfa_myrank) {
 
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, "sp/gid", (char*)&remote_gid, sizeof(union ibv_gid));
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, "sp/gid", (char *) &remote_gid,
+                                             sizeof(union ibv_gid));
             dprintf("dcfa_init,after kvs get\n");
-            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
 
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, "sp/lid", (char*)&remote_lid, sizeof(uint16_t));
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, "sp/lid", (char *) &remote_lid, sizeof(uint16_t));
             dprintf("dcfa_init,after kvs get\n");
-            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-            
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
+
             strcpy(key_str, "sp/qpn");
-            strcat(key_str, ""); /* "" or "lmt-put" */
+            strcat(key_str, "");        /* "" or "lmt-put" */
             sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
             strcat(key_str, remote_rank_str);
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_qpnum, sizeof(uint32_t));
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, key_str, (char *) &remote_qpnum, sizeof(uint32_t));
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
             dprintf("dcfa_init,get KVS,remote_qpnum=%08x\n", remote_qpnum);
 
-            ibcom_errno = ibcom_rts(MPID_nem_dcfa_scratch_pad_fds[i], remote_qpnum, remote_lid, &remote_gid);
+            ibcom_errno =
+                ibcom_rts(MPID_nem_dcfa_scratch_pad_fds[i], remote_qpnum, remote_lid, &remote_gid);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
 
             strcpy(key_str, "sp/rmem");
             sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
             strcat(key_str, remote_rank_str);
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_rmem, sizeof(void*));
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, key_str, (char *) &remote_rmem, sizeof(void *));
             dprintf("dcfa_init,after kvs get\n");
-            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
             dprintf("dcfa_init,get KVS,remote_rmem=%p\n", remote_rmem);
 
             strcpy(key_str, "sp/rkey");
             sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
             strcat(key_str, remote_rank_str);
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_rkey, sizeof(uint32_t));
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, key_str, (char *) &remote_rkey, sizeof(uint32_t));
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
             dprintf("dcfa_init,get KVS,remote_rkey=%08x\n", remote_rkey);
 
-            ibcom_errno = ibcom_reg_mr_connect(MPID_nem_dcfa_scratch_pad_fds[i], remote_rmem, remote_rkey);
+            ibcom_errno =
+                ibcom_reg_mr_connect(MPID_nem_dcfa_scratch_pad_fds[i], remote_rmem, remote_rkey);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_reg_mr_connect");
         }
     }
@@ -435,97 +506,129 @@ int MPID_nem_dcfa_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val
     pmi_errno = PMI_Barrier();
     MPIU_ERR_CHKANDJUMP(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**PMI_Barrier");
 
-    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_conns, dcfaconn_t *, MPID_nem_dcfa_nranks * sizeof(dcfaconn_t), mpi_errno, "connection table");
+    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_conns, dcfaconn_t *,
+                        MPID_nem_dcfa_nranks * sizeof(dcfaconn_t), mpi_errno, "connection table");
     memset(MPID_nem_dcfa_conns, 0, MPID_nem_dcfa_nranks * sizeof(dcfaconn_t));
 
-    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_pollingset, MPIDI_VC_t**, MPID_NEM_DCFA_MAX_POLLINGSET * sizeof(MPIDI_VC_t*), mpi_errno, "connection table");
-    memset(MPID_nem_dcfa_pollingset, 0, MPID_NEM_DCFA_MAX_POLLINGSET * sizeof(MPIDI_VC_t*));
+    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_pollingset, MPIDI_VC_t **,
+                        MPID_NEM_DCFA_MAX_POLLINGSET * sizeof(MPIDI_VC_t *), mpi_errno,
+                        "connection table");
+    memset(MPID_nem_dcfa_pollingset, 0, MPID_NEM_DCFA_MAX_POLLINGSET * sizeof(MPIDI_VC_t *));
 
     /* prepare eager-send QP */
-    for(i = 0; i < MPID_nem_dcfa_nranks; i++) {
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
         ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_RC, &MPID_nem_dcfa_conns[i].fd);
         dprintf("init,fd=%d\n", MPID_nem_dcfa_conns[i].fd);
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
     }
 
-#if 0    
+#if 0
     for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
         ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_RC_LMT_PUT, &MPID_nem_dcfa_conns[i].fd_lmt_put);
         dprintf("init,fd_lmt_put=%d\n", MPID_nem_dcfa_conns[i].fd_lmt_put);
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
-    }   
+    }
 #endif
 
     /* put bc/me/{gid,lid}, put bc/me/{qpn,rmem,rkey}/you */
     mpi_errno = MPID_nem_dcfa_announce_network_addr(pg_rank, bc_val_p, val_max_sz_p);
-    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
 
     /* wait until key-value propagates among all ranks */
-    pmi_errno = PMI_Barrier(); 
+    pmi_errno = PMI_Barrier();
     MPIU_ERR_CHKANDJUMP(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**PMI_Barrier");
 
     /* make me-to-you eager-send QP RTS */
     for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
-        if(i != MPID_nem_dcfa_myrank) { 
+        if (i != MPID_nem_dcfa_myrank) {
+
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, MPIDI_CH3I_LID_KEY, (char *) &remote_lid,
+                                             sizeof(uint16_t));
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, MPIDI_CH3I_GID_KEY, (char *) &remote_gid,
+                                             sizeof(union ibv_gid));
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
 
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, MPIDI_CH3I_LID_KEY, (char*)&remote_lid, sizeof(uint16_t));
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, MPIDI_CH3I_GID_KEY, (char*)&remote_gid, sizeof(union ibv_gid));
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-            
             strcpy(key_str, MPIDI_CH3I_RMEM_KEY);
             sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
             strcat(key_str, remote_rank_str);
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_rmem, sizeof(void*));
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-            
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, key_str, (char *) &remote_rmem, sizeof(void *));
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
+
             strcpy(key_str, MPIDI_CH3I_RKEY_KEY);
             sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
             strcat(key_str, remote_rank_str);
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_rkey, sizeof(uint32_t));
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-            
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, key_str, (char *) &remote_rkey, sizeof(uint32_t));
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
+
             strcpy(key_str, MPIDI_CH3I_QPN_KEY);
-            strcat(key_str, ""); /* "" or "lmt-put" */
+            strcat(key_str, "");        /* "" or "lmt-put" */
             sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
             strcat(key_str, remote_rank_str);
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_qpnum, sizeof(uint32_t));
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, key_str, (char *) &remote_qpnum, sizeof(uint32_t));
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
             dprintf("remote_qpnum obtained=%08x\n", remote_qpnum);
-            
-            ibcom_errno = ibcom_rts(MPID_nem_dcfa_conns[i].fd, remote_qpnum, remote_lid, &remote_gid);
+
+            ibcom_errno =
+                ibcom_rts(MPID_nem_dcfa_conns[i].fd, remote_qpnum, remote_lid, &remote_gid);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
-            
+
             /* report me-to-you eager-send QP becomes RTR */
-            IbCom* ibcom_scratch_pad;
-            ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[i], &ibcom_scratch_pad); 
+            IbCom *ibcom_scratch_pad;
+            ibcom_errno =
+                ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[i], &ibcom_scratch_pad);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
 
-            ibcom_qp_state_t state = {.state = IBCOM_QP_STATE_RTR};
-            ibcom_errno = ibcom_put_scratch_pad(MPID_nem_dcfa_scratch_pad_fds[i], (uint64_t)ibcom_scratch_pad, sizeof(ibcom_qp_state_t) * MPID_nem_dcfa_myrank, sizeof(ibcom_qp_state_t), (void*)&state);
+            ibcom_qp_state_t state = {.state = IBCOM_QP_STATE_RTR };
+            ibcom_errno =
+                ibcom_put_scratch_pad(MPID_nem_dcfa_scratch_pad_fds[i],
+                                      (uint64_t) ibcom_scratch_pad,
+                                      sizeof(ibcom_qp_state_t) * MPID_nem_dcfa_myrank,
+                                      sizeof(ibcom_qp_state_t), (void *) &state);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_put_scratch_pad");
             MPID_nem_dcfa_ncqe_scratch_pad += 1;
 
             ibcom_errno = ibcom_reg_mr_connect(MPID_nem_dcfa_conns[i].fd, remote_rmem, remote_rkey);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_reg_mr_connect");
             dprintf("dcfa_init,after mr_connect for me-to-you eager-send QP\n");
-            
+
 #if 0
             /* CQ, SQ, SCQ for lmt-put */
             strcpy(key_str, MPIDI_CH3I_QPN_KEY);
             strcat(key_str, "lmt-put"); /* "" or "lmt-put" */
             sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
             strcat(key_str, remote_rank_str);
-            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_qpnum, sizeof(uint32_t));
-            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-            
-            ibcom_errno = ibcom_rts(MPID_nem_dcfa_conns[i].fd_lmt_put, remote_qpnum, remote_lid, &remote_gid);
+            mpi_errno =
+                MPID_nem_dcfa_kvs_get_binary(i, key_str, (char *) &remote_qpnum, sizeof(uint32_t));
+            if (mpi_errno) {
+                MPIU_ERR_POP(mpi_errno);
+            }
+
+            ibcom_errno =
+                ibcom_rts(MPID_nem_dcfa_conns[i].fd_lmt_put, remote_qpnum, remote_lid, &remote_gid);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
 #endif
         }
     }
 
-#if 0 /* debug */
+#if 0   /* debug */
     for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
         dprintf("init,fd[%d]=%d\n", i, MPID_nem_dcfa_conns[i].fd);
     }
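
In the loop above, each rank reports "my eager-send QP toward you is now RTR" by RDMA-writing an ibcom_qp_state_t into the peer's scratch pad at offset MPID_nem_dcfa_myrank * sizeof(ibcom_qp_state_t), so every writer owns a disjoint slot and concurrent writers cannot collide. A local-memory sketch of the slot arithmetic; the types are illustrative and the memcpy merely stands in for ibcom_put_scratch_pad:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define QP_STATE_RTR 1u

    typedef struct {
        uint32_t state;
    } qp_state_t;

    /* mark my_rank's slot in a pad that holds one slot per rank;
     * in the netmod this store is an RDMA write to the remote pad */
    static void report_rtr(void *scratch_pad, int my_rank)
    {
        qp_state_t st = { QP_STATE_RTR };
        size_t offset = (size_t) my_rank * sizeof(qp_state_t);
        memcpy((char *) scratch_pad + offset, &st, sizeof(st));
    }

    int main(void)
    {
        qp_state_t pad[4];
        memset(pad, 0, sizeof(pad));
        report_rtr(pad, 2);
        printf("slot 2 state=%u\n", (unsigned) pad[2].state);   /* prints 1 */
        return 0;
    }
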
@@ -568,69 +671,86 @@ int MPID_nem_dcfa_announce_network_addr(int my_rank, char **bc_val_p, int *val_m
 
     uint32_t my_qpnum;
     uint16_t my_lid;
-    union ibv_gid	my_gid;
-    void* my_rmem;
+    union ibv_gid my_gid;
+    void *my_rmem;
     int my_rkey;
-    char *remote_rank_str; /* perl -e '$key_str .= $remote_rank;' */
-    char *key_str; 
+    char *remote_rank_str;      /* perl -e '$key_str .= $remote_rank;' */
+    char *key_str;
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_ANNOUNCE_NETWORK_ADDR);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_ANNOUNCE_NETWORK_ADDR);
-    MPIU_CHKLMEM_DECL(2); /* argument is the number of alloca */
+    MPIU_CHKLMEM_DECL(2);       /* argument is the number of alloca */
 
     /* count maximum length of the string representation of remote_rank */
-    for(i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) { }
-    MPIU_CHKLMEM_MALLOC(remote_rank_str, char *, i + 1, mpi_errno, "key_str"); /* alloca */
-    MPIU_CHKLMEM_MALLOC(key_str, char *, strlen(MPIDI_CH3I_QPN_KEY) + i + 1, mpi_errno, "key_str"); /* alloca */
-
-    /* We have one local qp and remote qp for each rank-pair, 
-       so a rank should perform 
-       remote_qpn = kvs_get($remote_rank . "qpnum/" . $local_rank).
-       a memory area to read from and write to HCA,
-       and a memory area to read from HCA and write to DRAM is
-       associated with each connection, so a rank should perform 
-       rkey = kvs_get($remote_rank . "rkey/" . $local_rank) 
-       and raddr = kvs_get($remote_rank . "raddr/" . $local_rank). */
+    for (i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) {
+    }
+    MPIU_CHKLMEM_MALLOC(remote_rank_str, char *, i + 1, mpi_errno, "key_str");  /* alloca */
+    MPIU_CHKLMEM_MALLOC(key_str, char *, strlen(MPIDI_CH3I_QPN_KEY) + i + 1, mpi_errno, "key_str");     /* alloca */
+
+    /* We have one local qp and remote qp for each rank-pair,
+     * so a rank should perform
+     * remote_qpn = kvs_get($remote_rank . "qpnum/" . $local_rank).
+     * a memory area to read from and write to HCA,
+     * and a memory area to read from HCA and write to DRAM is
+     * associated with each connection, so a rank should perform
+     * rkey = kvs_get($remote_rank . "rkey/" . $local_rank)
+     * and raddr = kvs_get($remote_rank . "raddr/" . $local_rank). */
     for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
 
         /* lid and gid are common for all remote-ranks */
-        if(i == 0) {
-            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd, IBCOM_INFOKEY_PORT_LID, &my_lid, sizeof(uint16_t));
+        if (i == 0) {
+            ibcom_errno =
+                ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd, IBCOM_INFOKEY_PORT_LID, &my_lid,
+                                    sizeof(uint16_t));
             dprintf("get_business_card,lid=%04x\n", my_lid);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-            mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, MPIDI_CH3I_LID_KEY, (uint8_t*)&my_lid, sizeof(uint16_t));
-            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
-            
-            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd, IBCOM_INFOKEY_PORT_GID, &my_gid, sizeof(union ibv_gid));
+            mpi_errno =
+                MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, MPIDI_CH3I_LID_KEY,
+                                             (uint8_t *) & my_lid, sizeof(uint16_t));
+            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,
+                                "**MPID_nem_dcfa_kvs_put_binary");
+
+            ibcom_errno =
+                ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd, IBCOM_INFOKEY_PORT_GID, &my_gid,
+                                    sizeof(union ibv_gid));
             {
                 dprintf("get_business_card,val_max_sz=%d\n", *val_max_sz_p);
                 dprintf("get_business_card,sz=%ld,my_gid=", sizeof(union ibv_gid));
                 int i;
-                for(i = 0; i < 16; i++) { dprintf("%02x", (int)my_gid.raw[i]); }
+                for (i = 0; i < 16; i++) {
+                    dprintf("%02x", (int) my_gid.raw[i]);
+                }
                 dprintf("\n");
             }
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-            mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, MPIDI_CH3I_GID_KEY, (uint8_t*)&my_gid, sizeof(union ibv_gid));
-            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+            mpi_errno =
+                MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, MPIDI_CH3I_GID_KEY,
+                                             (uint8_t *) & my_gid, sizeof(union ibv_gid));
+            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,
+                                "**MPID_nem_dcfa_kvs_put_binary");
             dprintf("get_business_card,val_max_sz=%d\n", *val_max_sz_p);
         }
 
         /* we use different RDMA-rbuf for different senders.
-           so announce like this:
-           <"0/qpn/0", 0xa0000>
-           <"0/qpn/1", 0xb0000>   
-           <"0/qpn/2", 0xc0000>
-           <"0/qpn/3", 0xd0000>   
+         * so announce like this:
+         * <"0/qpn/0", 0xa0000>
+         * <"0/qpn/1", 0xb0000>
+         * <"0/qpn/2", 0xc0000>
+         * <"0/qpn/3", 0xd0000>
          */
         strcpy(key_str, MPIDI_CH3I_QPN_KEY);
         sprintf(remote_rank_str, "/%x", i);
         strcat(key_str, remote_rank_str);
-        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd, IBCOM_INFOKEY_QP_QPN, &my_qpnum, sizeof(uint32_t));
+        ibcom_errno =
+            ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd, IBCOM_INFOKEY_QP_QPN, &my_qpnum,
+                                sizeof(uint32_t));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_qpnum, sizeof(uint32_t));
+        mpi_errno =
+            MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t *) & my_qpnum,
+                                         sizeof(uint32_t));
         MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
 
 #if 0
@@ -639,10 +759,14 @@ int MPID_nem_dcfa_announce_network_addr(int my_rank, char **bc_val_p, int *val_m
         strcat(key_str, "lmt-put");
         sprintf(remote_rank_str, "/%x", i);
         strcat(key_str, remote_rank_str);
-        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd_lmt_put, IBCOM_INFOKEY_QP_QPN, &my_qpnum, sizeof(uint32_t));
+        ibcom_errno =
+            ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd_lmt_put, IBCOM_INFOKEY_QP_QPN, &my_qpnum,
+                                sizeof(uint32_t));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_qpnum, sizeof(uint32_t));
+        mpi_errno =
+            MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t *) & my_qpnum,
+                                         sizeof(uint32_t));
         MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
 #endif
 
@@ -650,21 +774,29 @@ int MPID_nem_dcfa_announce_network_addr(int my_rank, char **bc_val_p, int *val_m
         sprintf(remote_rank_str, "/%x", i);
         strcat(key_str, remote_rank_str);
 
-        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[i].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_ADDR, &my_rmem, sizeof(void*));
+        ibcom_errno =
+            ibcom_get_info_mr(MPID_nem_dcfa_conns[i].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_ADDR,
+                              &my_rmem, sizeof(void *));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
 
         dprintf("rmem=%p\n", my_rmem);
-        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_rmem, sizeof(void*));
+        mpi_errno =
+            MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t *) & my_rmem,
+                                         sizeof(void *));
         MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
 
         strcpy(key_str, MPIDI_CH3I_RKEY_KEY);
         sprintf(remote_rank_str, "/%x", i);
         strcat(key_str, remote_rank_str);
 
-        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[i].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_RKEY, &my_rkey, sizeof(int));
+        ibcom_errno =
+            ibcom_get_info_mr(MPID_nem_dcfa_conns[i].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_RKEY,
+                              &my_rkey, sizeof(int));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
 
-        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_rkey, sizeof(uint32_t));
+        mpi_errno =
+            MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t *) & my_rkey,
+                                         sizeof(uint32_t));
         MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
     }
 
@@ -680,14 +812,15 @@ int MPID_nem_dcfa_announce_network_addr(int my_rank, char **bc_val_p, int *val_m
 #define FUNCNAME MPID_nem_dcfa_connect_to_root
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc) {
+int MPID_nem_dcfa_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc)
+{
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_CONNECT_TO_ROOT);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_CONNECT_TO_ROOT);
 
     dprintf("toroot,%d->%d", MPID_nem_dcfa_myrank, new_vc->pg_rank);
     /* not implemented */
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CONNECT_TO_ROOT);
     return MPI_SUCCESS;
 }
@@ -710,7 +843,7 @@ int MPID_nem_dcfa_vc_init(MPIDI_VC_t * vc)
     int remote_qpnum;
     uint16_t remote_lid;
     union ibv_gid remote_gid;
-    void* remote_rmem;
+    void *remote_rmem;
     int remote_rkey;
 
     char key_str[256], remote_rank_str[256];
@@ -721,10 +854,11 @@ int MPID_nem_dcfa_vc_init(MPIDI_VC_t * vc)
     vc_dcfa->sc = &MPID_nem_dcfa_conns[vc->pg_rank];
 
     /* store pointer to ibcom */
-    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_conns[vc->pg_rank].fd, &vc_dcfa->ibcom); 
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_conns[vc->pg_rank].fd, &vc_dcfa->ibcom);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
 
-    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_conns[vc->pg_rank].fd_lmt_put, &vc_dcfa->ibcom_lmt_put); 
+    ibcom_errno =
+        ibcom_obtain_pointer(MPID_nem_dcfa_conns[vc->pg_rank].fd_lmt_put, &vc_dcfa->ibcom_lmt_put);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
 
     //dprintf("vc_init,open,fd=%d,ptr=%p,rsr_seq_num_poll=%d\n", MPID_nem_dcfa_conns[vc->pg_rank].fd, vc_dcfa->ibcom, vc_dcfa->ibcom->rsr_seq_num_poll);
@@ -737,33 +871,38 @@ int MPID_nem_dcfa_vc_init(MPIDI_VC_t * vc)
 
     /* rank is sent as wr_id and used to obtain vc in poll */
     MPID_nem_dcfa_conns[vc->pg_rank].vc = vc;
-    MPIU_ERR_CHKANDJUMP(MPID_nem_dcfa_npollingset+1 > MPID_NEM_DCFA_MAX_POLLINGSET, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_npollingset");
+    MPIU_ERR_CHKANDJUMP(MPID_nem_dcfa_npollingset + 1 > MPID_NEM_DCFA_MAX_POLLINGSET, mpi_errno,
+                        MPI_ERR_OTHER, "**MPID_nem_dcfa_npollingset");
     MPID_nem_dcfa_pollingset[MPID_nem_dcfa_npollingset++] = vc;
     //printf("vc_init,%d->%d,vc=%p,npollingset=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, vc, MPID_nem_dcfa_npollingset);
 
     /* wait until you-to-me eager-send QP becomes RTR */
-    IbCom* ibcom_scratch_pad;
-    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[vc->pg_rank], &ibcom_scratch_pad); 
+    IbCom *ibcom_scratch_pad;
+    ibcom_errno =
+        ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[vc->pg_rank], &ibcom_scratch_pad);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
 
     int ntrial = 0;
-    volatile ibcom_qp_state_t* rstate = (ibcom_qp_state_t*)(ibcom_scratch_pad->icom_mem[IBCOM_SCRATCH_PAD_TO] + vc->pg_rank * sizeof(ibcom_qp_state_t));
-    dprintf("dcfa_init,rstate=%p,*rstate=%08x\n", rstate, *((uint32_t*)rstate));
-    while(rstate->state != IBCOM_QP_STATE_RTR) {
-        __asm__ __volatile__ ("pause;" : : : "memory"); 
-        if(++ntrial > 1024) {
+    volatile ibcom_qp_state_t *rstate =
+        (ibcom_qp_state_t *) (ibcom_scratch_pad->icom_mem[IBCOM_SCRATCH_PAD_TO] +
+                              vc->pg_rank * sizeof(ibcom_qp_state_t));
+    dprintf("dcfa_init,rstate=%p,*rstate=%08x\n", rstate, *((uint32_t *) rstate));
+    while (rstate->state != IBCOM_QP_STATE_RTR) {
+        __asm__ __volatile__("pause;":::"memory");
+        if (++ntrial > 1024) {
             /* detect RDMA-write failure */
             ibcom_errno = MPID_nem_dcfa_drain_scq_scratch_pad();
-            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_scratch_pad");
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                                "**MPID_nem_dcfa_drain_scq_scratch_pad");
         }
     }
     dprintf("dcfa_init,you-to-me eager-send QP is RTR\n");
 
     /* post IBCOM_MAX_SQ_CAPACITY of recv commands beforehand, replenish when retiring them in dcfa_poll */
     int i;
-    for(i = 0; i < IBCOM_MAX_RQ_CAPACITY; i++) {
+    for (i = 0; i < IBCOM_MAX_RQ_CAPACITY; i++) {
         //dprintf("irecv,%d->%d\n", MPID_nem_dcfa_myrank, vc->pg_rank);
-        ibcom_errno = ibcom_irecv(MPID_nem_dcfa_conns[vc->pg_rank].fd, (uint64_t)vc->pg_rank);
+        ibcom_errno = ibcom_irecv(MPID_nem_dcfa_conns[vc->pg_rank].fd, (uint64_t) vc->pg_rank);
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_irecv");
     }
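
vc_init then busy-waits on a volatile pointer into its own scratch pad until the peer's RDMA write flips the slot to IBCOM_QP_STATE_RTR, issuing the x86 pause hint each iteration and draining the scratch-pad send CQ after 1024 spins to surface RDMA-write failures. The polling pattern reduced to its core, with a pthread standing in for the remote writer (names are made up; a from-scratch version would prefer C11 atomics over volatile):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static volatile int rstate = 0;      /* stands in for the remote-written slot */

    static void *remote_writer(void *arg)
    {
        (void) arg;
        usleep(1000);                    /* pretend the RDMA write lands later */
        rstate = 1;                      /* IBCOM_QP_STATE_RTR in the netmod */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        int ntrial = 0;
        pthread_create(&t, NULL, remote_writer, NULL);
        while (rstate != 1) {
    #if defined(__x86_64__) || defined(__i386__)
            __asm__ __volatile__("pause;" ::: "memory");
    #endif
            if (++ntrial > 1024) {
                /* the netmod drains the scratch-pad send CQ here to detect
                 * a failed RDMA write; nothing to drain in this sketch */
                ntrial = 0;
            }
        }
        pthread_join(t, NULL);
        printf("peer QP is RTR\n");
        return 0;
    }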
 
@@ -771,39 +910,47 @@ int MPID_nem_dcfa_vc_init(MPIDI_VC_t * vc)
 
 
     uint32_t max_msg_sz;
-    ibcom_get_info_conn(MPID_nem_dcfa_conns[vc->pg_rank].fd, IBCOM_INFOKEY_PATTR_MAX_MSG_SZ, &max_msg_sz, sizeof(max_msg_sz));
-    VC_FIELD(vc,pending_sends) = 0;
+    ibcom_get_info_conn(MPID_nem_dcfa_conns[vc->pg_rank].fd, IBCOM_INFOKEY_PATTR_MAX_MSG_SZ,
+                        &max_msg_sz, sizeof(max_msg_sz));
+    VC_FIELD(vc, pending_sends) = 0;
 #ifdef DCFA_ONDEMAND
-    VC_FIELD(vc,is_connected) = 0;
+    VC_FIELD(vc, is_connected) = 0;
 #endif
 
-    MPIU_Assert(sizeof(sz_hdrmagic_t) == 8); /* assumption in dcfa_ibcom.h */
-    MPIU_Assert(sizeof(tailmagic_t) == 1); /* in dcfa_ibcom.h */
+    MPIU_Assert(sizeof(sz_hdrmagic_t) == 8);    /* assumption in dcfa_ibcom.h */
+    MPIU_Assert(sizeof(tailmagic_t) == 1);      /* in dcfa_ibcom.h */
 
     uint32_t sz;
 #if 0
     /* assumption in released(), must be power of two  */
     sz = IBCOM_RDMABUF_NSEG;
-    while((sz & 1) == 0) { sz>>=1; }
+    while ((sz & 1) == 0) {
+        sz >>= 1;
+    }
     sz >>= 1;
-    if(sz) { MPIU_Assert(0); }
+    if (sz) {
+        MPIU_Assert(0);
+    }
 #endif
 
     /* assumption in dcfa_poll.c, must be power of two */
-    for(sz = IBCOM_RDMABUF_SZSEG; sz > 0; sz >>= 1) {
-        if(sz != 1 && (sz & 1)) { MPIU_Assert(0); }
+    for (sz = IBCOM_RDMABUF_SZSEG; sz > 0; sz >>= 1) {
+        if (sz != 1 && (sz & 1)) {
+            MPIU_Assert(0);
+        }
     }
 
-    char* val;
+    char *val;
     val = getenv("MP2_IBA_EAGER_THRESHOLD");
     vc->eager_max_msg_sz = val ? atoi(val) : MPID_NEM_DCFA_EAGER_MAX_MSG_SZ;
     vc->ready_eager_max_msg_sz = val ? atoi(val) : MPID_NEM_DCFA_EAGER_MAX_MSG_SZ;
     dprintf("dcfa_vc_init,vc->eager_max_msg_sz=%d\n", vc->eager_max_msg_sz);
 
-    /* vc->rndvSend_fn is set in MPID_nem_vc_init (in src/mpid/ch3/channels/nemesis/src/mpid_nem_init.c) */;
+    /* vc->rndvSend_fn is set in MPID_nem_vc_init (in src/mpid/ch3/channels/nemesis/src/mpid_nem_init.c) */
+        ;
     vc->sendNoncontig_fn = MPID_nem_dcfa_SendNoncontig;
 
-    vc->comm_ops         = &comm_ops;
+    vc->comm_ops = &comm_ops;
 
 
     /* register packet handler */
@@ -816,8 +963,10 @@ int MPID_nem_dcfa_vc_init(MPIDI_VC_t * vc)
     MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_PKT_ACCUMULATE] = MPID_nem_dcfa_PktHandler_Accumulate;
     MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_PKT_LMT_GET_DONE] = MPID_nem_dcfa_pkt_GET_DONE_handler;
     MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_REQ_SEQ_NUM] = MPID_nem_dcfa_PktHandler_req_seq_num;
-    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_REPLY_SEQ_NUM] = MPID_nem_dcfa_PktHandler_reply_seq_num;
-    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_CHG_RDMABUF_OCC_NOTIFY_STATE] = MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_REPLY_SEQ_NUM] =
+        MPID_nem_dcfa_PktHandler_reply_seq_num;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_CHG_RDMABUF_OCC_NOTIFY_STATE] =
+        MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state;
 
     /* register CH3 send/recv functions */
     vc_ch->iStartContigMsg = MPID_nem_dcfa_iStartContigMsg;
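
The reindented loop near the top of this hunk asserts that IBCOM_RDMABUF_SZSEG is a power of two by shifting the value right and failing if any bit other than the leading one is set; the usual constant-time equivalent is x != 0 && (x & (x - 1)) == 0. A small check that the two forms agree (helper names are made up):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* shift-based test, mirroring the loop in dcfa_init.c */
    static int is_pow2_shift(uint32_t x)
    {
        uint32_t sz;
        for (sz = x; sz > 0; sz >>= 1) {
            if (sz != 1 && (sz & 1))
                return 0;
        }
        return x != 0;
    }

    /* classic bit trick */
    static int is_pow2_mask(uint32_t x)
    {
        return x != 0 && (x & (x - 1)) == 0;
    }

    int main(void)
    {
        uint32_t v;
        for (v = 1; v <= 4096; v++)
            assert(is_pow2_shift(v) == is_pow2_mask(v));
        printf("both checks agree for 1..4096\n");
        return 0;
    }
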
@@ -868,48 +1017,67 @@ int MPID_nem_dcfa_vc_terminate(MPIDI_VC_t * vc)
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_VC_TERMINATE);
 
     /* Check to make sure that it's OK to terminate the
-       connection without making sure that all sends have been sent */
+     * connection without making sure that all sends have been sent */
     /* it is safe to only check command queue because
-       data transactions always proceed after confirming send by MPI_Wait
-       and control transactions always proceed after receiveing reply */
+     * data transactions always proceed after confirming send by MPI_Wait
+     * and control transactions always proceed after receiving reply */
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
 
-    dprintf("init,before,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc, pending_sends));
+    dprintf("init,before,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n",
+            MPID_nem_dcfa_myrank, vc->pg_rank,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail,
+                                 vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail),
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc,
+                                                                                    pending_sends));
 
     /* update remote RDMA-write-to buffer occupancy */
-#if 0 /* we can't send it when the other party has closed QP */
-    while(MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent) > 0) {
-        MPID_nem_dcfa_send_reply_seq_num(vc); 
+#if 0   /* we can't send it when the other party has closed QP */
+    while (MPID_nem_dcfa_diff32
+           (vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent) > 0) {
+        MPID_nem_dcfa_send_reply_seq_num(vc);
     }
 #endif
 
     /* update local RDMA-write-to buffer occupancy */
 #if 0
-    while(MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) > 0) {
+    while (MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) > 0) {
         MPID_nem_dcfa_poll_eager(vc);
     }
 #endif
 
     /* drain sendq */
-    while(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+    while (!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
         MPID_nem_dcfa_send_progress(vc_dcfa);
     }
 
-    dprintf("init,middle,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc, pending_sends));
+    dprintf("init,middle,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n",
+            MPID_nem_dcfa_myrank, vc->pg_rank,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail,
+                                 vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail),
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc,
+                                                                                    pending_sends));
 
-    if(MPID_nem_dcfa_ncqe > 0 || VC_FIELD(vc, pending_sends) > 0) {
+    if (MPID_nem_dcfa_ncqe > 0 || VC_FIELD(vc, pending_sends) > 0) {
         usleep(1000);
         MPID_nem_dcfa_drain_scq(0);
     }
-    dprintf("init,middle2,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc, pending_sends));
-
-    if(MPID_nem_dcfa_ncqe > 0 || VC_FIELD(vc, pending_sends) > 0) {
+    dprintf("init,middle2,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n",
+            MPID_nem_dcfa_myrank, vc->pg_rank,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail,
+                                 vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail),
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc,
+                                                                                    pending_sends));
+
+    if (MPID_nem_dcfa_ncqe > 0 || VC_FIELD(vc, pending_sends) > 0) {
         usleep(1000);
         MPID_nem_dcfa_drain_scq(0);
     }
 #if 0
     /* drain scq */
-    while(MPID_nem_dcfa_ncqe > 0 || VC_FIELD(vc, pending_sends) > 0) {
+    while (MPID_nem_dcfa_ncqe > 0 || VC_FIELD(vc, pending_sends) > 0) {
         usleep(1000);
         MPID_nem_dcfa_drain_scq(0);
         //printf("%d\n", VC_FIELD(vc, pending_sends));
@@ -917,14 +1085,23 @@ int MPID_nem_dcfa_vc_terminate(MPIDI_VC_t * vc)
     }
 #endif
 
-    dprintf("init,after ,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n",  MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc, pending_sends));
+    dprintf("init,after ,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n",
+            MPID_nem_dcfa_myrank, vc->pg_rank,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail,
+                                 vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail),
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc,
+                                                                                    pending_sends));
 
     /* drain scratch-pad scq */
     ibcom_errno = MPID_nem_dcfa_drain_scq_scratch_pad();
-    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_scratch_pad");
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                        "**MPID_nem_dcfa_drain_scq_scratch_pad");
 
     mpi_errno = MPIDI_CH3U_Handle_connection(vc, MPIDI_VC_EVENT_TERMINATED);
-    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
 
   fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_VC_TERMINATE);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c
index 58fff57..5c9b2f4 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c
@@ -8,7 +8,7 @@
 #include "dcfa_impl.h"
 
 //#define DEBUG_DCFA_LMT
-#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#ifdef dprintf  /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
 #undef dprintf
 #endif
 #ifdef DEBUG_DCFA_LMT
@@ -22,12 +22,13 @@
 #define FUNCNAME MPID_nem_dcfa_lmt_initiate_lmt
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts_pkt, struct MPID_Request *req)
+int MPID_nem_dcfa_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts_pkt,
+                                   struct MPID_Request *req)
 {
     int mpi_errno = MPI_SUCCESS;
     int dt_contig;
-    MPIDI_msg_sz_t data_sz; 
-    MPID_Datatype * dt_ptr;
+    MPIDI_msg_sz_t data_sz;
+    MPID_Datatype *dt_ptr;
     MPI_Aint dt_true_lb;
     MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
@@ -39,57 +40,66 @@ int MPID_nem_dcfa_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts
 
     /* obtain dt_true_lb */
     /* see MPIDI_Datatype_get_info(in, in, out, out, out, out) (in src/mpid/ch3/include/mpidimpl.h) */
-    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr,
+                            dt_true_lb);
 
     /* malloc memory area for cookie. auto variable is NG because isend does not copy payload */
-    MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = (MPID_nem_dcfa_lmt_cookie_t*)MPIU_Malloc(sizeof(MPID_nem_dcfa_lmt_cookie_t));
+    MPID_nem_dcfa_lmt_cookie_t *s_cookie_buf =
+        (MPID_nem_dcfa_lmt_cookie_t *) MPIU_Malloc(sizeof(MPID_nem_dcfa_lmt_cookie_t));
 
     /* remember address to "free" when receiving DONE from receiver */
     req->ch.s_cookie = s_cookie_buf;
 
     /* see MPIDI_CH3_PktHandler_RndvClrToSend (in src/mpid/ch3/src/ch3u_rndv.c) */
     //assert(dt_true_lb == 0);
-    void* write_from_buf; 
-    if(dt_contig) {
+    void *write_from_buf;
+    if (dt_contig) {
         write_from_buf = req->dev.user_buf;
-    } else {
+    }
+    else {
         /* see MPIDI_CH3_EagerNoncontigSend (in ch3u_eager.c) */
-        req->dev.segment_ptr = MPID_Segment_alloc( );
-        MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
-        
-        MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, req->dev.segment_ptr, 0);
+        req->dev.segment_ptr = MPID_Segment_alloc();
+        MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
+                            "**outofmemory");
+
+        MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype,
+                          req->dev.segment_ptr, 0);
         req->dev.segment_first = 0;
         req->dev.segment_size = data_sz;
 
         MPIDI_msg_sz_t last;
-        last = req->dev.segment_size; /* segment_size is byte offset */
+        last = req->dev.segment_size;   /* segment_size is byte offset */
         MPIU_Assert(last > 0);
-        REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc((size_t)req->dev.segment_size);
-        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
-        MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last, (char *)(REQ_FIELD(req, lmt_pack_buf)));
+        REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc((size_t) req->dev.segment_size);
+        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER,
+                            "**outofmemory");
+        MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last,
+                          (char *) (REQ_FIELD(req, lmt_pack_buf)));
         MPIU_Assert(last == req->dev.segment_size);
         write_from_buf = REQ_FIELD(req, lmt_pack_buf);
     }
-    dprintf("lmt_initate_lmt,dt_contig=%d,write_from_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n", dt_contig, write_from_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf));
+    dprintf
+        ("lmt_initate_lmt,dt_contig=%d,write_from_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n",
+         dt_contig, write_from_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf));
 
 #ifdef DCFA
 #else
-    s_cookie_buf->addr = write_from_buf; 
+    s_cookie_buf->addr = write_from_buf;
 #endif
     /* put sz, see MPID_nem_lmt_RndvSend (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) */
-    /* TODO remove sz field 
-    /* pkt_RTS_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) 
-         rreq->ch.lmt_data_sz = rts_pkt->data_sz; */
+    /* TODO remove sz field
+     * /* pkt_RTS_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c)
+     * rreq->ch.lmt_data_sz = rts_pkt->data_sz; */
     //s_cookie_buf->sz = (uint32_t)((MPID_nem_pkt_lmt_rts_t*)rts_pkt)->data_sz;
 
-    /* preserve and put tail, because tail magic is written on the tail of payload 
-       because we don't want to add another SGE or RDMA command */
-    MPIU_Assert(((MPID_nem_pkt_lmt_rts_t*)rts_pkt)->data_sz == data_sz);
-    s_cookie_buf->tail = *((uint8_t*)(write_from_buf + data_sz - sizeof(uint8_t)));
+    /* preserve and put tail, because tail magic is written on the tail of payload
+     * because we don't want to add another SGE or RDMA command */
+    MPIU_Assert(((MPID_nem_pkt_lmt_rts_t *) rts_pkt)->data_sz == data_sz);
+    s_cookie_buf->tail = *((uint8_t *) (write_from_buf + data_sz - sizeof(uint8_t)));
     /* prepare magic */
     //*((uint32_t*)(write_from_buf + data_sz - sizeof(tailmagic_t))) = IBCOM_MAGIC;
 
-#if 1 /* embed RDMA-write-to buffer occupancy information */
+#if 1   /* embed RDMA-write-to buffer occupancy information */
     dprintf("lmt_initiate_lmt,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
     /* embed RDMA-write-to buffer occupancy information */
     s_cookie_buf->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
@@ -102,19 +112,23 @@ int MPID_nem_dcfa_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts
     struct ibv_mr *mr = ibcom_reg_mr_fetch(write_from_buf, data_sz);
     MPIU_ERR_CHKANDJUMP(!mr, mpi_errno, MPI_ERR_OTHER, "**ibcom_reg_mr_fetch");
 #ifdef DCFA
-    s_cookie_buf->addr = (void*)mr->host_addr; 
+    s_cookie_buf->addr = (void *) mr->host_addr;
     dprintf("lmt_initiate_lmt,s_cookie_buf->addr=%p\n", s_cookie_buf->addr);
 #endif
     s_cookie_buf->rkey = mr->rkey;
-    dprintf("lmt_initiate_lmt,tail=%02x,mem-tail=%p,%02x,sz=%ld,raddr=%p,rkey=%08x\n", s_cookie_buf->tail, write_from_buf + data_sz - sizeof(uint8_t), *((uint8_t*)(write_from_buf + data_sz - sizeof(uint8_t))), data_sz, s_cookie_buf->addr, s_cookie_buf->rkey);
+    dprintf("lmt_initiate_lmt,tail=%02x,mem-tail=%p,%02x,sz=%ld,raddr=%p,rkey=%08x\n",
+            s_cookie_buf->tail, write_from_buf + data_sz - sizeof(uint8_t),
+            *((uint8_t *) (write_from_buf + data_sz - sizeof(uint8_t))), data_sz,
+            s_cookie_buf->addr, s_cookie_buf->rkey);
     /* send cookie. rts_pkt as the MPI-header, s_cookie_buf as the payload */
-    MPID_nem_lmt_send_RTS(vc, (MPID_nem_pkt_lmt_rts_t*)rts_pkt, s_cookie_buf, sizeof(MPID_nem_dcfa_lmt_cookie_t));
+    MPID_nem_lmt_send_RTS(vc, (MPID_nem_pkt_lmt_rts_t *) rts_pkt, s_cookie_buf,
+                          sizeof(MPID_nem_dcfa_lmt_cookie_t));
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_INITIATE_LMT);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 /* essential lrecv part extracted for dequeueing and issue from sendq */
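
/* Editor's sketch, not part of the patch: a simplified model of the tail-magic
 * handshake described by the comments in lmt_initiate_lmt and lmt_start_recv.
 * The sender stashes the last payload byte in the RTS cookie so that no extra
 * SGE or RDMA command is needed; the receiver first overwrites its own tail
 * byte with the complement, then polls until the transfer makes the byte match
 * again (lmt_switch_send below falls back to the get protocol when the flag
 * would not change).  All names here are illustrative, not the netmod's types. */
#include <stdint.h>
#include <stddef.h>

struct example_cookie { uint8_t tail; };

static void example_sender_fill_cookie(const uint8_t *payload, size_t sz,
                                        struct example_cookie *c)
{
    c->tail = payload[sz - 1];                  /* preserve the real last byte */
}

static void example_receiver_wait_tail(volatile uint8_t *recv_buf, size_t sz,
                                        const struct example_cookie *c)
{
    recv_buf[sz - 1] = (uint8_t) ~c->tail;      /* "unmark" so stale data cannot match */
    while (recv_buf[sz - 1] != c->tail)         /* real code polls inside dcfa_poll() */
        ;
}
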
@@ -122,7 +136,8 @@ int MPID_nem_dcfa_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts
 #define FUNCNAME MPID_nem_dcfa_lmt_start_recv_core
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_lmt_start_recv_core(struct MPID_Request *req, void* raddr, uint32_t rkey, void* write_to_buf) 
+int MPID_nem_dcfa_lmt_start_recv_core(struct MPID_Request *req, void *raddr, uint32_t rkey,
+                                      void *write_to_buf)
 {
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
@@ -132,19 +147,25 @@ int MPID_nem_dcfa_lmt_start_recv_core(struct MPID_Request *req, void* raddr, uin
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV_CORE);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV_CORE);
 
-    ibcom_errno = ibcom_lrecv(vc_dcfa->sc->fd, (uint64_t)req, raddr, req->ch.lmt_data_sz, rkey, write_to_buf);
+    ibcom_errno =
+        ibcom_lrecv(vc_dcfa->sc->fd, (uint64_t) req, raddr, req->ch.lmt_data_sz, rkey,
+                    write_to_buf);
     MPID_nem_dcfa_ncqe += 1;
     //dprintf("start_recv,ncqe=%d\n", MPID_nem_dcfa_ncqe);
-    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_lrecv");    
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_lrecv");
     dprintf("lmt_start_recv_core,MPID_nem_dcfa_ncqe=%d\n", MPID_nem_dcfa_ncqe);
-    dprintf("lmt_start_recv_core,req=%p,sz=%ld,write_to_buf=%p,lmt_pack_buf=%p,user_buf=%p,raddr=%p,rkey=%08x,tail=%p=%02x\n", req, req->ch.lmt_data_sz, write_to_buf, REQ_FIELD(req, lmt_pack_buf), req->dev.user_buf, raddr, rkey, write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t), *((uint8_t*)(write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));
-    
+    dprintf
+        ("lmt_start_recv_core,req=%p,sz=%ld,write_to_buf=%p,lmt_pack_buf=%p,user_buf=%p,raddr=%p,rkey=%08x,tail=%p=%02x\n",
+         req, req->ch.lmt_data_sz, write_to_buf, REQ_FIELD(req, lmt_pack_buf), req->dev.user_buf,
+         raddr, rkey, write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t),
+         *((uint8_t *) (write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));
+
 #ifdef LMT_GET_CQE
-    MPID_nem_dcfa_ncqe_to_drain += 1; /* use CQE instead of polling */
+    MPID_nem_dcfa_ncqe_to_drain += 1;   /* use CQE instead of polling */
 #else
     /* drain_scq and dcfa_poll is not ordered, so both can decrement ref_count */
     MPIR_Request_add_ref(req);
-    
+
     /* register to poll list in dcfa_poll() */
     /* don't use req->dev.next because it causes unknown problem */
     MPID_nem_dcfa_lmtq_enqueue(&MPID_nem_dcfa_lmtq, req);
@@ -153,28 +174,28 @@ int MPID_nem_dcfa_lmt_start_recv_core(struct MPID_Request *req, void* raddr, uin
     //dprintf("start_recv_core,cur_tail=%02x,lmt_receiver_tail=%02x\n", *tailmagic, REQ_FIELD(req, lmt_receiver_tail));
 #endif
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV_CORE);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 /* Get protocol: (1) sender sends rts to receiver (2) receiver RDMA-reads (here)
-   (3) receiver polls on end-flag (4) receiver sends done to sender 
+   (3) receiver polls on end-flag (4) receiver sends done to sender
    caller: (in mpid_nem_lmt.c)
 */
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_dcfa_lmt_start_recv
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_lmt_start_recv(struct MPIDI_VC *vc, struct MPID_Request *req, MPID_IOV s_cookie) 
+int MPID_nem_dcfa_lmt_start_recv(struct MPIDI_VC *vc, struct MPID_Request *req, MPID_IOV s_cookie)
 {
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
     int dt_contig;
-    MPIDI_msg_sz_t data_sz; 
-    MPID_Datatype * dt_ptr;
+    MPIDI_msg_sz_t data_sz;
+    MPID_Datatype *dt_ptr;
     MPI_Aint dt_true_lb;
     MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
@@ -186,143 +207,177 @@ int MPID_nem_dcfa_lmt_start_recv(struct MPIDI_VC *vc, struct MPID_Request *req,
 
     /* obtain dt_true_lb */
     /* see MPIDI_Datatype_get_info(in, in, out, out, out, out) (in src/mpid/ch3/include/mpidimpl.h) */
-    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr,
+                            dt_true_lb);
 
-    MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = s_cookie.iov_base;
+    MPID_nem_dcfa_lmt_cookie_t *s_cookie_buf = s_cookie.iov_base;
 
     /* stash vc for dcfa_poll */
     req->ch.vc = vc;
 
-    void* write_to_buf;
-    if(dt_contig) {
-        write_to_buf = (void*)((char *)req->dev.user_buf /*+ REQ_FIELD(req, lmt_dt_true_lb)*/); 
-    } else {
+    void *write_to_buf;
+    if (dt_contig) {
+        write_to_buf = (void *) ((char *) req->dev.user_buf /*+ REQ_FIELD(req, lmt_dt_true_lb) */);
+    }
+    else {
         //REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc((size_t)req->ch.lmt_data_sz);
-        REQ_FIELD(req, lmt_pack_buf) = MPID_nem_dcfa_stmalloc((size_t)req->ch.lmt_data_sz);
-        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
+        REQ_FIELD(req, lmt_pack_buf) = MPID_nem_dcfa_stmalloc((size_t) req->ch.lmt_data_sz);
+        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER,
+                            "**outofmemory");
         write_to_buf = REQ_FIELD(req, lmt_pack_buf);
     }
 
 #ifdef LMT_GET_CQE
 #else
     /* unmark magic */
-    *((uint8_t*)(write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))) = ~s_cookie_buf->tail; /* size in cookie was not set */
+    *((uint8_t *) (write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))) = ~s_cookie_buf->tail;        /* size in cookie was not set */
 #endif
-    dprintf("lmt_start_recv,dt_contig=%d,write_to_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p,marked-tail=%02x,unmarked-tail=%02x\n", dt_contig, write_to_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf), s_cookie_buf->tail, *((uint8_t*)(write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));
+    dprintf
+        ("lmt_start_recv,dt_contig=%d,write_to_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p,marked-tail=%02x,unmarked-tail=%02x\n",
+         dt_contig, write_to_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf),
+         s_cookie_buf->tail, *((uint8_t *) (write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));
 
     /* stash tail for poll because do_cts in mpid_nem_lmt.c free s_cookie_buf just after this function */
     REQ_FIELD(req, lmt_tail) = s_cookie_buf->tail;
-    dprintf("lmt_start_recv,mem-tail=%p,%02x\n", write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t), *((uint32_t*)(write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));
+    dprintf("lmt_start_recv,mem-tail=%p,%02x\n",
+            write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t),
+            *((uint32_t *) (write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));
 
     //dprintf("lmt_start_recv,sendq_empty=%d,ncom=%d,ncqe=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY);
-        
+
     /* try to issue RDMA-read command */
-    int slack = 1; /* slack for control packet bringing sequence number */
-        if(MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack && MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack) {
-            mpi_errno = MPID_nem_dcfa_lmt_start_recv_core(req, s_cookie_buf->addr, s_cookie_buf->rkey, write_to_buf); /* fast path not storing raddr and rkey */
-            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-        } else {
-            /* enqueue command into send_queue */
-            dprintf("lmt_start_recv, enqueuing,sendq_empty=%d,ncom=%d,ncqe=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY);
-            
-            /* make raddr, (sz is in rreq->ch.lmt_data_sz), rkey, (user_buf is in req->dev.user_buf) survive enqueue, free cookie, dequeue */
-            REQ_FIELD(req, lmt_raddr) = s_cookie_buf->addr;
-            REQ_FIELD(req, lmt_rkey) = s_cookie_buf->rkey;
-            REQ_FIELD(req, lmt_write_to_buf) = write_to_buf;
-            
-            MPID_nem_dcfa_sendq_enqueue(&vc_dcfa->sendq, req);
+    int slack = 1;              /* slack for control packet bringing sequence number */
+    if (MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) &&
+        vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack &&
+        MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack) {
+        mpi_errno = MPID_nem_dcfa_lmt_start_recv_core(req, s_cookie_buf->addr, s_cookie_buf->rkey, write_to_buf);       /* fast path not storing raddr and rkey */
+        if (mpi_errno) {
+            MPIU_ERR_POP(mpi_errno);
         }
+    }
+    else {
+        /* enqueue command into send_queue */
+        dprintf("lmt_start_recv, enqueuing,sendq_empty=%d,ncom=%d,ncqe=%d\n",
+                MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq),
+                vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY,
+                MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY);
+
+        /* make raddr, (sz is in rreq->ch.lmt_data_sz), rkey, (user_buf is in req->dev.user_buf) survive enqueue, free cookie, dequeue */
+        REQ_FIELD(req, lmt_raddr) = s_cookie_buf->addr;
+        REQ_FIELD(req, lmt_rkey) = s_cookie_buf->rkey;
+        REQ_FIELD(req, lmt_write_to_buf) = write_to_buf;
+
+        MPID_nem_dcfa_sendq_enqueue(&vc_dcfa->sendq, req);
+    }
 
     /* extract embeded RDMA-write-to buffer occupancy information */
-        dprintf("lmt_start_recv,old lsr_seq_num=%d,s_cookie_buf->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, s_cookie_buf->seq_num_tail);
-        vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, s_cookie_buf->seq_num_tail);
-        //dprintf("lmt_start_recv,new lsr_seq_num=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("lmt_start_recv,old lsr_seq_num=%d,s_cookie_buf->seq_num_tail=%d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail, s_cookie_buf->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail =
+        DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, s_cookie_buf->seq_num_tail);
+    //dprintf("lmt_start_recv,new lsr_seq_num=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
 
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
-        /* change remote notification policy of RDMA-write-to buf */
-        //dprintf("lmt_start_recv,reply_seq_num,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-        MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa,& vc_dcfa->ibcom->lsr_seq_num_tail);
-        //dprintf("lmt_start_recv,reply_seq_num,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    /* change remote notification policy of RDMA-write-to buf */
+    //dprintf("lmt_start_recv,reply_seq_num,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa,
+                                                            &vc_dcfa->ibcom->lsr_seq_num_tail);
+    //dprintf("lmt_start_recv,reply_seq_num,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
 #endif
-        //dprintf("lmt_start_recv,reply_seq_num,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
-        /* try to send from sendq because at least one RDMA-write-to buffer has been released */
-        //dprintf("lmt_start_recv,reply_seq_num,send_progress\n");
-        if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
-            dprintf("lmt_start_recv,ncom=%d,ncqe=%d,diff=%d\n", vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG);
-        }
-        if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa)) {
-            dprintf("lmt_start_recv,send_progress\n");fflush(stdout);
-            MPID_nem_dcfa_send_progress(vc_dcfa); 
-        }
+    //dprintf("lmt_start_recv,reply_seq_num,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    /* try to send from sendq because at least one RDMA-write-to buffer has been released */
+    //dprintf("lmt_start_recv,reply_seq_num,send_progress\n");
+    if (!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+        dprintf("lmt_start_recv,ncom=%d,ncqe=%d,diff=%d\n",
+                vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY,
+                MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY,
+                MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                                     vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG);
+    }
+    if (!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) &&
+        MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa)) {
+        dprintf("lmt_start_recv,send_progress\n");
+        fflush(stdout);
+        MPID_nem_dcfa_send_progress(vc_dcfa);
+    }
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
-/* fall-back to lmt-get if end-flag of send-buf has the same value as the end-flag of recv-buf */ 
+/* fall-back to lmt-get if end-flag of send-buf has the same value as the end-flag of recv-buf */
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_dcfa_lmt_switch_send
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_lmt_switch_send(struct MPIDI_VC *vc, struct MPID_Request *req) {
+int MPID_nem_dcfa_lmt_switch_send(struct MPIDI_VC *vc, struct MPID_Request *req)
+{
     int mpi_errno = MPI_SUCCESS;
     int dt_contig;
-    MPIDI_msg_sz_t data_sz; 
-    MPID_Datatype * dt_ptr;
+    MPIDI_msg_sz_t data_sz;
+    MPID_Datatype *dt_ptr;
     MPI_Aint dt_true_lb;
     MPID_IOV r_cookie = req->ch.lmt_tmp_cookie;
-    MPID_nem_dcfa_lmt_cookie_t* r_cookie_buf = r_cookie.iov_base;
+    MPID_nem_dcfa_lmt_cookie_t *r_cookie_buf = r_cookie.iov_base;
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_SWITCH_SEND);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_SWITCH_SEND);
 
-    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
-    
-    void* write_from_buf; 
-    if(dt_contig) {
+    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr,
+                            dt_true_lb);
+
+    void *write_from_buf;
+    if (dt_contig) {
         write_from_buf = req->dev.user_buf;
-    } else {
+    }
+    else {
         /* see MPIDI_CH3_EagerNoncontigSend (in ch3u_eager.c) */
-        req->dev.segment_ptr = MPID_Segment_alloc( );
-        MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
-        
-        MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, req->dev.segment_ptr, 0);
+        req->dev.segment_ptr = MPID_Segment_alloc();
+        MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
+                            "**outofmemory");
+
+        MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype,
+                          req->dev.segment_ptr, 0);
         req->dev.segment_first = 0;
         req->dev.segment_size = data_sz;
 
         MPIDI_msg_sz_t last;
-        last = req->dev.segment_size; /* segment_size is byte offset */
+        last = req->dev.segment_size;   /* segment_size is byte offset */
         MPIU_Assert(last > 0);
 
         REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc(data_sz);
-        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
+        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER,
+                            "**outofmemory");
 
-        MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last, (char *)(REQ_FIELD(req, lmt_pack_buf)));
+        MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last,
+                          (char *) (REQ_FIELD(req, lmt_pack_buf)));
         MPIU_Assert(last == req->dev.segment_size);
 
         write_from_buf = REQ_FIELD(req, lmt_pack_buf);
     }
 
     //assert(dt_true_lb == 0);
-    uint8_t* tailp = (uint8_t*)(write_from_buf /*+ dt_true_lb*/ + data_sz - sizeof(uint8_t));
+    uint8_t *tailp = (uint8_t *) (write_from_buf /*+ dt_true_lb */  + data_sz - sizeof(uint8_t));
 #if 0
     *is_end_flag_same = (r_cookie_buf->tail == *tailp) ? 1 : 0;
 #else
     REQ_FIELD(req, lmt_receiver_tail) = r_cookie_buf->tail;
     REQ_FIELD(req, lmt_sender_tail) = *tailp;
-    dprintf("lmt_switch_send,tail on sender=%02x,tail onreceiver=%02x,req=%p\n", *tailp, r_cookie_buf->tail, req);
-    uint8_t* tail_wordp = (uint8_t*)(write_from_buf + data_sz - sizeof(uint32_t) * 2);
-    dprintf("lmt_switch_send,tail on sender=%d\n", *tail_wordp);fflush(stdout);
+    dprintf("lmt_switch_send,tail on sender=%02x,tail onreceiver=%02x,req=%p\n", *tailp,
+            r_cookie_buf->tail, req);
+    uint8_t *tail_wordp = (uint8_t *) (write_from_buf + data_sz - sizeof(uint32_t) * 2);
+    dprintf("lmt_switch_send,tail on sender=%d\n", *tail_wordp);
+    fflush(stdout);
 #endif
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_SWITCH_SEND);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 /* when cookie is received in the middle of the lmt */
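
/* Editor's sketch, not part of the patch: the shape of the "issue now or park
 * on the send queue" decision in lmt_start_recv.  One slot of slack is kept in
 * both the IB command queue and the completion queue for the control packet
 * that carries the sequence number.  Names and the function-pointer plumbing
 * are illustrative only. */
static int example_try_issue_rdma_read(int sendq_empty, int ncom, int ncqe,
                                       int max_sq, int max_cq,
                                       int (*issue_now)(void),
                                       void (*enqueue_for_later)(void))
{
    const int slack = 1;            /* room kept for the seq-num control packet */

    if (sendq_empty && ncom < max_sq - slack && ncqe < max_cq - slack)
        return issue_now();         /* fast path: no need to stash raddr/rkey */

    enqueue_for_later();            /* slow path: replayed when the queues drain */
    return 0;
}
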
@@ -340,11 +395,11 @@ int MPID_nem_dcfa_lmt_handle_cookie(struct MPIDI_VC *vc, struct MPID_Request *re
 
     /* Nothing to do */
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_HANDLE_COOKIE);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 /* when sender receives DONE from receiver */
@@ -359,59 +414,63 @@ int MPID_nem_dcfa_lmt_done_send(struct MPIDI_VC *vc, struct MPID_Request *req)
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_SEND);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_SEND);
 
-    dprintf("lmt_done_send,enter,%d<-%d,req=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n", MPID_nem_dcfa_myrank, vc->pg_rank, req, REQ_FIELD(req, lmt_pack_buf));
+    dprintf("lmt_done_send,enter,%d<-%d,req=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n",
+            MPID_nem_dcfa_myrank, vc->pg_rank, req, REQ_FIELD(req, lmt_pack_buf));
 
 
     /* free memory area for cookie */
-    if(!req->ch.s_cookie) {
+    if (!req->ch.s_cookie) {
         dprintf("lmt_done_send,enter,req->ch.s_cookie is zero");
     }
     MPIU_Free(req->ch.s_cookie);
     //dprintf("lmt_done_send,free cookie,%p\n", req->ch.s_cookie);
-    
+
 #ifdef LMT_PRED
-        if(MPID_rndv_pred_nlearn == 1 || MPID_rndv_pred_npractice > 1) {
-            if(req->dev.rndv_pred_decision == MPIDI_CH3_RNDV_SEND_RTS) {
-                MPID_rndv_pred_hit++;
-                MPID_rndv_pred_count++;
-            } 
+    if (MPID_rndv_pred_nlearn == 1 || MPID_rndv_pred_npractice > 1) {
+        if (req->dev.rndv_pred_decision == MPIDI_CH3_RNDV_SEND_RTS) {
+            MPID_rndv_pred_hit++;
+            MPID_rndv_pred_count++;
+        }
+    }
+    if (MPID_rndv_pred_nlearn > 1) {
+        MPID_hist = (MPID_hist << 1) | 1;
+        pht_update((uint64_t) buf, MPID_hist, 1);
+        MPID_rndv_pred_nlearn++;
+        if (MPID_rndv_pred_nlearn > MPID_RNDV_PRED_MAXLEARN) {
+            MPID_rndv_pred_nlearn = 0;
+            MPID_rndv_pred_npractice = 1;
         }
-        if(MPID_rndv_pred_nlearn > 1) {
-            MPID_hist = (MPID_hist << 1) | 1;
-            pht_update((uint64_t)buf, MPID_hist, 1);
-            MPID_rndv_pred_nlearn++;
-            if(MPID_rndv_pred_nlearn > MPID_RNDV_PRED_MAXLEARN) {
-                MPID_rndv_pred_nlearn = 0;
-                MPID_rndv_pred_npractice = 1;
-            }
-        } 
-        if(MPID_rndv_pred_nlearn == 1) { MPID_rndv_pred_nlearn++; }
+    }
+    if (MPID_rndv_pred_nlearn == 1) {
+        MPID_rndv_pred_nlearn++;
+    }
 #endif
 
 
     /* free temporal buffer for eager-send non-contiguous data.
-       MPIDI_CH3U_Recvq_FDU_or_AEP (in mpid_isend.c) sets req->dev.datatype */
+     * MPIDI_CH3U_Recvq_FDU_or_AEP (in mpid_isend.c) sets req->dev.datatype */
     int is_contig;
     MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
-    if(!is_contig && REQ_FIELD(req, lmt_pack_buf)) {
+    if (!is_contig && REQ_FIELD(req, lmt_pack_buf)) {
         dprintf("lmt_done_send,lmt-get,non-contiguous,free lmt_pack_buf\n");
-#if 1 /* debug, enable again later */
-        MPIU_Free(REQ_FIELD(req, lmt_pack_buf));       
+#if 1   /* debug, enable again later */
+        MPIU_Free(REQ_FIELD(req, lmt_pack_buf));
 #endif
     }
 
     /* mark completion on sreq */
-    MPIU_ERR_CHKANDJUMP(req->dev.OnDataAvail, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_lmt_done_send");
+    MPIU_ERR_CHKANDJUMP(req->dev.OnDataAvail, mpi_errno, MPI_ERR_OTHER,
+                        "**MPID_nem_dcfa_lmt_done_send");
     dprintf("lmt_done_send,1,req=%p,pcc=%d\n", req, MPIDI_CH3I_progress_completion_count.v);
     MPIDI_CH3U_Request_complete(req);
     dprintf("lmt_done_send,2,req=%p,pcc=%d\n", req, MPIDI_CH3I_progress_completion_count.v);
     //dprintf("lmt_done_send, mark completion on sreq\n");
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_SEND);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 /* lmt-put (1) sender sends done when finding cqe of put (2) packet-handler of DONE on receiver (3) here */
@@ -432,41 +491,43 @@ int MPID_nem_dcfa_lmt_done_recv(struct MPIDI_VC *vc, struct MPID_Request *rreq)
 
     int is_contig;
     MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);
-    if(!is_contig) {
+    if (!is_contig) {
         dprintf("lmt_done_recv,copying noncontiguous data to user buffer\n");
-        
+
         /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
         /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
         MPIDI_msg_sz_t unpack_sz = rreq->ch.lmt_data_sz;
         MPID_Segment seg;
         MPI_Aint last;
-        
+
         MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &seg, 0);
         last = unpack_sz;
         MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(rreq, lmt_pack_buf));
         if (last != unpack_sz) {
             /* --BEGIN ERROR HANDLING-- */
-            /* received data was not entirely consumed by unpack() 
-               because too few bytes remained to fill the next basic
-               datatype */
+            /* received data was not entirely consumed by unpack()
+             * because too few bytes remained to fill the next basic
+             * datatype */
             MPIR_STATUS_SET_COUNT(rreq->status, last);
-            rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_dcfa_lmt_done_recv", 0);
+            rreq->status.MPI_ERROR =
+                MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
+                                     MPI_ERR_TYPE, "**MPID_nem_dcfa_lmt_done_recv", 0);
             /* --END ERROR HANDLING-- */
         }
-        
+
         //MPIU_Free(REQ_FIELD(rreq, lmt_pack_buf));
-        MPID_nem_dcfa_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t)rreq->ch.lmt_data_sz);
+        MPID_nem_dcfa_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t) rreq->ch.lmt_data_sz);
     }
 
     dprintf("lmt_done_recv,1,req=%p,pcc=%d\n", rreq, MPIDI_CH3I_progress_completion_count.v);
     MPIDI_CH3U_Request_complete(rreq);
     dprintf("lmt_done_recv,2,pcc=%d\n", MPIDI_CH3I_progress_completion_count.v);
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_RECV);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 #undef FUNCNAME
@@ -483,10 +544,9 @@ int MPID_nem_dcfa_lmt_vc_terminated(struct MPIDI_VC *vc)
 
     /* Nothing to do */
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_VC_TERMINATED);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
-
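
/* Editor's sketch, not part of the patch: the occupancy test that the debug
 * output above keeps printing.  It is assumed here that MPID_nem_dcfa_diff32
 * behaves like an ordinary wrap-tolerant 32-bit sequence difference; the
 * checks in the diff compare that difference against IBCOM_RDMABUF_NSEG to
 * decide whether the remote RDMA-write-to buffer still has free segments. */
#include <stdint.h>

static inline uint32_t example_diff32(uint32_t head, uint32_t tail)
{
    return head - tail;             /* unsigned wrap-around yields the in-flight count */
}

static inline int example_rdmabuf_has_room(uint32_t sseq_num,
                                           uint32_t lsr_seq_num_tail,
                                           uint32_t nseg)
{
    return example_diff32(sseq_num, lsr_seq_num_tail) < nseg;
}
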
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
index 2f88ec8..43a43b3 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
@@ -8,7 +8,7 @@
 #include "dcfa_impl.h"
 
 //#define DEBUG_DCFA_POLL
-#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#ifdef dprintf  /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
 #undef dprintf
 #endif
 #ifdef DEBUG_DCFA_POLL
@@ -34,7 +34,7 @@ static int entered_drain_scq = 0;
 #endif
 #if 1
 #define MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS \
-    if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa)) { \
+    if (!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa)) { \
     MPID_nem_dcfa_send_progress(vc_dcfa); \
 }
 #else
@@ -45,7 +45,8 @@ static int entered_drain_scq = 0;
 #define FUNCNAME MPID_nem_dcfa_drain_scq
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_drain_scq(int dont_call_progress) {
+int MPID_nem_dcfa_drain_scq(int dont_call_progress)
+{
 
     int mpi_errno = MPI_SUCCESS;
     int result;
@@ -56,84 +57,92 @@ int MPID_nem_dcfa_drain_scq(int dont_call_progress) {
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ);
 
     /* prevent a call path drain_scq -> send_progress -> drain_scq */
-    if(entered_drain_scq) { printf("drain_scq,re-enter\n"); goto fn_exit; }
+    if (entered_drain_scq) {
+        printf("drain_scq,re-enter\n");
+        goto fn_exit;
+    }
     entered_drain_scq = 1;
 
-#if 0 /*def DCFA*/
+#if 0   /*def DCFA */
     result = ibv_poll_cq(rc_shared_scq, 1, &cqe[0]);
 #else
-    result = ibv_poll_cq(rc_shared_scq, /*3*/IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
+    result = ibv_poll_cq(rc_shared_scq, /*3 */ IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
 #endif
 
     MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
-    
-    if(result > 0) {
-        dprintf("poll,scq,result=%d\n", result); 
+
+    if (result > 0) {
+        dprintf("poll,scq,result=%d\n", result);
     }
-    for(i = 0; i < result; i++)  {
+    for (i = 0; i < result; i++) {
         dprintf("drain_scq,i=%d\n", i);
-        
+
         MPID_Request *req;
         MPID_Request_kind_t kind;
         int req_type, msg_type;
-        
+
         /* Obtain sreq */
-        req = (MPID_Request*)cqe[i].wr_id;
+        req = (MPID_Request *) cqe[i].wr_id;
 
         kind = req->kind;
         req_type = MPIDI_Request_get_type(req);
         msg_type = MPIDI_Request_get_msg_type(req);
 
-        dprintf("drain_scq,req=%p,req->ref_count=%d,cc_ptr=%d\n", req, req->ref_count, *req->cc_ptr);
-        if(req->ref_count <= 0) {
+        dprintf("drain_scq,req=%p,req->ref_count=%d,cc_ptr=%d\n", req, req->ref_count,
+                *req->cc_ptr);
+        if (req->ref_count <= 0) {
             MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
-            printf("%d\n", *(int*)0); 
+            printf("%d\n", *(int *) 0);
         }
 
 #ifdef DCFA
-        if(cqe[i].status != IBV_WC_SUCCESS) { 
-            dprintf("drain_scq,kind=%d,req_type=%d,msg_type=%d,cqe.status=%08x\n", kind, req_type, msg_type, cqe[i].status); 
+        if (cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("drain_scq,kind=%d,req_type=%d,msg_type=%d,cqe.status=%08x\n", kind, req_type,
+                    msg_type, cqe[i].status);
         }
 #else
-        if(cqe[i].status != IBV_WC_SUCCESS) { 
-            dprintf("drain_scq,kind=%d,req_type=%d,msg_type=%d,comm=%p,cqe.status=%08x,%s\n", kind, req_type, msg_type, req->comm, cqe[i].status, ibv_wc_status_str(cqe[i].status)); 
+        if (cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("drain_scq,kind=%d,req_type=%d,msg_type=%d,comm=%p,cqe.status=%08x,%s\n", kind,
+                    req_type, msg_type, req->comm, cqe[i].status, ibv_wc_status_str(cqe[i].status));
         }
 #endif
-        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
-
-        /* 
-           packets generated by MPIDI_CH3_iStartMsgv has req_type of RECV
-           lmt_initiate_lmt, lmt_put_cts_to_sender, lmt_put_rts_to_receiver, lmt_send_put_done
-        */
-        if(
-           //req_type == MPIDI_REQUEST_TYPE_SEND 
-           (req_type == MPIDI_REQUEST_TYPE_SEND ||
-            req_type == MPIDI_REQUEST_TYPE_RECV ||
-            req_type == MPIDI_REQUEST_TYPE_SSEND)
-           && msg_type == MPIDI_REQUEST_EAGER_MSG) {
-            dprintf("drain_scq,send/recv,eager,req_type=%d,,comm=%p,opcode=%d\n", req_type, req->comm, cqe[i].opcode);
-            
+        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER,
+                            "**MPID_nem_dcfa_drain_scq");
+
+        /*
+         * packets generated by MPIDI_CH3_iStartMsgv has req_type of RECV
+         * lmt_initiate_lmt, lmt_put_cts_to_sender, lmt_put_rts_to_receiver, lmt_send_put_done
+         */
+        if (
+               //req_type == MPIDI_REQUEST_TYPE_SEND
+               (req_type == MPIDI_REQUEST_TYPE_SEND ||
+                req_type == MPIDI_REQUEST_TYPE_RECV || req_type == MPIDI_REQUEST_TYPE_SSEND)
+               && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+            dprintf("drain_scq,send/recv,eager,req_type=%d,,comm=%p,opcode=%d\n", req_type,
+                    req->comm, cqe[i].opcode);
+
             MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
-            dprintf("drain_scq,MPIDI_REQUEST_EAGER_MSG,%d->%d,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, req->ch.vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail)); /* moved before MPID_Request_release because this references req->ch.vc */
+            dprintf("drain_scq,MPIDI_REQUEST_EAGER_MSG,%d->%d,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, req->ch.vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));      /* moved before MPID_Request_release because this references req->ch.vc */
 
             /* free temporal buffer for eager-send non-contiguous data.
-               MPIDI_Request_create_sreq (in mpid_isend.c) sets req->dev.datatype 
-               control message has a req_type of MPIDI_REQUEST_TYPE_RECV and 
-               msg_type of MPIDI_REQUEST_EAGER_MSG because
-               control message send follows 
-               MPIDI_CH3_iStartMsg/v-->MPID_nem_dcfa_iStartContigMsg-->MPID_nem_dcfa_iSendContig
-               and MPID_nem_dcfa_iSendContig set req->dev.state to zero.
-               see MPID_Request_create (in src/mpid/ch3/src/ch3u_request.c) 
-               eager-short message has req->comm of zero 
-            */
-            if(req_type == MPIDI_REQUEST_TYPE_SEND && req->comm) {
+             * MPIDI_Request_create_sreq (in mpid_isend.c) sets req->dev.datatype
+             * control message has a req_type of MPIDI_REQUEST_TYPE_RECV and
+             * msg_type of MPIDI_REQUEST_EAGER_MSG because
+             * control message send follows
+             * MPIDI_CH3_iStartMsg/v-->MPID_nem_dcfa_iStartContigMsg-->MPID_nem_dcfa_iSendContig
+             * and MPID_nem_dcfa_iSendContig set req->dev.state to zero.
+             * see MPID_Request_create (in src/mpid/ch3/src/ch3u_request.c)
+             * eager-short message has req->comm of zero
+             */
+            if (req_type == MPIDI_REQUEST_TYPE_SEND && req->comm) {
                 /* exclude control messages by requiring MPIDI_REQUEST_TYPE_SEND
-                   exclude eager-short by requiring req->comm != 0 */
+                 * exclude eager-short by requiring req->comm != 0 */
                 int is_contig;
                 MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
-                if(!is_contig && REQ_FIELD(req, lmt_pack_buf)) {
-                    dprintf("drain_scq,eager-send,non-contiguous,free lmt_pack_buf=%p\n", REQ_FIELD(req, lmt_pack_buf));
-                    MPIU_Free(REQ_FIELD(req, lmt_pack_buf));       
+                if (!is_contig && REQ_FIELD(req, lmt_pack_buf)) {
+                    dprintf("drain_scq,eager-send,non-contiguous,free lmt_pack_buf=%p\n",
+                            REQ_FIELD(req, lmt_pack_buf));
+                    MPIU_Free(REQ_FIELD(req, lmt_pack_buf));
                 }
             }
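
/* Editor's sketch, not part of the patch: the skeleton that drain_scq follows.
 * A small batch of completions is polled from the shared send CQ, the
 * originating request is recovered from wc.wr_id, and a failed status becomes
 * an MPI error.  ibv_poll_cq and struct ibv_wc come from libibverbs; the
 * per-request dispatch on req_type/msg_type is reduced to a placeholder. */
#include <stdint.h>
#include <infiniband/verbs.h>

#define EXAMPLE_CQ_BATCH 16     /* IBCOM_MAX_CQ_HEIGHT_DRAIN plays this role above */

static int example_drain_scq(struct ibv_cq *scq)
{
    struct ibv_wc wc[EXAMPLE_CQ_BATCH];
    int i, n = ibv_poll_cq(scq, EXAMPLE_CQ_BATCH, wc);

    if (n < 0)
        return -1;                                    /* "**netmod,dcfa,ibv_poll_cq" */

    for (i = 0; i < n; i++) {
        void *req = (void *) (uintptr_t) wc[i].wr_id; /* an MPID_Request * in the diff */

        if (wc[i].status != IBV_WC_SUCCESS)
            return -1;                                /* "**MPID_nem_dcfa_drain_scq" */
        (void) req;                                   /* dispatch on req_type/msg_type here */
    }
    return 0;
}
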
 
@@ -145,63 +154,77 @@ int MPID_nem_dcfa_drain_scq(int dont_call_progress) {
 
             /* ref_count is decremented in drain_scq and wait */
             if (*req->cc_ptr > 0) {
-                dprintf("drain_scq,MPID_nem_dcfa_ncqe_nces=%d,cc_ptr=%d,pending_sends=%d\n", MPID_nem_dcfa_ncqe_nces, *req->cc_ptr, VC_FIELD(req->ch.vc, pending_sends));
+                dprintf("drain_scq,MPID_nem_dcfa_ncqe_nces=%d,cc_ptr=%d,pending_sends=%d\n",
+                        MPID_nem_dcfa_ncqe_nces, *req->cc_ptr, VC_FIELD(req->ch.vc, pending_sends));
                 MPID_nem_dcfa_ncqe_nces -= 1;
 
-            int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
-            
-            (VC_FIELD(req->ch.vc, pending_sends)) -= 1;
-            
-            /* as in the template */
-            reqFn = req->dev.OnDataAvail;
-            if (!reqFn){
-                MPIDI_CH3U_Request_complete(req);
-                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-                dprintf("drain_scq,complete,req=%p,pcc incremented to %d\n", req, MPIDI_CH3I_progress_completion_count.v);
-            } else {
-                dprintf("drain_scq,reqFn isn't zero\n");
-                MPIDI_VC_t *vc = req->ch.vc;
-                int complete = 0;
-                mpi_errno = reqFn(vc, req, &complete);
-                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-                /* not-completed case is not implemented */
-                MPIU_Assert(complete == TRUE);
+                int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
+
+                (VC_FIELD(req->ch.vc, pending_sends)) -= 1;
+
+                /* as in the template */
+                reqFn = req->dev.OnDataAvail;
+                if (!reqFn) {
+                    MPIDI_CH3U_Request_complete(req);
+                    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+                    dprintf("drain_scq,complete,req=%p,pcc incremented to %d\n", req,
+                            MPIDI_CH3I_progress_completion_count.v);
+                }
+                else {
+                    dprintf("drain_scq,reqFn isn't zero\n");
+                    MPIDI_VC_t *vc = req->ch.vc;
+                    int complete = 0;
+                    mpi_errno = reqFn(vc, req, &complete);
+                    if (mpi_errno)
+                        MPIU_ERR_POP(mpi_errno);
+                    /* not-completed case is not implemented */
+                    MPIU_Assert(complete == TRUE);
+                }
             }
-            } else {
-            MPID_Request_release(req); 
+            else {
+                MPID_Request_release(req);
             }
             /* try to send from sendq */
             //dprintf("dcfa_poll,SCQ,!lmt,send_progress\n");
-            if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
-                dprintf("drain_scq,eager-send,ncom=%d,ncqe=%d,diff=%d\n", vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG);
-                
+            if (!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+                dprintf("drain_scq,eager-send,ncom=%d,ncqe=%d,diff=%d\n",
+                        vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY,
+                        MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY,
+                        MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                                             vc_dcfa->ibcom->lsr_seq_num_tail) <
+                        IBCOM_RDMABUF_NSEG);
+
                 MPID_Request *sreq = MPID_nem_dcfa_sendq_head(vc_dcfa->sendq);
                 int msg_type = MPIDI_Request_get_msg_type(sreq);
-                
-                if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+
+                if (sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_EAGER_MSG) {
                     dprintf("drain_scq,eager-send,head is eager-send\n");
-                } else if(sreq->kind == MPID_REQUEST_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+                }
+                else if (sreq->kind == MPID_REQUEST_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
                     dprintf("drain_scq,eager-send,head is lmt RDMA-read\n");
-                } else if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+                }
+                else if (sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_RNDV_MSG) {
                     dprintf("drain_scq,eager-send,head is lmt RDMA-write\n");
                 }
             }
             /*  call MPID_nem_dcfa_send_progress for all VCs in polling-set
-                instead of VC which releases CQ, command
-                when releasing them
-                because commands for VC-A are blocked by the command
-                for VC-B and waiting in the sendq
-            */
-                dprintf("drain_scq,eager-send,send_progress\n");
-                //MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET;
+             * instead of VC which releases CQ, command
+             * when releasing them
+             * because commands for VC-A are blocked by the command
+             * for VC-B and waiting in the sendq
+             */
+            dprintf("drain_scq,eager-send,send_progress\n");
+            //MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET;
 
             dprintf("drain_scq,eager-send,next\n");
 
-        } else if(req_type == MPIDI_REQUEST_TYPE_GET_RESP && msg_type == MPIDI_REQUEST_EAGER_MSG) {
-            dprintf("drain_scq,GET_RESP,eager,req_type=%d,,comm=%p,opcode=%d\n", req_type, req->comm, cqe[i].opcode);
-            
+        }
+        else if (req_type == MPIDI_REQUEST_TYPE_GET_RESP && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+            dprintf("drain_scq,GET_RESP,eager,req_type=%d,,comm=%p,opcode=%d\n", req_type,
+                    req->comm, cqe[i].opcode);
+
             MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
-            dprintf("drain_scq,MPIDI_REQUEST_EAGER_MSG,%d->%d,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, req->ch.vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail)); /* moved before MPID_Request_release because this references req->ch.vc */
+            dprintf("drain_scq,MPIDI_REQUEST_EAGER_MSG,%d->%d,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, req->ch.vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));      /* moved before MPID_Request_release because this references req->ch.vc */
 
             /* decrement the number of entries in IB command queue */
             vc_dcfa->ibcom->ncom -= 1;
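
/* Editor's sketch, not part of the patch: the OnDataAvail completion pattern
 * that both eager-send branches of drain_scq follow.  Request and VC types are
 * reduced to opaque placeholders; the callback slot mirrors the reqFn pointer
 * above. */
struct example_vc;
struct example_req {
    int (*on_data_avail)(struct example_vc *vc, struct example_req *req, int *complete);
};

static void example_request_complete(struct example_req *req)
{
    (void) req;                     /* MPIDI_CH3U_Request_complete in the real code */
}

static int example_finish_send(struct example_vc *vc, struct example_req *req)
{
    if (!req->on_data_avail) {      /* no continuation: just mark completion */
        example_request_complete(req);
        return 0;
    }

    int complete = 0;
    int rc = req->on_data_avail(vc, req, &complete);
    if (rc)
        return rc;
    return complete ? 0 : -1;       /* "not-completed case is not implemented" */
}
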
@@ -210,81 +233,92 @@ int MPID_nem_dcfa_drain_scq(int dont_call_progress) {
             MPIU_Assert(req->ref_count == 1 || req->ref_count == 2);
 
             /* ref_count is decremented in drain_scq and wait */
-            dprintf("drain_scq,MPID_nem_dcfa_ncqe_nces=%d,cc_ptr=%d,pending_sends=%d\n", MPID_nem_dcfa_ncqe_nces, *req->cc_ptr, VC_FIELD(req->ch.vc, pending_sends));
+            dprintf("drain_scq,MPID_nem_dcfa_ncqe_nces=%d,cc_ptr=%d,pending_sends=%d\n",
+                    MPID_nem_dcfa_ncqe_nces, *req->cc_ptr, VC_FIELD(req->ch.vc, pending_sends));
             MPID_nem_dcfa_ncqe_nces -= 1;
 
-            int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
-            
+            int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
+
             (VC_FIELD(req->ch.vc, pending_sends)) -= 1;
-            
+
             /* as in the template */
             reqFn = req->dev.OnDataAvail;
-            if (!reqFn){
+            if (!reqFn) {
                 MPIDI_CH3U_Request_complete(req);
                 MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-                dprintf("drain_scq,complete,req=%p,pcc incremented to %d\n", req, MPIDI_CH3I_progress_completion_count.v);
-            } else {
+                dprintf("drain_scq,complete,req=%p,pcc incremented to %d\n", req,
+                        MPIDI_CH3I_progress_completion_count.v);
+            }
+            else {
                 dprintf("drain_scq,reqFn isn't zero\n");
                 dprintf("drain_scq,GET_RESP,before dev.OnDataAvail,ref_count=%d\n", req->ref_count);
                 MPIDI_VC_t *vc = req->ch.vc;
                 int complete = 0;
                 mpi_errno = reqFn(vc, req, &complete);
-                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                if (mpi_errno)
+                    MPIU_ERR_POP(mpi_errno);
                 /* not-completed case is not implemented */
                 MPIU_Assert(complete == TRUE);
             }
-                
+
             //MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET;
 
             dprintf("drain_scq,GET_RESP,next\n");
 
-        } else if(req_type == MPIDI_REQUEST_TYPE_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG && cqe[i].opcode == IBV_WC_RDMA_READ) {
+        }
+        else if (req_type == MPIDI_REQUEST_TYPE_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG &&
+                 cqe[i].opcode == IBV_WC_RDMA_READ) {
             /* lmt get */
-            /* the case for lmt-put-done or lmt-put where 
-               (1) sender finds end-flag won't change (2) sender sends RTS to receiver
-               (3) receiver gets (4) here 
-               is distinguished by cqe[i].opcode
-            */
+            /* the case for lmt-put-done or lmt-put where
+             * (1) sender finds end-flag won't change (2) sender sends RTS to receiver
+             * (3) receiver gets (4) here
+             * is distinguished by cqe[i].opcode
+             */
             dprintf("drain_scq,recv,rndv,rdma-read,kind=%d,opcode=%d\n", kind, cqe[i].opcode);
 
 
-           MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
+            MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
 #if defined(LMT_GET_CQE)
 
-           /* unpack non-contiguous dt */
-           int is_contig;
-           MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
-           if(!is_contig) {
-               dprintf("drain_scq,lmt,GET_CQE,unpack noncontiguous data to user buffer\n");
-
-               /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
-               /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
-               MPIDI_msg_sz_t unpack_sz = req->ch.lmt_data_sz;
-               MPID_Segment seg;
-               MPI_Aint last;
-
-               MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, &seg, 0);
-               last = unpack_sz;
-               MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(req, lmt_pack_buf));
-               if (last != unpack_sz) {
-                   /* --BEGIN ERROR HANDLING-- */
-                   /* received data was not entirely consumed by unpack() 
-                      because too few bytes remained to fill the next basic
-                      datatype */
-                   MPIR_STATUS_SET_COUNT(req->status, last);
-                   req->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_dcfa_poll", 0);
-                   /* --END ERROR HANDLING-- */
-               }
-               dprintf("drain_scq,lmt,GET_CQE,ref_count=%d,lmt_pack_buf=%p\n", req->ref_count, REQ_FIELD(req, lmt_pack_buf));
-               MPID_nem_dcfa_stfree(REQ_FIELD(req, lmt_pack_buf), (size_t)req->ch.lmt_data_sz);
-           }
-           dprintf("drain_scq,lmt,GET_CQE,lmt_send_GET_DONE,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
-
-           /* send done to sender. vc is stashed in MPID_nem_dcfa_lmt_start_recv (in dcfa_lmt.c) */
-           MPID_nem_dcfa_lmt_send_GET_DONE(req->ch.vc, req);
+            /* unpack non-contiguous dt */
+            int is_contig;
+            MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
+            if (!is_contig) {
+                dprintf("drain_scq,lmt,GET_CQE,unpack noncontiguous data to user buffer\n");
+
+                /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
+                /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
+                MPIDI_msg_sz_t unpack_sz = req->ch.lmt_data_sz;
+                MPID_Segment seg;
+                MPI_Aint last;
+
+                MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, &seg,
+                                  0);
+                last = unpack_sz;
+                MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(req, lmt_pack_buf));
+                if (last != unpack_sz) {
+                    /* --BEGIN ERROR HANDLING-- */
+                    /* received data was not entirely consumed by unpack()
+                     * because too few bytes remained to fill the next basic
+                     * datatype */
+                    MPIR_STATUS_SET_COUNT(req->status, last);
+                    req->status.MPI_ERROR =
+                        MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
+                                             MPI_ERR_TYPE, "**MPID_nem_dcfa_poll", 0);
+                    /* --END ERROR HANDLING-- */
+                }
+                dprintf("drain_scq,lmt,GET_CQE,ref_count=%d,lmt_pack_buf=%p\n", req->ref_count,
+                        REQ_FIELD(req, lmt_pack_buf));
+                MPID_nem_dcfa_stfree(REQ_FIELD(req, lmt_pack_buf), (size_t) req->ch.lmt_data_sz);
+            }
+            dprintf("drain_scq,lmt,GET_CQE,lmt_send_GET_DONE,rsr_seq_num_tail=%d\n",
+                    vc_dcfa->ibcom->rsr_seq_num_tail);
+
+            /* send done to sender. vc is stashed in MPID_nem_dcfa_lmt_start_recv (in dcfa_lmt.c) */
+            MPID_nem_dcfa_lmt_send_GET_DONE(req->ch.vc, req);
 #endif
             /* unmark "lmt is going on" */
-            
+
             //dprintf("dcfa_poll,SCQ,lmt,%d->%d,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, req->ch.vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail)); /* moved before MPID_Request_release because this references req->ch.vc */
 
             /* decrement the number of entries in IB command queue */
@@ -300,64 +334,74 @@ int MPID_nem_dcfa_drain_scq(int dont_call_progress) {
 
             int is_contig;
             MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
-            if(!is_contig) {
-                //if(req->ref_count == 1) {
-                dprintf("drain_scq,GET&&!GET_CQE,ref_count=%d,lmt_pack_buf=%p\n", req->ref_count, REQ_FIELD(req, lmt_pack_buf));
-                    /* debug, polling waits forever when freeing here. */
-                    //free(REQ_FIELD(req, lmt_pack_buf));
-                    //MPID_nem_dcfa_stfree(REQ_FIELD(req, lmt_pack_buf), (size_t)req->ch.lmt_data_sz);
-                    //dprintf("drain_scq,lmt,insert to free-list=%p\n", MPID_nem_dcfa_fl);
-                    //} else {
-                    //dprintf("drain_scq,GET&&!GET_CQE,ref_count=%d,lmt_pack_buf=%p\n", req->ref_count, REQ_FIELD(req, lmt_pack_buf));
-                    //}
+            if (!is_contig) {
+                //if (req->ref_count == 1) {
+                dprintf("drain_scq,GET&&!GET_CQE,ref_count=%d,lmt_pack_buf=%p\n", req->ref_count,
+                        REQ_FIELD(req, lmt_pack_buf));
+                /* debug, polling waits forever when freeing here. */
+                //free(REQ_FIELD(req, lmt_pack_buf));
+                //MPID_nem_dcfa_stfree(REQ_FIELD(req, lmt_pack_buf), (size_t)req->ch.lmt_data_sz);
+                //dprintf("drain_scq,lmt,insert to free-list=%p\n", MPID_nem_dcfa_fl);
+                //} else {
+                //dprintf("drain_scq,GET&&!GET_CQE,ref_count=%d,lmt_pack_buf=%p\n", req->ref_count, REQ_FIELD(req, lmt_pack_buf));
+                //}
             }
 
-           /* lmt_start_recv increments ref_count 
-              drain_scq and dcfa_poll is not ordered, so both can decrement ref_count */
+            /* lmt_start_recv increments ref_count
+             * drain_scq and dcfa_poll is not ordered, so both can decrement ref_count */
             MPID_Request_release(req);
 #endif
             /* try to send from sendq */
-            if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
-                dprintf("drain_scq,GET,ncom=%d,ncqe=%d,diff=%d\n", vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG);
+            if (!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+                dprintf("drain_scq,GET,ncom=%d,ncqe=%d,diff=%d\n",
+                        vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY,
+                        MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY,
+                        MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                                             vc_dcfa->ibcom->lsr_seq_num_tail) <
+                        IBCOM_RDMABUF_NSEG);
                 MPID_Request *sreq = MPID_nem_dcfa_sendq_head(vc_dcfa->sendq);
                 int msg_type = MPIDI_Request_get_msg_type(sreq);
-                
-                if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+
+                if (sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_EAGER_MSG) {
                     dprintf("drain_scq,eager-send,head is eager-send\n");
-                } else if(sreq->kind == MPID_REQUEST_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+                }
+                else if (sreq->kind == MPID_REQUEST_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
                     dprintf("drain_scq,eager-send,head is lmt\n");
                 }
             }
-            //if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa)) {
-                dprintf("drain_scq,GET,send_progress\n");fflush(stdout);
-                //MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET 
-                    //}
-        } else {
+            //if (!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa)) {
+            dprintf("drain_scq,GET,send_progress\n");
+            fflush(stdout);
+            //MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET
+            //}
+        }
+        else {
             printf("drain_scq,unknown kind=%d,req_type=%d,msg_type=%d\n", kind, req_type, msg_type);
             assert(0);
-#if 1 // lazy consulting of completion queue
+#if 1   // lazy consulting of completion queue
             MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
 #else
             //printf("kind=%d\n", kind);
 #endif
         }
     }
-    if(!dont_call_progress) {
+    if (!dont_call_progress) {
         MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET;
     }
- fn_exit:
+  fn_exit:
     entered_drain_scq = 0;
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_dcfa_drain_scq_lmt_put
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_drain_scq_lmt_put() {
+int MPID_nem_dcfa_drain_scq_lmt_put()
+{
 
     int mpi_errno = MPI_SUCCESS;
     int result;
@@ -367,99 +411,107 @@ int MPID_nem_dcfa_drain_scq_lmt_put() {
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_LMT_PUT);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_LMT_PUT);
 
-#if 0 /*def DCFA*/
+#if 0   /*def DCFA */
     result = ibv_poll_cq(rc_shared_scq_lmt_put, 1, &cqe[0]);
 #else
     result = ibv_poll_cq(rc_shared_scq_lmt_put, IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
 #endif
     MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
-    
-    if(result > 0) {
-        dprintf("drain_scq_lmt_put,found,result=%d\n", result); 
+
+    if (result > 0) {
+        dprintf("drain_scq_lmt_put,found,result=%d\n", result);
     }
-    for(i = 0; i < result; i++)  {
-        
+    for (i = 0; i < result; i++) {
+
         MPID_Request *req;
         MPID_Request_kind_t kind;
         int req_type, msg_type;
-        
+
 #ifdef DCFA
-        if(cqe[i].status != IBV_WC_SUCCESS) {
-            dprintf("drain_scq_lmt_put,status=%08x\n", cqe[i].status); 
+        if (cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("drain_scq_lmt_put,status=%08x\n", cqe[i].status);
         }
 #else
-        if(cqe[i].status != IBV_WC_SUCCESS) {
-            dprintf("drain_scq_lmt_put,status=%08x,%s\n", cqe[i].status, ibv_wc_status_str(cqe[i].status)); 
+        if (cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("drain_scq_lmt_put,status=%08x,%s\n", cqe[i].status,
+                    ibv_wc_status_str(cqe[i].status));
         }
 #endif
-        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_lmt_put");
-        
+        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER,
+                            "**MPID_nem_dcfa_drain_scq_lmt_put");
+
         /* Obtain sreq */
-        req = (MPID_Request*)cqe[i].wr_id;
+        req = (MPID_Request *) cqe[i].wr_id;
         dprintf("drain_scq_lmt_put,req=%p,req->ref_count=%d\n", req, req->ref_count);
         MPIU_Assert(req->ref_count > 0);
 
         kind = req->kind;
         req_type = MPIDI_Request_get_type(req);
         msg_type = MPIDI_Request_get_msg_type(req);
-        
 
-        if(req_type == MPIDI_REQUEST_TYPE_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+
+        if (req_type == MPIDI_REQUEST_TYPE_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
             /* lmt-put */
             /* MPIDI_Request_set_type is not performed when
-               MPID_Isend --> FDU_or_AEP --> recv_posted --> dcfa_poll --> PUTCTS packet-handler */
-            
+             * MPID_Isend --> FDU_or_AEP --> recv_posted --> dcfa_poll --> PUTCTS packet-handler */
+
             dprintf("drain_scq_lmt_put,lmt-put found\n");
-            
-#if 0 /* moving to just after put */ /*implementing back-to-back put and done */
-#endif            
-            
+
+#if 0 /* moving to just after put */    /*implementing back-to-back put and done */
+#endif
+
             /* decrement the number of entries in IB command queue */
             MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
             vc_dcfa->ibcom->ncom_lmt_put -= 1;
             MPID_nem_dcfa_ncqe_lmt_put -= 1;
-            dprintf("drain_scq_lmt_put,rndv,ncqe=%d\n", MPID_nem_dcfa_ncqe_lmt_put);/*suspicious*/
-            int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
-            
+            dprintf("drain_scq_lmt_put,rndv,ncqe=%d\n", MPID_nem_dcfa_ncqe_lmt_put);    /*suspicious */
+            int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
+
             (VC_FIELD(req->ch.vc, pending_sends)) -= 1;
-            
+
             /* as in the template */
             reqFn = req->dev.OnDataAvail;
-            if (!reqFn){
-                MPIDI_CH3U_Request_complete(req); /* decrement cc, signal_completion, decrement ref_count, free */
-                dprintf("drain_scq,lmt-put,req=%p,cc incremented to %d\n", req, MPIDI_CH3I_progress_completion_count.v);
+            if (!reqFn) {
+                MPIDI_CH3U_Request_complete(req);       /* decrement cc, signal_completion, decrement ref_count, free */
+                dprintf("drain_scq,lmt-put,req=%p,cc incremented to %d\n", req,
+                        MPIDI_CH3I_progress_completion_count.v);
                 MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-            } else {
+            }
+            else {
                 MPIDI_VC_t *vc = req->ch.vc;
                 int complete = 0;
                 mpi_errno = reqFn(vc, req, &complete);
-                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                if (mpi_errno)
+                    MPIU_ERR_POP(mpi_errno);
                 /* not-completed case is not implemented */
                 MPIU_Assert(complete == TRUE);
                 MPIU_Assert(0); /* decrement ref_count and free sreq causes problem */
             }
-        } else {
-            dprintf("drain_scq_lmt_put,unknown kind=%d,req_type=%d,msg_type=%d\n", kind, req_type, msg_type);
-#if 1 // lazy consulting of completion queue
+        }
+        else {
+            dprintf("drain_scq_lmt_put,unknown kind=%d,req_type=%d,msg_type=%d\n", kind, req_type,
+                    msg_type);
+#if 1   // lazy consulting of completion queue
             MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_lmt_put");
 #else
             //printf("kind=%d\n", kind);
 #endif
         }
     }
-    
- fn_exit:
+
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_LMT_PUT);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_dcfa_drain_scq_scratch_pad
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_drain_scq_scratch_pad() {
+int MPID_nem_dcfa_drain_scq_scratch_pad()
+{
 
     int mpi_errno = MPI_SUCCESS;
     int result;
@@ -469,437 +521,505 @@ int MPID_nem_dcfa_drain_scq_scratch_pad() {
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_SCRATCH_PAD);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_SCRATCH_PAD);
 
-#if 0 /*def DCFA*/
+#if 0   /*def DCFA */
     result = ibv_poll_cq(rc_shared_scq_scratch_pad, 1, &cqe[0]);
 #else
     result = ibv_poll_cq(rc_shared_scq_scratch_pad, IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
 #endif
     MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
-    
-    if(result > 0) {
-        dprintf("drain_scq_scratch_pad,found,result=%d\n", result); 
+
+    if (result > 0) {
+        dprintf("drain_scq_scratch_pad,found,result=%d\n", result);
     }
-    for(i = 0; i < result; i++)  {
-        
+    for (i = 0; i < result; i++) {
+
 #ifdef DCFA
-        if(cqe[i].status != IBV_WC_SUCCESS) {
+        if (cqe[i].status != IBV_WC_SUCCESS) {
             dprintf("drain_scq_scratch_pad,status=%08x\n", cqe[i].status);
         }
 #else
-        if(cqe[i].status != IBV_WC_SUCCESS) {
-            dprintf("drain_scq_scratch_pad,status=%08x,%s\n", cqe[i].status, ibv_wc_status_str(cqe[i].status)); 
+        if (cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("drain_scq_scratch_pad,status=%08x,%s\n", cqe[i].status,
+                    ibv_wc_status_str(cqe[i].status));
         }
 #endif
-        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_scratch_pad");
-        
-        IbCom* ibcom_scratch_pad = (IbCom*)cqe[i].wr_id;
+        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER,
+                            "**MPID_nem_dcfa_drain_scq_scratch_pad");
+
+        IbCom *ibcom_scratch_pad = (IbCom *) cqe[i].wr_id;
         dprintf("drain_scq_scratch_pad,ibcom_scratch_pad=%p\n", ibcom_scratch_pad);
         ibcom_scratch_pad->ncom_scratch_pad -= 1;
         MPID_nem_dcfa_ncqe_scratch_pad -= 1;
     }
-    
- fn_exit:
+
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_SCRATCH_PAD);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_dcfa_poll_eager
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_poll_eager(MPIDI_VC_t *vc) {
+int MPID_nem_dcfa_poll_eager(MPIDI_VC_t * vc)
+{
 
-   int mpi_errno = MPI_SUCCESS;   
-   int ibcom_errno;
-   int result;
-   struct ibv_wc cqe[IBCOM_MAX_CQ_HEIGHT_DRAIN];
-   uint64_t tscs, tsce;
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    int result;
+    struct ibv_wc cqe[IBCOM_MAX_CQ_HEIGHT_DRAIN];
+    uint64_t tscs, tsce;
 
-   MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_POLL_EAGER);
-   MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_POLL_EAGER);
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_POLL_EAGER);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_POLL_EAGER);
 
-   //MPID_nem_dcfa_tsc_poll = MPID_nem_dcfa_rdtsc();
+    //MPID_nem_dcfa_tsc_poll = MPID_nem_dcfa_rdtsc();
 
-       MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
-       //dprintf("dcfa_poll,ld,rsr_seq_num_poll=%d\n", vc_dcfa->ibcom->rsr_seq_num_poll);
-       volatile void* buf = vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * ((uint32_t)vc_dcfa->ibcom->rsr_seq_num_poll % IBCOM_RDMABUF_NSEG);
-       volatile sz_hdrmagic_t* sz_hdrmagic = (sz_hdrmagic_t*)buf;
-       if(sz_hdrmagic->magic != IBCOM_MAGIC) { goto fn_exit; }
-       //dprintf("dcfa_poll_eager,buf=%p,sz=%d\n", buf, sz_hdrmagic->sz);
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    //dprintf("dcfa_poll,ld,rsr_seq_num_poll=%d\n", vc_dcfa->ibcom->rsr_seq_num_poll);
+    volatile void *buf =
+        vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] +
+        IBCOM_RDMABUF_SZSEG * ((uint32_t) vc_dcfa->ibcom->rsr_seq_num_poll % IBCOM_RDMABUF_NSEG);
+    volatile sz_hdrmagic_t *sz_hdrmagic = (sz_hdrmagic_t *) buf;
+    if (sz_hdrmagic->magic != IBCOM_MAGIC) {
+        goto fn_exit;
+    }
+    //dprintf("dcfa_poll_eager,buf=%p,sz=%d\n", buf, sz_hdrmagic->sz);
 
-       /* unmark magic */
-       sz_hdrmagic->magic = 0/*0xdead*/; 
+    /* unmark magic */
+    sz_hdrmagic->magic = 0 /*0xdead */ ;
 #if 0
-       ibcom_errno = ibcom_poll_cq(IBCOM_RC_SHARED_RCQ, &cqe, &result);
-       MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_poll_cq");
+    ibcom_errno = ibcom_poll_cq(IBCOM_RC_SHARED_RCQ, &cqe, &result);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_poll_cq");
 #endif
-       dprintf("dcfa_poll_eager,eager-send,found\n");
-       
-       //MPIU_ERR_CHKANDJUMP1(cqe.status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ibcom_poll_cq", "**ibcom_poll_cq %s", ibcom_strerror(ibcom_errno));
-       
-       int sz_data_pow2;
-       DCFA_NEM_SZ_DATA_POW2(sz_hdrmagic->sz);
-       volatile tailmagic_t* tailmagic = (tailmagic_t*)(buf + sz_data_pow2);
-       dprintf("poll,sz_data_pow2=%d,tailmagic=%p,sz=%d\n", sz_data_pow2, tailmagic, sz_hdrmagic->sz);
-       int k = 0;
-       //tsce = MPID_nem_dcfa_rdtsc(); printf("9,%ld\n", tsce - tscs); // 55 for 512-byte
-       //tscs = MPID_nem_dcfa_rdtsc();
-       //#define TLB_PREF_AMT_AHEAD 20
+    dprintf("dcfa_poll_eager,eager-send,found\n");
+
+    //MPIU_ERR_CHKANDJUMP1(cqe.status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ibcom_poll_cq", "**ibcom_poll_cq %s", ibcom_strerror(ibcom_errno));
+
+    int sz_data_pow2;
+    DCFA_NEM_SZ_DATA_POW2(sz_hdrmagic->sz);
+    volatile tailmagic_t *tailmagic = (tailmagic_t *) (buf + sz_data_pow2);
+    dprintf("poll,sz_data_pow2=%d,tailmagic=%p,sz=%d\n", sz_data_pow2, tailmagic, sz_hdrmagic->sz);
+    int k = 0;
+    //tsce = MPID_nem_dcfa_rdtsc(); printf("9,%ld\n", tsce - tscs); // 55 for 512-byte
+    //tscs = MPID_nem_dcfa_rdtsc();
+    //#define TLB_PREF_AMT_AHEAD 20
 #ifdef TLB_PREF_AMT_AHEAD
-       int tlb_pref_ahd = (uint64_t)tailmagic + 4096 * TLB_PREF_AMT_AHEAD - (uint64_t)buf;
+    int tlb_pref_ahd = (uint64_t) tailmagic + 4096 * TLB_PREF_AMT_AHEAD - (uint64_t) buf;
 #endif
-       while(tailmagic->magic != IBCOM_MAGIC) {
-           //k++; 
-#if 0 /* pre-fetch next RDMA-write-buf slot to cover TLB miss latency */
-           __asm__ __volatile__
-               ("movq %0, %%rsi;"
-                "movq 0(%%rsi), %%rsi;"
-                : : "r"(vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * ((vc_dcfa->ibcom->rsr_seq_num_poll + 1) % IBCOM_RDMABUF_NSEG)) : "%rsi");
+    while (tailmagic->magic != IBCOM_MAGIC) {
+        //k++;
+#if 0   /* pre-fetch next RDMA-write-buf slot to cover TLB miss latency */
+        __asm__ __volatile__
+            ("movq %0, %%rsi;"
+             "movq 0(%%rsi), %%rsi;"::"r"(vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] +
+                                          IBCOM_RDMABUF_SZSEG *
+                                          ((vc_dcfa->ibcom->rsr_seq_num_poll +
+                                            1) % IBCOM_RDMABUF_NSEG)):"%rsi");
 #endif
 #ifdef TLB_PREF_AMT_AHEAD
-           __asm__ __volatile__
-               ("movq %0, %%rsi;"
-                "movq 0(%%rsi), %%rax;"
-                : : "r"(buf + tlb_pref_ahd) : "%rsi", "%rax");
-           tlb_pref_ahd = (tlb_pref_ahd + 4096 * 20) % IBCOM_RDMABUF_SZ;
+        __asm__ __volatile__
+            ("movq %0, %%rsi;" "movq 0(%%rsi), %%rax;"::"r"(buf + tlb_pref_ahd):"%rsi", "%rax");
+        tlb_pref_ahd = (tlb_pref_ahd + 4096 * 20) % IBCOM_RDMABUF_SZ;
 #endif
-       }
-       //tsce = MPID_nem_dcfa_rdtsc(); printf("0,%ld\n", tsce - tscs); // 20-60 for 512-byte
-       //tscs = MPID_nem_dcfa_rdtsc();
-       //dprintf("magic wait=%d\n", k);
-       
-
-       /* this reduces memcpy in MPIDI_CH3U_Receive_data_found */
-       /* MPIDI_CH3_PktHandler_EagerSend (in ch3u_eager.c)
-            MPIDI_CH3U_Receive_data_found (in ch3u_handle_recv_pkt.c)
-              MPIU_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
-               600 cycle for 512B!!! --> 284 cycle with prefetch
-       */
+    }
+    //tsce = MPID_nem_dcfa_rdtsc(); printf("0,%ld\n", tsce - tscs); // 20-60 for 512-byte
+    //tscs = MPID_nem_dcfa_rdtsc();
+    //dprintf("magic wait=%d\n", k);
+
+
+    /* this reduces memcpy in MPIDI_CH3U_Receive_data_found */
+    /* MPIDI_CH3_PktHandler_EagerSend (in ch3u_eager.c)
+     * MPIDI_CH3U_Receive_data_found (in ch3u_handle_recv_pkt.c)
+     * MPIU_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
+     * 600 cycle for 512B!!! --> 284 cycle with prefetch
+     */
 
 #if 1
-       void* rsi;
-       for(rsi = (void*)buf; rsi < buf + sz_hdrmagic->sz; rsi += 64*4) {
+    void *rsi;
+    for (rsi = (void *) buf; rsi < buf + sz_hdrmagic->sz; rsi += 64 * 4) {
 #ifdef __MIC__
-           __asm__ __volatile__
-               (
-                "movq %0, %%rsi;"
-                "vprefetch0 0x00(%%rsi);"
-                "vprefetch0 0x40(%%rsi);"
-                "vprefetch0 0x80(%%rsi);"
-                "vprefetch0 0xc0(%%rsi);"
-                : 
-                : "r"(rsi)
-                : "%rsi"); 
+        __asm__ __volatile__
+            ("movq %0, %%rsi;"
+             "vprefetch0 0x00(%%rsi);"
+             "vprefetch0 0x40(%%rsi);" "vprefetch0 0x80(%%rsi);" "vprefetch0 0xc0(%%rsi);"::"r"(rsi)
+             :"%rsi");
 #else
-           __asm__ __volatile__
-               (
-                "movq %0, %%rsi;"
-                "prefetchnta 0x00(%%rsi);"
-                "prefetchnta 0x40(%%rsi);"
-                "prefetchnta 0x80(%%rsi);"
-                "prefetchnta 0xc0(%%rsi);"
-                : 
-                : "r"(rsi)
-                : "%rsi"); 
+        __asm__ __volatile__
+            ("movq %0, %%rsi;"
+             "prefetchnta 0x00(%%rsi);"
+             "prefetchnta 0x40(%%rsi);"
+             "prefetchnta 0x80(%%rsi);" "prefetchnta 0xc0(%%rsi);"::"r"(rsi)
+             :"%rsi");
 #endif
-       }
+    }
 #endif
 
-       MPIDI_CH3_Pkt_eager_send_t *pkt = (MPIDI_CH3_Pkt_eager_send_t*)(buf + sizeof(sz_hdrmagic_t));
-       MPIU_Assert(sz_hdrmagic->sz >= sizeof(sz_hdrmagic_t) + sizeof(MPIDI_CH3_Pkt_t) + sizeof(tailmagic_t));
-       MPIDI_CH3_Pkt_eager_send_t *pkt2 = (MPIDI_CH3_Pkt_eager_send_t*)(buf + sizeof(sz_hdrmagic_t)+sizeof(MPID_nem_dcfa_pkt_prefix_t));
-       dprintf("handle_pkt,before,%d<-%d,id=%d,pkt->type=%d,pcc=%d,MPIDI_NEM_PKT_END=%d,pkt=%p,subtype=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, vc_dcfa->ibcom->rsr_seq_num_poll, pkt->type, MPIDI_CH3I_progress_completion_count.v, MPIDI_NEM_PKT_END, pkt, ((MPID_nem_pkt_netmod_t*)pkt)->subtype);
-       /* see MPIDI_CH3_PktHandler_EagerSend (in src/mpid/ch3/src/ch3u_eager.c) */
-       mpi_errno = MPID_nem_handle_pkt(vc, (char *)(buf + sizeof(sz_hdrmagic_t)), (MPIDI_msg_sz_t)(sz_hdrmagic->sz - sizeof(sz_hdrmagic_t) - sizeof(tailmagic_t)));
-       if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-       //tsce = MPID_nem_dcfa_rdtsc(); printf("0,%ld\n", tsce - tscs); // 512-byte, 900 cyc (1100 w/o prefetch)
-       
-       /* Update occupation status of remote SR (send request) queue */
-       /* this includes local RDMA-wr-to buf occupation
-          because MPID_nem_handle_pkt releases RDMA-wr-to buf by copying data out */
-       /* responder releases resource and then embed largest sequence number into MPI message bound to initiator */
-       //dprintf("after handle_pkt,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+    MPIDI_CH3_Pkt_eager_send_t *pkt = (MPIDI_CH3_Pkt_eager_send_t *) (buf + sizeof(sz_hdrmagic_t));
+    MPIU_Assert(sz_hdrmagic->sz >=
+                sizeof(sz_hdrmagic_t) + sizeof(MPIDI_CH3_Pkt_t) + sizeof(tailmagic_t));
+    MPIDI_CH3_Pkt_eager_send_t *pkt2 =
+        (MPIDI_CH3_Pkt_eager_send_t *) (buf + sizeof(sz_hdrmagic_t) +
+                                        sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    dprintf
+        ("handle_pkt,before,%d<-%d,id=%d,pkt->type=%d,pcc=%d,MPIDI_NEM_PKT_END=%d,pkt=%p,subtype=%d\n",
+         MPID_nem_dcfa_myrank, vc->pg_rank, vc_dcfa->ibcom->rsr_seq_num_poll, pkt->type,
+         MPIDI_CH3I_progress_completion_count.v, MPIDI_NEM_PKT_END, pkt,
+         ((MPID_nem_pkt_netmod_t *) pkt)->subtype);
+    /* see MPIDI_CH3_PktHandler_EagerSend (in src/mpid/ch3/src/ch3u_eager.c) */
+    mpi_errno =
+        MPID_nem_handle_pkt(vc, (char *) (buf + sizeof(sz_hdrmagic_t)),
+                            (MPIDI_msg_sz_t) (sz_hdrmagic->sz - sizeof(sz_hdrmagic_t) -
+                                              sizeof(tailmagic_t)));
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
+    //tsce = MPID_nem_dcfa_rdtsc(); printf("0,%ld\n", tsce - tscs); // 512-byte, 900 cyc (1100 w/o prefetch)
+
+    /* Update occupation status of remote SR (send request) queue */
+    /* this includes local RDMA-wr-to buf occupation
+     * because MPID_nem_handle_pkt releases RDMA-wr-to buf by copying data out */
+    /* responder releases the resource and then embeds the largest sequence number into the MPI message bound to the initiator */
+    //dprintf("after handle_pkt,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
 #if 1
-       dprintf("handle_pkt,after,%d<-%d,id=%d,pkt->type=%d,eagershort=%d,close=%d,rts=%d,piggy-backed-eagersend=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, vc_dcfa->ibcom->rsr_seq_num_poll, pkt->type, MPIDI_CH3_PKT_EAGERSHORT_SEND, MPIDI_CH3_PKT_CLOSE, MPIDI_NEM_PKT_LMT_RTS, MPIDI_NEM_DCFA_PKT_EAGER_SEND);
-
-       int notify_rate;
-       ibcom_errno = ibcom_rdmabuf_occupancy_notify_rate_get(MPID_nem_dcfa_conns[vc->pg_rank].fd, &notify_rate);
-       dprintf("poll_eager,sendq=%d,ncom=%d,ncqe=%d,ldiff=%d(%d-%d),rdiff=%d(%d-%d),rate=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent, notify_rate);
-
-       //dprintf("dcfa_poll,current pcc=%d\n", MPIDI_CH3I_progress_completion_count.v);
-
-       /* Don't forget to put lmt-cookie types here!! */
-       if(
-          1
-          ) 
-           { 
-           /* lmt cookie messages or control message other than eager-short */
-           
-           /* eager-send with zero-length data is released here
-              because there is no way to trace the RDMA-write-to buffer addr
-              because rreq->dev.tmpbuf is set to zero in ch3_eager.c
-           */
-           dprintf("poll_eager,released,type=%d,MPIDI_NEM_DCFA_REPLY_SEQ_NUM=%d\n", pkt->type, MPIDI_NEM_DCFA_REPLY_SEQ_NUM);
-           MPID_nem_dcfa_recv_buf_released(vc, (void*)buf + sizeof(sz_hdrmagic_t) + sizeof(MPIDI_CH3_Pkt_t));
-       } else {
-           if(sz_hdrmagic->sz == sizeof(sz_hdrmagic_t) + sizeof(MPIDI_CH3_Pkt_t) + sizeof(tailmagic_t)) { 
-               if(pkt->type == MPIDI_CH3_PKT_EAGERSHORT_SEND 
-                  //||                  pkt->type == MPIDI_CH3_PKT_GET
+    dprintf
+        ("handle_pkt,after,%d<-%d,id=%d,pkt->type=%d,eagershort=%d,close=%d,rts=%d,piggy-backed-eagersend=%d\n",
+         MPID_nem_dcfa_myrank, vc->pg_rank, vc_dcfa->ibcom->rsr_seq_num_poll, pkt->type,
+         MPIDI_CH3_PKT_EAGERSHORT_SEND, MPIDI_CH3_PKT_CLOSE, MPIDI_NEM_PKT_LMT_RTS,
+         MPIDI_NEM_DCFA_PKT_EAGER_SEND);
+
+    int notify_rate;
+    ibcom_errno =
+        ibcom_rdmabuf_occupancy_notify_rate_get(MPID_nem_dcfa_conns[vc->pg_rank].fd, &notify_rate);
+    dprintf("poll_eager,sendq=%d,ncom=%d,ncqe=%d,ldiff=%d(%d-%d),rdiff=%d(%d-%d),rate=%d\n",
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY,
+            MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail),
+            vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail,
+                                 vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
+            vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent,
+            notify_rate);
+
+    //dprintf("dcfa_poll,current pcc=%d\n", MPIDI_CH3I_progress_completion_count.v);
+
+    /* Don't forget to put lmt-cookie types here!! */
+    if (1) {
+        /* lmt cookie messages or control message other than eager-short */
+
+        /* eager-send with zero-length data is released here
+         * because there is no way to trace the RDMA-write-to buffer addr
+         * because rreq->dev.tmpbuf is set to zero in ch3_eager.c
+         */
+        dprintf("poll_eager,released,type=%d,MPIDI_NEM_DCFA_REPLY_SEQ_NUM=%d\n", pkt->type,
+                MPIDI_NEM_DCFA_REPLY_SEQ_NUM);
+        MPID_nem_dcfa_recv_buf_released(vc,
+                                        (void *) buf + sizeof(sz_hdrmagic_t) +
+                                        sizeof(MPIDI_CH3_Pkt_t));
+    }
+    else {
+        if (sz_hdrmagic->sz ==
+            sizeof(sz_hdrmagic_t) + sizeof(MPIDI_CH3_Pkt_t) + sizeof(tailmagic_t)) {
+            if (pkt->type == MPIDI_CH3_PKT_EAGERSHORT_SEND
+                //||                  pkt->type == MPIDI_CH3_PKT_GET
 ) {
-               } else {
-                   printf("dcfa_poll,unknown pkt->type=%d\n", pkt->type);
-                   assert(0);
-                   MPIU_ERR_INTERNALANDJUMP(mpi_errno, "MPI header only but not released");
-               }
-           }
-       }
+            }
+            else {
+                printf("dcfa_poll,unknown pkt->type=%d\n", pkt->type);
+                assert(0);
+                MPIU_ERR_INTERNALANDJUMP(mpi_errno, "MPI header only but not released");
+            }
+        }
+    }
 #endif
 
-       vc_dcfa->ibcom->rsr_seq_num_poll += 1;
-       dprintf("dcfa_poll,inc,rsr_seq_num_poll=%d\n", vc_dcfa->ibcom->rsr_seq_num_poll);
+    vc_dcfa->ibcom->rsr_seq_num_poll += 1;
+    dprintf("dcfa_poll,inc,rsr_seq_num_poll=%d\n", vc_dcfa->ibcom->rsr_seq_num_poll);
 
- out:
- fn_exit:
-   MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_POLL_EAGER);
-   return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  out:
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_POLL_EAGER);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
 }
 
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_dcfa_poll
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_poll(int in_blocking_poll) {
+int MPID_nem_dcfa_poll(int in_blocking_poll)
+{
 
-   int mpi_errno = MPI_SUCCESS;   
-   int ibcom_errno;
-   uint32_t i;
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    uint32_t i;
 
-   MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_POLL);
-   MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_POLL);
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_POLL);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_POLL);
 
 #if 1
-   unsigned int progress_completion_count_old = MPIDI_CH3I_progress_completion_count.v;
+    unsigned int progress_completion_count_old = MPIDI_CH3I_progress_completion_count.v;
 #endif
 
-   /* poll lmt */
-   /* when receiver side sends CTS to sender side 
-      sender receives CTS and give up sending RTS
-      sender initiates RDMA-write,
-      sender sends RTS of the next epoch,
-      to detect the end of RDMA-write first and DP the entry for CTS,
-      you should perform lmt-poll first, next eager-poll
-    */
-   MPID_Request *rreq, *prev_rreq;
-   rreq = MPID_nem_dcfa_lmtq_head(MPID_nem_dcfa_lmtq);
-   if(rreq) {
+    /* poll lmt */
+    /* when receiver side sends CTS to sender side
+     * sender receives CTS and gives up sending RTS
+     * sender initiates RDMA-write,
+     * sender sends RTS of the next epoch,
+     * to detect the end of RDMA-write first and DP the entry for CTS,
+     * you should perform lmt-poll first, then eager-poll
+     */
+    MPID_Request *rreq, *prev_rreq;
+    rreq = MPID_nem_dcfa_lmtq_head(MPID_nem_dcfa_lmtq);
+    if (rreq) {
 #if defined (TIMER_WAIT_DCFA_POLL)
-   if(in_blocking_poll) { tsc[0] = MPI_rdtsc(); }
+        if (in_blocking_poll) {
+            tsc[0] = MPI_rdtsc();
+        }
 #endif
-   // dprintf("dcfa_poll,poll lmtq\n");
-       prev_rreq = NULL;
-       do {
-           /* Obtain cookie. pkt_RTS_handler memcpy it (in mpid_nem_lmt.c) */
-           /* MPID_IOV_BUF is macro, converted into iov_base (in src/include/mpiiov.h) */
-           /* do not use s_cookie_buf because do_cts frees it */
-           //MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = (MPID_nem_dcfa_lmt_cookie_t*)rreq->ch.lmt_tmp_cookie.iov_base;
-           
-           /* Wait for completion of DMA */
-           /* do not use s_cookie_buf->sz because do_cts frees it */
-           volatile void* write_to_buf;
-           int is_contig;
-           MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);
-           if(is_contig) {
-               write_to_buf = (void*)((char *)rreq->dev.user_buf /*+ REQ_FIELD(req, lmt_dt_true_lb)*/); 
-           } else {
-               write_to_buf = REQ_FIELD(rreq, lmt_pack_buf);
-           }
-
-           //assert(REQ_FIELD(rreq, lmt_dt_true_lb) == 0);
-           volatile uint8_t* tailmagic = (uint8_t*)(write_to_buf /*+ REQ_FIELD(rreq, lmt_dt_true_lb)*/ + rreq->ch.lmt_data_sz - sizeof(uint8_t));
-           
-           uint8_t lmt_tail = REQ_FIELD(rreq, lmt_tail);
-           if(*tailmagic != REQ_FIELD(rreq, lmt_tail)) { goto next; }
-           dprintf("dcfa_poll,sz=%ld,old tail=%02x,new tail=%02x\n", rreq->ch.lmt_data_sz, REQ_FIELD(rreq, lmt_tail), *tailmagic);
-           
-           dprintf("dcfa_poll,lmt found,%d<-%d,req=%p,ref_count=%d,is_contig=%d,write_to_buf=%p,lmt_pack_buf=%p,user_buf=%p,tail=%p\n", MPID_nem_dcfa_myrank, rreq->ch.vc->pg_rank, rreq, rreq->ref_count, is_contig, write_to_buf, REQ_FIELD(rreq, lmt_pack_buf), rreq->dev.user_buf, tailmagic);
-
-           /* unpack non-contiguous dt */
-           if(!is_contig) {
-               dprintf("dcfa_poll,copying noncontiguous data to user buffer\n");
-
-               /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
-               /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
-               MPIDI_msg_sz_t unpack_sz = rreq->ch.lmt_data_sz;
-               MPID_Segment seg;
-               MPI_Aint last;
-
-               MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &seg, 0);
-               last = unpack_sz;
-               MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(rreq, lmt_pack_buf));
-               if (last != unpack_sz) {
-                   /* --BEGIN ERROR HANDLING-- */
-                   /* received data was not entirely consumed by unpack() 
-                      because too few bytes remained to fill the next basic
-                      datatype */
-                   MPIR_STATUS_SET_COUNT(rreq->status, last);
-                   rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_dcfa_poll", 0);
-                   /* --END ERROR HANDLING-- */
-               }
-#if 1 /* debug, enable again later, polling waits forever when freeing it here. */
-               //if(rreq->ref_count == 1) {
-               dprintf("dcfa_poll,lmt,ref_count=%d,lmt_pack_buf=%p\n", rreq->ref_count, REQ_FIELD(rreq, lmt_pack_buf));
-                   //MPIU_Free(REQ_FIELD(rreq, lmt_pack_buf));
-                   MPID_nem_dcfa_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t)rreq->ch.lmt_data_sz);    
-                   //} else {
-                   // dprintf("dcfa_poll,lmt,ref_count=%d,lmt_pack_buf=%p\n", rreq->ref_count, REQ_FIELD(rreq, lmt_pack_buf));
-                   //}
+        // dprintf("dcfa_poll,poll lmtq\n");
+        prev_rreq = NULL;
+        do {
+            /* Obtain cookie. pkt_RTS_handler memcpy it (in mpid_nem_lmt.c) */
+            /* MPID_IOV_BUF is macro, converted into iov_base (in src/include/mpiiov.h) */
+            /* do not use s_cookie_buf because do_cts frees it */
+            //MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = (MPID_nem_dcfa_lmt_cookie_t*)rreq->ch.lmt_tmp_cookie.iov_base;
+
+            /* Wait for completion of DMA */
+            /* do not use s_cookie_buf->sz because do_cts frees it */
+            volatile void *write_to_buf;
+            int is_contig;
+            MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);
+            if (is_contig) {
+                write_to_buf =
+                    (void *) ((char *) rreq->dev.user_buf /*+ REQ_FIELD(req, lmt_dt_true_lb) */);
+            }
+            else {
+                write_to_buf = REQ_FIELD(rreq, lmt_pack_buf);
+            }
+
+            //assert(REQ_FIELD(rreq, lmt_dt_true_lb) == 0);
+            volatile uint8_t *tailmagic =
+                (uint8_t *) (write_to_buf /*+ REQ_FIELD(rreq, lmt_dt_true_lb) */  +
+                             rreq->ch.lmt_data_sz - sizeof(uint8_t));
+
+            uint8_t lmt_tail = REQ_FIELD(rreq, lmt_tail);
+            if (*tailmagic != REQ_FIELD(rreq, lmt_tail)) {
+                goto next;
+            }
+            dprintf("dcfa_poll,sz=%ld,old tail=%02x,new tail=%02x\n", rreq->ch.lmt_data_sz,
+                    REQ_FIELD(rreq, lmt_tail), *tailmagic);
+
+            dprintf
+                ("dcfa_poll,lmt found,%d<-%d,req=%p,ref_count=%d,is_contig=%d,write_to_buf=%p,lmt_pack_buf=%p,user_buf=%p,tail=%p\n",
+                 MPID_nem_dcfa_myrank, rreq->ch.vc->pg_rank, rreq, rreq->ref_count, is_contig,
+                 write_to_buf, REQ_FIELD(rreq, lmt_pack_buf), rreq->dev.user_buf, tailmagic);
+
+            /* unpack non-contiguous dt */
+            if (!is_contig) {
+                dprintf("dcfa_poll,copying noncontiguous data to user buffer\n");
+
+                /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
+                /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
+                MPIDI_msg_sz_t unpack_sz = rreq->ch.lmt_data_sz;
+                MPID_Segment seg;
+                MPI_Aint last;
+
+                MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype,
+                                  &seg, 0);
+                last = unpack_sz;
+                MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(rreq, lmt_pack_buf));
+                if (last != unpack_sz) {
+                    /* --BEGIN ERROR HANDLING-- */
+                    /* received data was not entirely consumed by unpack()
+                     * because too few bytes remained to fill the next basic
+                     * datatype */
+                    MPIR_STATUS_SET_COUNT(rreq->status, last);
+                    rreq->status.MPI_ERROR =
+                        MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
+                                             MPI_ERR_TYPE, "**MPID_nem_dcfa_poll", 0);
+                    /* --END ERROR HANDLING-- */
+                }
+#if 1   /* debug, enable again later, polling waits forever when freeing it here. */
+                //if (rreq->ref_count == 1) {
+                dprintf("dcfa_poll,lmt,ref_count=%d,lmt_pack_buf=%p\n", rreq->ref_count,
+                        REQ_FIELD(rreq, lmt_pack_buf));
+                //MPIU_Free(REQ_FIELD(rreq, lmt_pack_buf));
+                MPID_nem_dcfa_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t) rreq->ch.lmt_data_sz);
+                //} else {
+                // dprintf("dcfa_poll,lmt,ref_count=%d,lmt_pack_buf=%p\n", rreq->ref_count, REQ_FIELD(rreq, lmt_pack_buf));
+                //}
 #endif
-           }
-
-           /* send done to sender. vc is stashed in MPID_nem_dcfa_lmt_start_recv (in dcfa_lmt.c) */
-           MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(rreq->ch.vc);
-           dprintf("dcfa_poll,GET,lmt_send_GET_DONE,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
-           MPID_nem_dcfa_lmt_send_GET_DONE(rreq->ch.vc, rreq);
-           dprintf("dcfa_poll,prev_rreq=%p,rreq->lmt_next=%p\n", prev_rreq, MPID_nem_dcfa_lmtq_next(rreq));
-
-           /* unlink rreq */
-           if(prev_rreq != NULL) {
-               MPID_nem_dcfa_lmtq_next(prev_rreq) = MPID_nem_dcfa_lmtq_next(rreq);
-           } else {
-               MPID_nem_dcfa_lmtq_head(MPID_nem_dcfa_lmtq) = MPID_nem_dcfa_lmtq_next(rreq);
-           }
-           if(MPID_nem_dcfa_lmtq_next(rreq) == NULL) { MPID_nem_dcfa_lmtq.tail = prev_rreq; }
-
-           /* save rreq->dev.next (and rreq) because decrementing reference-counter might free rreq */
-           MPID_Request *tmp_rreq = rreq;
-           rreq = MPID_nem_dcfa_lmtq_next(rreq);
-
-           /* decrement completion-counter */
-           dprintf("dcfa_poll,%d<-%d,", MPID_nem_dcfa_myrank, tmp_rreq->ch.vc->pg_rank);
-           int incomplete;
-           MPIDI_CH3U_Request_decrement_cc(tmp_rreq, &incomplete);
-           dprintf("lmt,complete,tmp_rreq=%p,rreq->ref_count=%d,comm=%p\n", tmp_rreq, tmp_rreq->ref_count, tmp_rreq->comm);
-
-           if(!incomplete) { MPIDI_CH3_Progress_signal_completion(); }
-
-           /* lmt_start_recv increments ref_count 
-              drain_scq and dcfa_poll is not ordered, so both can decrement ref_count */
-           /* ref_count is decremented
-              get-lmt: dcfa_poll, drain_scq, wait 
-              put-lmt: dcfa_poll, wait */
-           MPID_Request_release(tmp_rreq); 
-           dprintf("dcfa_poll,lmt,after release,tmp_rreq=%p,rreq->ref_count=%d,comm=%p\n", tmp_rreq, tmp_rreq->ref_count, tmp_rreq->comm);
-
-    
-           goto next_unlinked;
-       next:
-           prev_rreq = rreq;
-           rreq = MPID_nem_dcfa_lmtq_next(rreq);
-       next_unlinked:;
-       } while(rreq);
+            }
+
+            /* send done to sender. vc is stashed in MPID_nem_dcfa_lmt_start_recv (in dcfa_lmt.c) */
+            MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(rreq->ch.vc);
+            dprintf("dcfa_poll,GET,lmt_send_GET_DONE,rsr_seq_num_tail=%d\n",
+                    vc_dcfa->ibcom->rsr_seq_num_tail);
+            MPID_nem_dcfa_lmt_send_GET_DONE(rreq->ch.vc, rreq);
+            dprintf("dcfa_poll,prev_rreq=%p,rreq->lmt_next=%p\n", prev_rreq,
+                    MPID_nem_dcfa_lmtq_next(rreq));
+
+            /* unlink rreq */
+            if (prev_rreq != NULL) {
+                MPID_nem_dcfa_lmtq_next(prev_rreq) = MPID_nem_dcfa_lmtq_next(rreq);
+            }
+            else {
+                MPID_nem_dcfa_lmtq_head(MPID_nem_dcfa_lmtq) = MPID_nem_dcfa_lmtq_next(rreq);
+            }
+            if (MPID_nem_dcfa_lmtq_next(rreq) == NULL) {
+                MPID_nem_dcfa_lmtq.tail = prev_rreq;
+            }
+
+            /* save rreq->dev.next (and rreq) because decrementing reference-counter might free rreq */
+            MPID_Request *tmp_rreq = rreq;
+            rreq = MPID_nem_dcfa_lmtq_next(rreq);
+
+            /* decrement completion-counter */
+            dprintf("dcfa_poll,%d<-%d,", MPID_nem_dcfa_myrank, tmp_rreq->ch.vc->pg_rank);
+            int incomplete;
+            MPIDI_CH3U_Request_decrement_cc(tmp_rreq, &incomplete);
+            dprintf("lmt,complete,tmp_rreq=%p,rreq->ref_count=%d,comm=%p\n", tmp_rreq,
+                    tmp_rreq->ref_count, tmp_rreq->comm);
+
+            if (!incomplete) {
+                MPIDI_CH3_Progress_signal_completion();
+            }
+
+            /* lmt_start_recv increments ref_count
+             * drain_scq and dcfa_poll are not ordered, so both can decrement ref_count */
+            /* ref_count is decremented
+             * get-lmt: dcfa_poll, drain_scq, wait
+             * put-lmt: dcfa_poll, wait */
+            MPID_Request_release(tmp_rreq);
+            dprintf("dcfa_poll,lmt,after release,tmp_rreq=%p,rreq->ref_count=%d,comm=%p\n",
+                    tmp_rreq, tmp_rreq->ref_count, tmp_rreq->comm);
+
+
+            goto next_unlinked;
+          next:
+            prev_rreq = rreq;
+            rreq = MPID_nem_dcfa_lmtq_next(rreq);
+          next_unlinked:;
+        } while (rreq);
 #if defined (TIMER_WAIT_DCFA_POLL)
-   if(in_blocking_poll) { stsc[0] += MPI_rdtsc() - tsc[0]; }
+        if (in_blocking_poll) {
+            stsc[0] += MPI_rdtsc() - tsc[0];
+        }
 #endif
-   }
+    }
 
 #if defined (TIMER_WAIT_DCFA_POLL)
-   if(in_blocking_poll) { tsc[1] = MPI_rdtsc(); }
+    if (in_blocking_poll) {
+        tsc[1] = MPI_rdtsc();
+    }
 #endif
-   int ncom_almost_full = 0;
-   for(i = 0; i < MPID_nem_dcfa_npollingset; i++) {
-       //tscs = MPID_nem_dcfa_rdtsc();
-       MPIDI_VC_t *vc = MPID_nem_dcfa_pollingset[i];
-       mpi_errno = MPID_nem_dcfa_poll_eager(vc);
-       if(mpi_errno) { MPIU_ERR_POP (mpi_errno); }
-
-       MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
-
-       /* without this, command in sendq doesn't have a chance 
-          to perform send_progress 
-          when send and progress_send call drain_scq but asking it 
-          for not performing send_progress and make the CQ empty */
-       MPID_nem_dcfa_send_progress(vc_dcfa);
-
-       ncom_almost_full |= (vc_dcfa->ibcom->ncom >= IBCOM_MAX_SQ_HEIGHT_DRAIN);
-       
+    int ncom_almost_full = 0;
+    for (i = 0; i < MPID_nem_dcfa_npollingset; i++) {
+        //tscs = MPID_nem_dcfa_rdtsc();
+        MPIDI_VC_t *vc = MPID_nem_dcfa_pollingset[i];
+        mpi_errno = MPID_nem_dcfa_poll_eager(vc);
+        if (mpi_errno) {
+            MPIU_ERR_POP(mpi_errno);
+        }
+
+        MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+
+        /* without this, a command in the sendq doesn't get a chance
+         * to perform send_progress when send and progress_send
+         * call drain_scq while asking it not to perform
+         * send_progress and thereby empty the CQ */
+        MPID_nem_dcfa_send_progress(vc_dcfa);
+
+        ncom_almost_full |= (vc_dcfa->ibcom->ncom >= IBCOM_MAX_SQ_HEIGHT_DRAIN);
+
 #if 0
-       /* aggressively perform drain_scq */
-       ncom_almost_full |= !(MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq));
+        /* aggressively perform drain_scq */
+        ncom_almost_full |= !(MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq));
 #endif
-   }
+    }
 #if defined (TIMER_WAIT_DCFA_POLL)
-   if(in_blocking_poll) { stsc[1] += MPI_rdtsc() - tsc[1]; }
+    if (in_blocking_poll) {
+        stsc[1] += MPI_rdtsc() - tsc[1];
+    }
 #endif
 
-   // lazy feching of completion queue entry because it causes cache-miss
+    // lazy fetching of completion queue entry because it causes cache-miss
 #if !defined (LMT_PUT_DONE) && defined (LMT_GET_CQE)
-   if(MPID_nem_dcfa_ncqe_to_drain > 0 || MPID_nem_dcfa_ncqe_nces > 0 || MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full)
+    if (MPID_nem_dcfa_ncqe_to_drain > 0 || MPID_nem_dcfa_ncqe_nces > 0 ||
+        MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full)
 #endif
 #if !defined (LMT_PUT_DONE) && !defined (LMT_GET_CQE)
-   if(/*(in_blocking_poll && result == 0) ||*/ MPID_nem_dcfa_ncqe_nces > 0 || MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full)
+        if (/*(in_blocking_poll && result == 0) || */ MPID_nem_dcfa_ncqe_nces > 0 ||
+            MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full)
 #endif
-   {
+        {
 #if defined (TIMER_WAIT_DCFA_POLL)
-       if(in_blocking_poll) { tsc[0] = MPI_rdtsc(); }
+            if (in_blocking_poll) {
+                tsc[0] = MPI_rdtsc();
+            }
 #endif
-       //dprintf("dcfa_poll,calling drain_scq\n");
-       ibcom_errno = MPID_nem_dcfa_drain_scq(0);
-       MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
+            //dprintf("dcfa_poll,calling drain_scq\n");
+            ibcom_errno = MPID_nem_dcfa_drain_scq(0);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
 #if defined (TIMER_WAIT_DCFA_POLL)
-       if(in_blocking_poll) { stsc[0] += MPI_rdtsc() - tsc[0]; }
+            if (in_blocking_poll) {
+                stsc[0] += MPI_rdtsc() - tsc[0];
+            }
 #endif
-   }
+        }
 #if 1
-   /* aggressively perform drain_scq */
-   ibcom_errno = MPID_nem_dcfa_drain_scq(0);
-   MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
+    /* aggressively perform drain_scq */
+    ibcom_errno = MPID_nem_dcfa_drain_scq(0);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
 #endif
-   /* detect completion of lmt-put when MPI_Wait kicks dcfa_poll */
-   if(MPID_nem_dcfa_ncqe_lmt_put > 0) {
-       ibcom_errno = MPID_nem_dcfa_drain_scq_lmt_put();
-       MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_lmt_put");
-   }       
+    /* detect completion of lmt-put when MPI_Wait kicks dcfa_poll */
+    if (MPID_nem_dcfa_ncqe_lmt_put > 0) {
+        ibcom_errno = MPID_nem_dcfa_drain_scq_lmt_put();
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                            "**MPID_nem_dcfa_drain_scq_lmt_put");
+    }
 
 #ifdef DCFA_ONDEMAND
-   /* process incoming connection request */
-   MPID_nem_dcfa_cm_accept();
-
-   /* process outgoing conncetion request */
-   if(MPID_nem_dcfa_ncqe_connect >= IBCOM_MAX_CQ_HEIGHT_DRAIN) {
-       ibcom_errno = MPID_nem_dcfa_cm_drain_scq(0);
-       MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_drain_scq");
-   }       
+    /* process incoming connection request */
+    MPID_nem_dcfa_cm_accept();
+
+    /* process outgoing connection request */
+    if (MPID_nem_dcfa_ncqe_connect >= IBCOM_MAX_CQ_HEIGHT_DRAIN) {
+        ibcom_errno = MPID_nem_dcfa_cm_drain_scq(0);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_drain_scq");
+    }
 #endif
 
 #if 1
-   /* if polling on eager-send and lmt would repeat frequently, perform "pause" to yield instruction issue bandwitdh to other logical-core */
-   if(in_blocking_poll && progress_completion_count_old == MPIDI_CH3I_progress_completion_count.v) {
-           __asm__ __volatile__ ("pause;" : : : "memory"); 
-   }
+    /* if polling on eager-send and lmt would repeat frequently, perform "pause" to yield instruction issue bandwidth to the other logical core */
+    if (in_blocking_poll && progress_completion_count_old == MPIDI_CH3I_progress_completion_count.v) {
+        __asm__ __volatile__("pause;":::"memory");
+    }
 #endif
-   //if(in_blocking_poll) { goto prev; }
-
- out:
-   fn_exit:
-   MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_POLL);
-   return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+    //if (in_blocking_poll) { goto prev; }
+
+  out:
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_POLL);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
 }
 
-   /* new rreq is obtained in MPID_Irecv in mpid_irecv.c, 
-    so we associate rreq with a receive request and ibv_post_recv it 
-    so that we can obtain rreq by ibv_poll_cq
-   */
+   /* new rreq is obtained in MPID_Irecv in mpid_irecv.c,
+    * so we associate rreq with a receive request and ibv_post_recv it
+    * so that we can obtain rreq by ibv_poll_cq
+    */
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_dcfa_recv_posted
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req) {
+int MPID_nem_dcfa_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req)
+{
 
     int mpi_errno = MPI_SUCCESS;
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
@@ -910,48 +1030,54 @@ int MPID_nem_dcfa_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req) {
 
 #if 0
     int ibcom_errno;
-    ibcom_errno = ibcom_irecv(vc_dcfa->sc->fd, (uint64_t)vc->pg_rank);
+    ibcom_errno = ibcom_irecv(vc_dcfa->sc->fd, (uint64_t) vc->pg_rank);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_irecv");
 #endif
-    
-#if 1 /*takagi*/
+
+#if 1   /*takagi */
     MPIDI_msg_sz_t data_sz;
     int dt_contig;
     MPI_Aint dt_true_lb;
-    MPID_Datatype * dt_ptr;
-    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+    MPID_Datatype *dt_ptr;
+    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr,
+                            dt_true_lb);
 
     /* poll when rreq is for lmt */
     /* anticipating that the received message finds a matching request in the posted-queue */
-    if(data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) > vc->eager_max_msg_sz) {
-        //if(MPID_nem_dcfa_tsc_poll - MPID_nem_dcfa_rdtsc() > MPID_NEM_DCFA_POLL_PERIOD_RECV_POSTED) {
+    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) > vc->eager_max_msg_sz) {
+        //if (MPID_nem_dcfa_tsc_poll - MPID_nem_dcfa_rdtsc() > MPID_NEM_DCFA_POLL_PERIOD_RECV_POSTED) {
 #if 1
         mpi_errno = MPID_nem_dcfa_poll_eager(vc);
 #else
-            mpi_errno = MPID_nem_dcfa_poll(0);
+        mpi_errno = MPID_nem_dcfa_poll(0);
 #endif
-            if(mpi_errno) { MPIU_ERR_POP (mpi_errno); }
-            //}
-    } else {
+        if (mpi_errno) {
+            MPIU_ERR_POP(mpi_errno);
+        }
+        //}
+    }
+    else {
 #if 1
-    /* anticipating received message finds maching request in the posted-queue */
-    //if(MPID_nem_dcfa_tsc_poll - MPID_nem_dcfa_rdtsc() > MPID_NEM_DCFA_POLL_PERIOD_RECV_POSTED) {
+        /* anticipating that the received message finds a matching request in the posted-queue */
+        //if (MPID_nem_dcfa_tsc_poll - MPID_nem_dcfa_rdtsc() > MPID_NEM_DCFA_POLL_PERIOD_RECV_POSTED) {
 #if 1
         mpi_errno = MPID_nem_dcfa_poll_eager(vc);
 #else
         mpi_errno = MPID_nem_dcfa_poll(0);
 #endif
-        if(mpi_errno) { MPIU_ERR_POP (mpi_errno); }
+        if (mpi_errno) {
+            MPIU_ERR_POP(mpi_errno);
+        }
         //}
 #endif
     }
 #endif
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_RECV_POSTED);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 /* (1) packet-handler memcpy RDMA-write-to buf data to MPI user-buffer when matching request is found in posted-queue
@@ -959,55 +1085,71 @@ int MPID_nem_dcfa_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req) {
    the latter case can't be dealt with when call this after poll-found and packet-handler
    (packet-handler memcpy RDMA-write-to buf to another buffer when
    matching request is not found in posted-queue, so calling this after poll-found and packet-handler
-   suffices in original MPICH implementation )
+   suffices in original MPICH implementation)
 */
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_dcfa_recv_buf_released
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_recv_buf_released(struct MPIDI_VC *vc, void* user_data) {
+int MPID_nem_dcfa_recv_buf_released(struct MPIDI_VC *vc, void *user_data)
+{
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_RECV_BUF_RELEASED);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_RECV_BUF_RELEASED);
-    dprintf("recv_buf_released,%d<-%d,user_data=%p\n", MPID_nem_dcfa_myrank, vc->pg_rank, user_data);
-#if 1 /* moving from dcfa_poll */
-       /* unmark magic */
-       /* magic is located at IBCOM_INLINE_DATA boundary and variable length entails multiple prospective locations for the future use */
+    dprintf("recv_buf_released,%d<-%d,user_data=%p\n", MPID_nem_dcfa_myrank, vc->pg_rank,
+            user_data);
+#if 1   /* moving from dcfa_poll */
+    /* unmark magic */
+    /* magic is located at IBCOM_INLINE_DATA boundary and variable length entails multiple prospective locations for future use */
 
     /* see MPIDI_CH3_PktHandler_EagerShortSend (in src/mpid/ch3/src/ch3u_eager.c */
     /* eager-send with zero-length data is released in poll
-       because there is no way to trace the RDMA-write-to buffer addr
-       because rreq->dev.tmpbuf is set to zero in ch3_eager.c
-        */
-    if(user_data == NULL) { goto fn_exit; }
+     * because there is no way to trace the RDMA-write-to buffer addr
+     * because rreq->dev.tmpbuf is set to zero in ch3_eager.c
+     */
+    if (user_data == NULL) {
+        goto fn_exit;
+    }
 
-    MPIU_Assert(vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] <= user_data && user_data < vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZ);
-    unsigned long mod = (unsigned long)(user_data - vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO]) & (IBCOM_RDMABUF_SZSEG-1);
+    MPIU_Assert(vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] <= user_data &&
+                user_data < vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZ);
+    unsigned long mod =
+        (unsigned long) (user_data -
+                         vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO]) & (IBCOM_RDMABUF_SZSEG - 1);
 
-    void* buf = (void*)(user_data - mod);
+    void *buf = (void *) (user_data - mod);
     //dprintf("recv_buf_released,clearing,buf=%p\n", buf);
-    sz_hdrmagic_t* sz_hdrmagic = (sz_hdrmagic_t*)buf;
-    
+    sz_hdrmagic_t *sz_hdrmagic = (sz_hdrmagic_t *) buf;
+
     int sz_data_pow2;
     DCFA_NEM_SZ_DATA_POW2(sz_hdrmagic->sz);
     //dprintf("recv_buf_released,sz=%d,pow2=%d\n", sz_hdrmagic->sz, sz_data_pow2);
 #if 1
     uint32_t offset;
-    for(offset = 0; ; offset = offset ? ( (((offset + 1) << 1) - 1) > DCFA_NEM_MAX_DATA_POW2 ? DCFA_NEM_MAX_DATA_POW2 : (((offset + 1) << 1) - 1) ) : 15) {
-        volatile tailmagic_t* ptr = (tailmagic_t*)(buf + offset);
-        MPIU_Assert(vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] <= ptr && ptr < vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZ);
-        ptr->magic = 0/*0xde*/; 
-        if(offset == sz_data_pow2) { break; }
+    for (offset = 0;;
+         offset =
+         offset ? ((((offset + 1) << 1) - 1) >
+                   DCFA_NEM_MAX_DATA_POW2 ? DCFA_NEM_MAX_DATA_POW2 : (((offset + 1) << 1) -
+                                                                      1)) : 15) {
+        volatile tailmagic_t *ptr = (tailmagic_t *) (buf + offset);
+        MPIU_Assert(vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] <= ptr &&
+                    ptr < vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZ);
+        ptr->magic = 0 /*0xde */ ;
+        if (offset == sz_data_pow2) {
+            break;
+        }
     }
 #endif
 #endif
 
-#if 1 /* moving from dcfa_poll */
+#if 1   /* moving from dcfa_poll */
     /* mark that one eager-send RDMA-write-to buffer has been released */
-    int index_slot = (unsigned long)(user_data - vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO]) / IBCOM_RDMABUF_SZSEG;
+    int index_slot =
+        (unsigned long) (user_data -
+                         vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO]) / IBCOM_RDMABUF_SZSEG;
     MPIU_Assert(0 <= index_slot && index_slot < IBCOM_RDMABUF_NSEG);
     //dprintf("user_data=%p,mem=%p,sub=%08lx,index_slot=%d\n", user_data, vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO], (unsigned long)user_data - (unsigned long)vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO], index_slot);
     //dprintf("index_slot=%d,released=%016lx\n", index_slot, vc_dcfa->ibcom->rsr_seq_num_released[index_slot / 64]);
@@ -1017,55 +1159,66 @@ int MPID_nem_dcfa_recv_buf_released(struct MPIDI_VC *vc, void* user_data) {
     int index_tail = (vc_dcfa->ibcom->rsr_seq_num_tail + 1) % IBCOM_RDMABUF_NSEG;
     //dprintf("tail+1=%d,index_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail + 1, index_tail);
     //dprintf("released=%016lx\n", vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64]);
-    if(1||(index_tail & 7) || MPID_nem_dcfa_diff32(index_slot, index_tail) >= IBCOM_RDMABUF_NSEG - 8) { /* avoid wrap-around */
-        while(1) {
-        if(((vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] >> (index_tail & 63)) & 1) == 1) {
-            vc_dcfa->ibcom->rsr_seq_num_tail += 1;
-            vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] &= ~(1ULL << (index_tail & 63));
-            dprintf("rsr_seq_num_tail,incremented to %d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
-        } else {
-            break;
-        }
+    if (1 || (index_tail & 7) || MPID_nem_dcfa_diff32(index_slot, index_tail) >= IBCOM_RDMABUF_NSEG - 8) {      /* avoid wrap-around */
+        while (1) {
+            if (((vc_dcfa->ibcom->
+                  rsr_seq_num_released[index_tail / 64] >> (index_tail & 63)) & 1) == 1) {
+                vc_dcfa->ibcom->rsr_seq_num_tail += 1;
+                vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] &=
+                    ~(1ULL << (index_tail & 63));
+                dprintf("rsr_seq_num_tail,incremented to %d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+            }
+            else {
+                break;
+            }
         }
-    } else {
-        if(((vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] >> (index_tail & 63)) & 0xff) == 0xff) {
+    }
+    else {
+        if (((vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] >> (index_tail & 63)) & 0xff) ==
+            0xff) {
             vc_dcfa->ibcom->rsr_seq_num_tail += 8;
-            vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] &= ~(0xffULL << (index_tail & 63));
+            vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] &=
+                ~(0xffULL << (index_tail & 63));
             //dprintf("released[index_tail/64]=%016lx\n", vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64]);
         }
     }
-    
+
     //dprintf("recv_buf_released,%d->%d,rsr_seq_num_tail=%d,rsr_seq_num_tail_last_sent=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent);
 
     int notify_rate;
-    ibcom_errno = ibcom_rdmabuf_occupancy_notify_rate_get(MPID_nem_dcfa_conns[vc->pg_rank].fd, &notify_rate);
-    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rdmabuf_occupancy_notify_rate_get");
+    ibcom_errno =
+        ibcom_rdmabuf_occupancy_notify_rate_get(MPID_nem_dcfa_conns[vc->pg_rank].fd, &notify_rate);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                        "**ibcom_rdmabuf_occupancy_notify_rate_get");
 
     /* if we missed the chance to piggy-back the sequence number on an eager-send message, send it explicitly */
-    if(MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent) > IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_DELAY_MULTIPLIER(notify_rate) 
-       //|| MPID_nem_dcfa_diff32(lsr_seq_num_head, vc_dcfa->ibcom->lsr_seq_num_tail_last_sent) == IBCOM_RDMABUF_NSEG
-       ) {
+    if (MPID_nem_dcfa_diff32
+        (vc_dcfa->ibcom->rsr_seq_num_tail,
+         vc_dcfa->ibcom->rsr_seq_num_tail_last_sent) >
+        IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_DELAY_MULTIPLIER(notify_rate)
+        //|| MPID_nem_dcfa_diff32(lsr_seq_num_head, vc_dcfa->ibcom->lsr_seq_num_tail_last_sent) == IBCOM_RDMABUF_NSEG
+) {
         MPID_Request *sreq;
         sreq = MPID_nem_dcfa_sendq_head(vc_dcfa->sendq);
-        if(sreq) {
+        if (sreq) {
             int msg_type = MPIDI_Request_get_msg_type(sreq);
-            MPIDI_CH3_Pkt_t* ch3_hdr = (MPIDI_CH3_Pkt_t *)sreq->dev.iov[0].MPID_IOV_BUF;
-            if(msg_type == MPIDI_REQUEST_EAGER_MSG && /* guard for the following pointer dereference */
-               ch3_hdr->type == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) {
+            MPIDI_CH3_Pkt_t *ch3_hdr = (MPIDI_CH3_Pkt_t *) sreq->dev.iov[0].MPID_IOV_BUF;
+            if (msg_type == MPIDI_REQUEST_EAGER_MSG &&  /* guard for the following pointer dereference */
+                ch3_hdr->type == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) {
                 goto skip;
             }
         }
         //printf("recv_buf_released,sending reply_seq_num,diff=%d,rate=%d,id=%d\n", MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), notify_rate + (notify_rate>>1), vc_dcfa->ibcom->sseq_num);
         MPID_nem_dcfa_send_reply_seq_num(vc);
-    skip:;
+      skip:;
     }
 #endif
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_RECV_BUF_RELEASED);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 #if 0
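
For readers following the bookkeeping in MPID_nem_dcfa_recv_buf_released above: each released RDMA-write-to slot is recorded as a bit in rsr_seq_num_released[], and rsr_seq_num_tail is then advanced over every contiguous released slot. Below is a minimal, self-contained sketch of that idea only; NSEG, released, tail and release_slot are illustrative stand-ins, not the netmod's actual symbols.

    #include <stdint.h>
    #include <stdio.h>

    #define NSEG 64                 /* stand-in for IBCOM_RDMABUF_NSEG */

    static uint64_t released[(NSEG + 63) / 64];
    static int tail = -1;           /* stand-in for rsr_seq_num_tail */

    /* mark one ring slot released, then advance the tail over every
     * contiguous released slot, clearing each bit as it is consumed */
    static void release_slot(int slot)
    {
        released[slot / 64] |= 1ULL << (slot & 63);
        for (;;) {
            int idx = (tail + 1) % NSEG;
            if (!((released[idx / 64] >> (idx & 63)) & 1))
                break;
            released[idx / 64] &= ~(1ULL << (idx & 63));
            tail += 1;
        }
    }

    int main(void)
    {
        release_slot(2);            /* out of order: tail cannot advance yet */
        release_slot(1);
        release_slot(0);            /* 0..2 now contiguous, tail advances to 2 */
        printf("tail=%d\n", tail);  /* prints tail=2 */
        return 0;
    }
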
@@ -1075,40 +1228,43 @@ int MPID_nem_dcfa_recv_buf_released(struct MPIDI_VC *vc, void* user_data) {
 #define FUNCNAME MPID_nem_dcfa_PktHandler_lmt_done
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_PktHandler_lmt_done(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+int MPID_nem_dcfa_PktHandler_lmt_done(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                      MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
 {
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
-    MPID_nem_dcfa_pkt_lmt_done_t * const done_pkt = (MPID_nem_dcfa_pkt_lmt_done_t *)pkt;
+    MPID_nem_dcfa_pkt_lmt_done_t *const done_pkt = (MPID_nem_dcfa_pkt_lmt_done_t *) pkt;
     MPID_Request *req;
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_LMT_DONE);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_LMT_DONE);
 
     /* Check the assumption on sizeof(MPIDI_CH3_Pkt_t).
-       It is utilized in pkt_DONE_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) 
-       that must be larger than sizeof(MPID_nem_dcfa_pkt_lmt_done_t) */
-    if(sizeof(MPID_nem_dcfa_pkt_lmt_done_t) > sizeof(MPIDI_CH3_Pkt_t)) {
+     * pkt_DONE_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) relies on
+     * sizeof(MPIDI_CH3_Pkt_t) being larger than sizeof(MPID_nem_dcfa_pkt_lmt_done_t) */
+    if (sizeof(MPID_nem_dcfa_pkt_lmt_done_t) > sizeof(MPIDI_CH3_Pkt_t)) {
         MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_INTERN, "**sizeof(MPIDI_CH3_Pkt_t)");
     }
 
     /* fall back to the original handler */
-    /* we don't need to worry about the difference caused by embedding seq_num 
-       because the handler does not use it (e.g. applying sizeof operator to it) */
-    MPID_nem_pkt_lmt_done_t *pkt_parent_class = (MPID_nem_pkt_lmt_done_t *)pkt;
+    /* we don't need to worry about the difference caused by embedding seq_num
+     * because the handler does not use it (e.g. it does not apply the sizeof operator to it) */
+    MPID_nem_pkt_lmt_done_t *pkt_parent_class = (MPID_nem_pkt_lmt_done_t *) pkt;
     pkt_parent_class->type = MPIDI_NEM_PKT_LMT_DONE;
 #if 0
-    mpi_errno = MPID_nem_handle_pkt(vc, (char *)pkt_parent_class, *buflen);
+    mpi_errno = MPID_nem_handle_pkt(vc, (char *) pkt_parent_class, *buflen);
 #else
     MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**notimplemented");
     /* mpid_nem_lmt.c needs to be modified to make pkt_DONE_handler visible to this file */
     //mpi_errno = pkt_DONE_handler(vc, pkt, buflen, rreqp);
 #endif
-    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_LMT_DONE);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 #endif
@@ -1119,10 +1275,14 @@ int MPID_nem_dcfa_PktHandler_lmt_done(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPID
 #define FUNCNAME MPID_nem_dcfa_PktHandler_EagerSend
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_PktHandler_EagerSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
-    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
-    MPIDI_CH3_Pkt_eager_send_t *ch3_pkt = (MPIDI_CH3_Pkt_eager_send_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
-    MPID_Request * rreq;
+int MPID_nem_dcfa_PktHandler_EagerSend(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                       MPIDI_msg_sz_t * buflen /* out */ ,
+                                       MPID_Request ** rreqp /* out */)
+{
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t *) pkt;
+    MPIDI_CH3_Pkt_eager_send_t *ch3_pkt =
+        (MPIDI_CH3_Pkt_eager_send_t *) ((void *) pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request *rreq;
     int found;
     int complete;
     char *data_buf;
@@ -1136,9 +1296,9 @@ int MPID_nem_dcfa_PktHandler_EagerSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MP
     printf("dcfa_pkthandler_eagersend,tag=%d\n", ch3_pkt->match.parts.tag);
 
     /* Check the assumption on sizeof(MPIDI_CH3_Pkt_t).
-       It is utilized to point the payload location in MPIDI_CH3_PktHandler_EagerSend
-       (src/mpid/ch3/src/ch3u_eager.c) that must be larger than sizeof(MPID_nem_dcfa_pkt_eager_send_t) */
-    //if(sizeof(MPID_nem_dcfa_pkt_eager_send_t) > sizeof(MPIDI_CH3_Pkt_t)) {
+     * MPIDI_CH3_PktHandler_EagerSend (src/mpid/ch3/src/ch3u_eager.c) uses it to locate the
+     * payload, so it must be larger than sizeof(MPID_nem_dcfa_pkt_eager_send_t) */
+    //if (sizeof(MPID_nem_dcfa_pkt_eager_send_t) > sizeof(MPIDI_CH3_Pkt_t)) {
     //MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_INTERN, "**sizeof(MPIDI_CH3_Pkt_t)");
     //}
 
@@ -1147,50 +1307,58 @@ int MPID_nem_dcfa_PktHandler_EagerSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MP
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
     ibcom_errno = ibcom_lsr_seq_num_tail_get(vc_dcfa->sc->fd, &lsr_seq_num_tail);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_lsr_seq_num_tail_get");
-    dprintf("MPID_nem_dcfa_PktHandler_EagerSend,lsr_seq_num_tail=%d,netmod_pkt->seq_num_tail=%d\n", *lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_PktHandler_EagerSend,lsr_seq_num_tail=%d,netmod_pkt->seq_num_tail=%d\n",
+            *lsr_seq_num_tail, netmod_pkt->seq_num_tail);
     *lsr_seq_num_tail = DCFA_MAX(*lsr_seq_num_tail, netmod_pkt->seq_num_tail);
-    dprintf("MPID_nem_dcfa_PktHandler_EagerSend,lsr_seq_num_tail updated to %d\n", *lsr_seq_num_tail);
+    dprintf("MPID_nem_dcfa_PktHandler_EagerSend,lsr_seq_num_tail updated to %d\n",
+            *lsr_seq_num_tail);
 
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
     /* change remote notification policy of RDMA-write-to buf */
-    dprintf("pkthandler,eagersend,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    dprintf("pkthandler,eagersend,old rstate=%d\n",
+            vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
     MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, lsr_seq_num_tail);
-    dprintf("pkthandler,eagersend,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    dprintf("pkthandler,eagersend,new rstate=%d\n",
+            vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
 #endif
-    
-    dprintf("pkthandler,eagersend,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+
+    dprintf("pkthandler,eagersend,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n",
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
     /* try to send from sendq because at least one RDMA-write-to buffer has been released */
     /* calling drain_scq from progress_send deprives dcfa_poll of the chance
-       for dcfa_poll to drain sendq using ncqe
-       however transfers events to
-       (not to reply_seq_num because it's regulated by the rate)
-       fire on dcfa_poll using nces (e.g. MPI_Put) so we need to perform 
-       progress_send for all of VCs using nces in dcfa_poll.*/
-        dprintf("pkthandler,eagersend,send_progress\n");fflush(stdout);
-        MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
-
-    /* fall back to the original handler */
-    /* we don't need to worry about the difference caused by embedding seq_num
-       because size of MPI-header of MPIDI_CH3_PKT_EAGER_SEND equals to sizeof(MPIDI_CH3_Pkt_t) 
-       see MPID_nem_dcfa_iSendContig
-     */
-    //ch3_pkt->type = MPIDI_CH3_PKT_EAGER_SEND;
+     * to drain the sendq using ncqe; however, it transfers events
+     * (e.g. MPI_Put, but not reply_seq_num, because that is regulated by the rate)
+     * that fire on dcfa_poll using nces, so we need to perform
+     * progress_send for all of the VCs using nces in dcfa_poll. */
+    dprintf("pkthandler,eagersend,send_progress\n");
+    fflush(stdout);
+    MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
+        /* fall back to the original handler */
+        /* we don't need to worry about the difference caused by embedding seq_num
+         * because the size of the MPI header of MPIDI_CH3_PKT_EAGER_SEND equals sizeof(MPIDI_CH3_Pkt_t)
+         * see MPID_nem_dcfa_iSendContig
+         */
+        //ch3_pkt->type = MPIDI_CH3_PKT_EAGER_SEND;
 #if 0
-    mpi_errno = MPID_nem_handle_pkt(vc, (char *)pkt_parent_class, *buflen);
+        mpi_errno = MPID_nem_handle_pkt(vc, (char *) pkt_parent_class, *buflen);
 #else
-    printf("dcfa_poll.c,before PktHandler_EagerSend,buflen=%ld\n", *buflen);
+        printf("dcfa_poll.c,before PktHandler_EagerSend,buflen=%ld\n", *buflen);
     MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
-    mpi_errno = MPIDI_CH3_PktHandler_EagerSend(vc, (MPIDI_CH3_Pkt_t*)ch3_pkt, &ch3_buflen, rreqp);
+    mpi_errno = MPIDI_CH3_PktHandler_EagerSend(vc, (MPIDI_CH3_Pkt_t *) ch3_pkt, &ch3_buflen, rreqp);
     printf("dcfa_poll.c,after PktHandler_EagerSend,buflen=%ld\n", ch3_buflen);
-    *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t); 
+    *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t);
     printf("dcfa_poll.c,after addition,buflen=%ld\n", *buflen);
 #endif
-    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
- 
- fn_exit:
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
+
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_EAGERSEND);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
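
The shape of MPID_nem_dcfa_PktHandler_EagerSend above, repeated below for Put, Accumulate, Get and GetResp, is: read the piggy-backed tail from the netmod prefix, strip the prefix, delegate to the original CH3 handler with a reduced buflen, then add the prefix size back to the consumed length. A minimal sketch of that delegation pattern follows; netmod_prefix_t, ch3_pkt_t and ch3_handler are hypothetical stand-ins for the MPICH types and handler.

    #include <stddef.h>
    #include <stdio.h>

    /* hypothetical stand-ins for MPID_nem_dcfa_pkt_prefix_t and MPIDI_CH3_Pkt_t */
    typedef struct { int seq_num_tail; } netmod_prefix_t;
    typedef struct { int type; char payload[32]; } ch3_pkt_t;

    /* hypothetical stand-in for the original CH3 packet handler */
    static int ch3_handler(ch3_pkt_t *pkt, size_t *buflen)
    {
        *buflen = sizeof(*pkt);     /* pretend the whole CH3 packet was consumed */
        return 0;
    }

    /* strip the netmod prefix, delegate, then re-add the prefix size to *buflen */
    static int netmod_handler(void *pkt, size_t *buflen)
    {
        netmod_prefix_t *prefix = (netmod_prefix_t *) pkt;
        ch3_pkt_t *ch3_pkt = (ch3_pkt_t *) ((char *) pkt + sizeof(netmod_prefix_t));
        size_t ch3_buflen = *buflen - sizeof(netmod_prefix_t);
        int rc;

        printf("piggy-backed tail=%d\n", prefix->seq_num_tail);
        rc = ch3_handler(ch3_pkt, &ch3_buflen);
        *buflen = ch3_buflen + sizeof(netmod_prefix_t);
        return rc;
    }

    int main(void)
    {
        /* a receive buffer laid out as [prefix][CH3 packet], as the netmod does */
        static union { char raw[sizeof(netmod_prefix_t) + sizeof(ch3_pkt_t)]; int align; } buf;
        size_t buflen = sizeof(buf.raw);
        return netmod_handler(buf.raw, &buflen);
    }
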
 
@@ -1201,10 +1369,14 @@ int MPID_nem_dcfa_PktHandler_EagerSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MP
 #define FUNCNAME MPID_nem_dcfa_PktHandler_Put
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
-    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
-    MPIDI_CH3_Pkt_put_t *ch3_pkt = (MPIDI_CH3_Pkt_put_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
-    MPID_Request * rreq;
+int MPID_nem_dcfa_PktHandler_Put(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                 MPIDI_msg_sz_t * buflen /* out */ ,
+                                 MPID_Request ** rreqp /* out */)
+{
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t *) pkt;
+    MPIDI_CH3_Pkt_put_t *ch3_pkt =
+        (MPIDI_CH3_Pkt_put_t *) ((void *) pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request *rreq;
     int found;
     int complete;
     char *data_buf;
@@ -1217,35 +1389,44 @@ int MPID_nem_dcfa_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_ms
 
     /* Update occupation status of local SR (send request) queue */
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
-    dprintf("MPID_nem_dcfa_Pkthandler_Put,lsr_seq_num_tail=%d,put_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
-    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
-    dprintf("MPID_nem_dcfa_Pkthandler_Put,lsr_seq_num_tail updated to %d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_Put,lsr_seq_num_tail=%d,put_pkt->seq_num_tail=%d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail =
+        DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_Put,lsr_seq_num_tail updated to %d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail);
 
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
     /* change remote notification policy of RDMA-write-to buf */
-    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
-    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-#endif    
-    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa,
+                                                            &vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+#endif
+    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n",
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
     /* try to send from sendq because at least one RDMA-write-to buffer has been released */
-    dprintf("pkthandler,put,send_progress\n");fflush(stdout);
+    dprintf("pkthandler,put,send_progress\n");
+    fflush(stdout);
     MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS;
-    
+
     /* fall back to the original handler */
     /* we don't need to worry about the difference caused by embedding seq_num
-       because size of MPI-header of MPIDI_CH3_PKT_PUT equals to sizeof(MPIDI_CH3_Pkt_t) 
-       see MPID_nem_dcfa_iSendContig
-    */
+     * because the size of the MPI header of MPIDI_CH3_PKT_PUT equals sizeof(MPIDI_CH3_Pkt_t)
+     * see MPID_nem_dcfa_iSendContig
+     */
     MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
-    mpi_errno = MPIDI_CH3_PktHandler_Put(vc, (MPIDI_CH3_Pkt_t *)ch3_pkt, &ch3_buflen, rreqp);
+    mpi_errno = MPIDI_CH3_PktHandler_Put(vc, (MPIDI_CH3_Pkt_t *) ch3_pkt, &ch3_buflen, rreqp);
     *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t);
-    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
- 
- fn_exit:
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
+
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_PUT);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 #endif
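
Every handler in this file folds the piggy-backed seq_num_tail into its local view with DCFA_MAX, so a stale or reordered packet can never move the tail backwards. A toy illustration of that monotonic fold (plain ints, ignoring 32-bit wrap-around for clarity; DCFA_MAX_SKETCH is an illustrative stand-in, not the real macro):

    #include <stdio.h>

    #define DCFA_MAX_SKETCH(a, b) ((a) > (b) ? (a) : (b))   /* illustrative stand-in */

    int main(void)
    {
        int lsr_seq_num_tail = 10;              /* local view of the peer's released tail */
        int piggybacked[] = { 12, 11, 15 };     /* tails carried by arriving packets, possibly stale */
        int i;

        for (i = 0; i < 3; i++) {
            lsr_seq_num_tail = DCFA_MAX_SKETCH(lsr_seq_num_tail, piggybacked[i]);
            printf("after packet %d: tail=%d\n", i, lsr_seq_num_tail);
        }
        /* prints 12, 12, 15: the stale value 11 is ignored */
        return 0;
    }
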
@@ -1256,10 +1437,14 @@ int MPID_nem_dcfa_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_ms
 #define FUNCNAME MPID_nem_dcfa_PktHandler_Accumulate
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_PktHandler_Accumulate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
-    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
-    MPIDI_CH3_Pkt_accum_t *ch3_pkt = (MPIDI_CH3_Pkt_accum_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
-    MPID_Request * rreq;
+int MPID_nem_dcfa_PktHandler_Accumulate(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                        MPIDI_msg_sz_t * buflen /* out */ ,
+                                        MPID_Request ** rreqp /* out */)
+{
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t *) pkt;
+    MPIDI_CH3_Pkt_accum_t *ch3_pkt =
+        (MPIDI_CH3_Pkt_accum_t *) ((void *) pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request *rreq;
     int found;
     int complete;
     char *data_buf;
@@ -1272,36 +1457,44 @@ int MPID_nem_dcfa_PktHandler_Accumulate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, M
 
     /* Update occupation status of local SR (send request) queue */
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
-    dprintf("MPID_nem_dcfa_Pkthandler_Accumulate,lsr_seq_num_tail=%d,accum_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
-    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
-    dprintf("MPID_nem_dcfa_Pkthandler_Accumulate,lsr_seq_num_tail updated to %d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_Accumulate,lsr_seq_num_tail=%d,accum_pkt->seq_num_tail=%d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail =
+        DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_Accumulate,lsr_seq_num_tail updated to %d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail);
 
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
     /* change remote notification policy of RDMA-write-to buf */
-    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
-    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-#endif    
-    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa,
+                                                            &vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+#endif
+    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n",
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
     /* try to send from sendq because at least one RDMA-write-to buffer has been released */
-        dprintf("pkthandler,put,send_progress\n");fflush(stdout);
-        MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
-
+    dprintf("pkthandler,put,send_progress\n");
+    fflush(stdout);
+    MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
+        /* fall back to the original handler */
+        /* we don't need to worry about the difference caused by embedding seq_num
+         * because the size of the MPI header of MPIDI_CH3_PKT_PUT equals sizeof(MPIDI_CH3_Pkt_t)
+         * see MPID_nem_dcfa_iSendContig
+         */
+        MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    mpi_errno =
+        MPIDI_CH3_PktHandler_Accumulate(vc, (MPIDI_CH3_Pkt_t *) ch3_pkt, &ch3_buflen, rreqp);
+    *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
 
-    /* fall back to the original handler */
-    /* we don't need to worry about the difference caused by embedding seq_num
-       because size of MPI-header of MPIDI_CH3_PKT_PUT equals to sizeof(MPIDI_CH3_Pkt_t) 
-       see MPID_nem_dcfa_iSendContig
-     */
-    MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
-    mpi_errno = MPIDI_CH3_PktHandler_Accumulate(vc, (MPIDI_CH3_Pkt_t *)ch3_pkt, &ch3_buflen, rreqp);
-    *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t); 
-    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
- 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_ACCUMULATE);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
@@ -1311,10 +1504,14 @@ int MPID_nem_dcfa_PktHandler_Accumulate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, M
 #define FUNCNAME MPID_nem_dcfa_PktHandler_Get
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_PktHandler_Get( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
-    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
-    MPIDI_CH3_Pkt_get_t *ch3_pkt = (MPIDI_CH3_Pkt_get_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
-    MPID_Request * rreq;
+int MPID_nem_dcfa_PktHandler_Get(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                 MPIDI_msg_sz_t * buflen /* out */ ,
+                                 MPID_Request ** rreqp /* out */)
+{
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t *) pkt;
+    MPIDI_CH3_Pkt_get_t *ch3_pkt =
+        (MPIDI_CH3_Pkt_get_t *) ((void *) pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request *rreq;
     int found;
     int complete;
     char *data_buf;
@@ -1327,35 +1524,43 @@ int MPID_nem_dcfa_PktHandler_Get( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_ms
 
     /* Update occupation status of local SR (send request) queue */
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
-    dprintf("MPID_nem_dcfa_Pkthandler_Get,lsr_seq_num_tail=%d,get_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
-    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
-    dprintf("MPID_nem_dcfa_Pkthandler_Get,lsr_seq_num_tail updated to %d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_Get,lsr_seq_num_tail=%d,get_pkt->seq_num_tail=%d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail =
+        DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_Get,lsr_seq_num_tail updated to %d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail);
 
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
     /* change remote notification policy of RDMA-write-to buf */
-    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
-    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-#endif    
-    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa,
+                                                            &vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+#endif
+    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n",
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
     /* try to send from sendq because at least one RDMA-write-to buffer has been released */
-        dprintf("pkthandler,get,send_progress\n");fflush(stdout);
-        MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET 
-
-    /* fall back to the original handler */
-    /* we don't need to worry about the difference caused by embedding seq_num
-       because size of MPI-header of MPIDI_CH3_PKT_PUT equals to sizeof(MPIDI_CH3_Pkt_t) 
-       see MPID_nem_dcfa_iSendContig
-     */
-    MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
-    mpi_errno = MPIDI_CH3_PktHandler_Get(vc, (MPIDI_CH3_Pkt_t *)ch3_pkt, &ch3_buflen, rreqp);
+    dprintf("pkthandler,get,send_progress\n");
+    fflush(stdout);
+    MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET
+        /* fall back to the original handler */
+        /* we don't need to worry about the difference caused by embedding seq_num
+         * because the size of the MPI header of MPIDI_CH3_PKT_PUT equals sizeof(MPIDI_CH3_Pkt_t)
+         * see MPID_nem_dcfa_iSendContig
+         */
+        MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    mpi_errno = MPIDI_CH3_PktHandler_Get(vc, (MPIDI_CH3_Pkt_t *) ch3_pkt, &ch3_buflen, rreqp);
     *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t);
-    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
- 
- fn_exit:
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
+
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_GET);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
@@ -1365,10 +1570,14 @@ int MPID_nem_dcfa_PktHandler_Get( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_ms
 #define FUNCNAME MPID_nem_dcfa_PktHandler_GetResp
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_PktHandler_GetResp( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
-    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
-    MPIDI_CH3_Pkt_get_t *ch3_pkt = (MPIDI_CH3_Pkt_get_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
-    MPID_Request * rreq;
+int MPID_nem_dcfa_PktHandler_GetResp(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                     MPIDI_msg_sz_t * buflen /* out */ ,
+                                     MPID_Request ** rreqp /* out */)
+{
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t *) pkt;
+    MPIDI_CH3_Pkt_get_t *ch3_pkt =
+        (MPIDI_CH3_Pkt_get_t *) ((void *) pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request *rreq;
     int found;
     int complete;
     char *data_buf;
@@ -1381,35 +1590,43 @@ int MPID_nem_dcfa_PktHandler_GetResp( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPID
 
     /* Update occupation status of local SR (send request) queue */
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
-    dprintf("MPID_nem_dcfa_Pkthandler_GetResp,lsr_seq_num_tail=%d,get_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
-    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
-    dprintf("MPID_nem_dcfa_Pkthandler_GetResp,lsr_seq_num_tail updated to %d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_GetResp,lsr_seq_num_tail=%d,get_pkt->seq_num_tail=%d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail =
+        DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_GetResp,lsr_seq_num_tail updated to %d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail);
 
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
     /* change remote notification policy of RDMA-write-to buf */
-    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
-    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-#endif    
-    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa,
+                                                            &vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+#endif
+    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n",
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
     /* try to send from sendq because at least one RDMA-write-to buffer has been released */
-        dprintf("pkthandler,get,send_progress\n");fflush(stdout);
-        MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET 
-
-    /* fall back to the original handler */
-    /* we don't need to worry about the difference caused by embedding seq_num
-       because size of MPI-header of MPIDI_CH3_PKT_PUT equals to sizeof(MPIDI_CH3_Pkt_t) 
-       see MPID_nem_dcfa_iSendContig
-     */
-    MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
-    mpi_errno = MPIDI_CH3_PktHandler_GetResp(vc, (MPIDI_CH3_Pkt_t *)ch3_pkt, &ch3_buflen, rreqp);
+    dprintf("pkthandler,get,send_progress\n");
+    fflush(stdout);
+    MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET
+        /* fall back to the original handler */
+        /* we don't need to worry about the difference caused by embedding seq_num
+         * because the size of the MPI header of MPIDI_CH3_PKT_PUT equals sizeof(MPIDI_CH3_Pkt_t)
+         * see MPID_nem_dcfa_iSendContig
+         */
+        MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    mpi_errno = MPIDI_CH3_PktHandler_GetResp(vc, (MPIDI_CH3_Pkt_t *) ch3_pkt, &ch3_buflen, rreqp);
     *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t);
-    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
- 
- fn_exit:
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
+
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_GETRESP);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
@@ -1418,11 +1635,12 @@ int MPID_nem_dcfa_PktHandler_GetResp( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPID
 #define FUNCNAME MPID_nem_dcfa_pkt_GET_DONE_handler
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                       MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
 {
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
-    MPID_nem_dcfa_pkt_lmt_get_done_t * const done_pkt = (MPID_nem_dcfa_pkt_lmt_get_done_t *)pkt;
+    MPID_nem_dcfa_pkt_lmt_get_done_t *const done_pkt = (MPID_nem_dcfa_pkt_lmt_get_done_t *) pkt;
     MPID_Request *req;
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKT_GET_DONE_HANDLER);
@@ -1435,10 +1653,9 @@ int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPI
 
     MPIU_THREAD_CS_ENTER(LMT,);
 
-    switch (MPIDI_Request_get_type(req))
-    {
-    /* MPIDI_Request_set_type is not performed when
-       MPID_Isend --> FDU_or_AEP --> recv_posted --> dcfa_poll --> PUTCTS packet-handler */
+    switch (MPIDI_Request_get_type(req)) {
+        /* MPIDI_Request_set_type is not performed when
+         * MPID_Isend --> FDU_or_AEP --> recv_posted --> dcfa_poll --> PUTCTS packet-handler */
     case MPIDI_REQUEST_TYPE_RECV:
         MPIU_ERR_INTERNALANDJUMP(mpi_errno, "unexpected request type");
         break;
@@ -1447,28 +1664,35 @@ int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPI
     case MPIDI_REQUEST_TYPE_SSEND:
     case MPIDI_REQUEST_TYPE_BSEND:
         /* extract embedded RDMA-write-to buffer occupancy information */
-        dprintf("get_done_handler,old lsr_seq_num_tail=%d,done_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, done_pkt->seq_num_tail);
-        vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, done_pkt->seq_num_tail);
+        dprintf("get_done_handler,old lsr_seq_num_tail=%d,done_pkt->seq_num_tail=%d\n",
+                vc_dcfa->ibcom->lsr_seq_num_tail, done_pkt->seq_num_tail);
+        vc_dcfa->ibcom->lsr_seq_num_tail =
+            DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, done_pkt->seq_num_tail);
         //dprintf("lmt_start_recv,new lsr_seq_num=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
-        
+
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
         /* change remote notification policy of RDMA-write-to buf */
-        //dprintf("lmt_start_recv,reply_seq_num,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-        MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
-        //dprintf("lmt_start_recv,reply_seq_num,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
-#endif        
+        //dprintf("lmt_start_recv,reply_seq_num,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+        MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa,
+                                                                &vc_dcfa->ibcom->lsr_seq_num_tail);
+        //dprintf("lmt_start_recv,reply_seq_num,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
+#endif
         //dprintf("lmt_start_recv,reply_seq_num,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
         /* try to send from sendq because at least one RDMA-write-to buffer has been released */
         //dprintf("lmt_start_recv,reply_seq_num,send_progress\n");
-        if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
-            dprintf("get_done_handler,ncom=%d,ncqe=%d,diff=%d(%d-%d)\n", vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG,
-                   vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail);
+        if (!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+            dprintf("get_done_handler,ncom=%d,ncqe=%d,diff=%d(%d-%d)\n",
+                    vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY,
+                    MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY,
+                    MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                                         vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG,
+                    vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail);
         }
-            dprintf("get_done_handler,send_progress\n");fflush(stdout);
-            MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
-
-        mpi_errno = vc->ch.lmt_done_send(vc, req);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        dprintf("get_done_handler,send_progress\n");
+        fflush(stdout);
+        MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS mpi_errno = vc->ch.lmt_done_send(vc, req);
+        if (mpi_errno)
+            MPIU_ERR_POP(mpi_errno);
         break;
     default:
         MPIU_ERR_INTERNALANDJUMP(mpi_errno, "unexpected request type");
@@ -1477,11 +1701,11 @@ int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPI
 
     *rreqp = NULL;
 
- fn_exit:
+  fn_exit:
     MPIU_THREAD_CS_EXIT(LMT,);
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKT_GET_DONE_HANDLER);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
@@ -1489,11 +1713,12 @@ int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPI
 #define FUNCNAME MPID_nem_dcfa_PktHandler_req_seq_num
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_PktHandler_req_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+int MPID_nem_dcfa_PktHandler_req_seq_num(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                         MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
 {
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
-    MPID_nem_dcfa_pkt_req_seq_num_t * const req_pkt = (MPID_nem_dcfa_pkt_req_seq_num_t *)pkt;
+    MPID_nem_dcfa_pkt_req_seq_num_t *const req_pkt = (MPID_nem_dcfa_pkt_req_seq_num_t *) pkt;
     MPID_Request *req;
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REQ_SEQ_NUM);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REQ_SEQ_NUM);
@@ -1507,24 +1732,25 @@ int MPID_nem_dcfa_PktHandler_req_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, M
     /* update occupancy info of SR */
     /* request piggy-backs seq_num although it's requesting responder's seq_num */
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
-    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, req_pkt->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail =
+        DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, req_pkt->seq_num_tail);
 
     dprintf("PktHandler_req_seq_num,sendq=%d,ncom=%d,ncqe=%d,diff=%d(%d-%d)\n",
-           MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq),
-           vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY,
-           MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY,
-           MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG,
-           vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail
-           );
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq),
+            vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY,
+            MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                                 vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG,
+            vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail);
 
     /* send reply */
     dprintf("PktHandler_req_seq_num,sending reply_seq_num,id=%d\n", vc_dcfa->ibcom->sseq_num);
     MPID_nem_dcfa_send_reply_seq_num(vc);
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REQ_SEQ_NUM);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
@@ -1532,11 +1758,12 @@ int MPID_nem_dcfa_PktHandler_req_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, M
 #define FUNCNAME MPID_nem_dcfa_PktHandler_reply_seq_num
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_PktHandler_reply_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+int MPID_nem_dcfa_PktHandler_reply_seq_num(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                           MPIDI_msg_sz_t * buflen, MPID_Request ** rreqp)
 {
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
-    MPID_nem_dcfa_pkt_reply_seq_num_t * const reply_pkt = (MPID_nem_dcfa_pkt_reply_seq_num_t *)pkt;
+    MPID_nem_dcfa_pkt_reply_seq_num_t *const reply_pkt = (MPID_nem_dcfa_pkt_reply_seq_num_t *) pkt;
     MPID_Request *req;
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REPLY_SEQ_NUM);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REPLY_SEQ_NUM);
@@ -1551,7 +1778,8 @@ int MPID_nem_dcfa_PktHandler_reply_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
     /* update occupancy info of RDMA-write-buf */
     int *lsr_seq_num_tail;
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
-    dprintf("pkthandler,reply_seq_num,old lsr_seq_num=%d,reply_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, reply_pkt->seq_num_tail);
+    dprintf("pkthandler,reply_seq_num,old lsr_seq_num=%d,reply_pkt->seq_num_tail=%d\n",
+            vc_dcfa->ibcom->lsr_seq_num_tail, reply_pkt->seq_num_tail);
     ibcom_errno = ibcom_lsr_seq_num_tail_get(vc_dcfa->sc->fd, &lsr_seq_num_tail);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_lsr_seq_num_tail_get");
     *lsr_seq_num_tail = DCFA_MAX(*lsr_seq_num_tail, reply_pkt->seq_num_tail);
@@ -1559,21 +1787,19 @@ int MPID_nem_dcfa_PktHandler_reply_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
     /* change remote notification policy of RDMA-write-to buf */
-    //dprintf("pkthandler,reply_seq_num,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    //dprintf("pkthandler,reply_seq_num,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
     MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, lsr_seq_num_tail);
-    //dprintf("pkthandler,reply_seq_num,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    //dprintf("pkthandler,reply_seq_num,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
 #endif
 
     //dprintf("pkthandler,reply_seq_num,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
     /* try to send from sendq because at least one RDMA-write-to buffer has been released */
     //dprintf("pkthandler,reply_seq_num,send_progress\n");
-        dprintf("pkthandler,reply_seq_num,send_progress\n");
-        MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
-
- fn_exit:
+    dprintf("pkthandler,reply_seq_num,send_progress\n");
+  MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REPLY_SEQ_NUM);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
@@ -1581,11 +1807,15 @@ int MPID_nem_dcfa_PktHandler_reply_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 #define FUNCNAME MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+int MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state(MPIDI_VC_t * vc,
+                                                                   MPIDI_CH3_Pkt_t * pkt,
+                                                                   MPIDI_msg_sz_t * buflen,
+                                                                   MPID_Request ** rreqp)
 {
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
-    MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t * const reply_pkt = (MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t *)pkt;
+    MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t *const reply_pkt =
+        (MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t *) pkt;
     MPID_Request *req;
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_CHANGE_RDMABUF_OCCUPANCY_NOTIFY_STATE);
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_CHANGE_RDMABUF_OCCUPANCY_NOTIFY_STATE);
@@ -1599,17 +1829,22 @@ int MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state(MPIDI_VC_t *v
 
     /* update occupancy info of SR */
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
-    dprintf("pkthandler,change notify state,old lstate=%d,pkt->state=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_lstate, reply_pkt->state);
+    dprintf("pkthandler,change notify state,old lstate=%d,pkt->state=%d\n",
+            vc_dcfa->ibcom->rdmabuf_occupancy_notify_lstate, reply_pkt->state);
     int *rdmabuf_occupancy_notify_lstate;
-    ibcom_errno = ibcom_rdmabuf_occupancy_notify_lstate_get(vc_dcfa->sc->fd, &rdmabuf_occupancy_notify_lstate);
-    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rdmabuf_occupancy_notify_lstate_get"); 
+    ibcom_errno =
+        ibcom_rdmabuf_occupancy_notify_lstate_get(vc_dcfa->sc->fd,
+                                                  &rdmabuf_occupancy_notify_lstate);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                        "**ibcom_rdmabuf_occupancy_notify_lstate_get");
     *rdmabuf_occupancy_notify_lstate = reply_pkt->state;
-    dprintf("pkthandler,change notify state,new lstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_lstate);
+    dprintf("pkthandler,change notify state,new lstate=%d\n",
+            vc_dcfa->ibcom->rdmabuf_occupancy_notify_lstate);
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_CHANGE_RDMABUF_OCCUPANCY_NOTIFY_STATE);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
@@ -1618,7 +1853,8 @@ int MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state(MPIDI_VC_t *v
 #define FUNCNAME MPID_nem_dcfa_cm_drain_scq
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_cm_drain_scq() {
+int MPID_nem_dcfa_cm_drain_scq()
+{
 
     int mpi_errno = MPI_SUCCESS;
     int result;
@@ -1630,41 +1866,49 @@ int MPID_nem_dcfa_cm_drain_scq() {
 
     result = ibv_poll_cq(rc_shared_scq_lmt_put, IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
     MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
-    
-    if(result > 0) { dprintf("cm_drain_scq,found,result=%d\n", result); }
-    for(i = 0; i < result; i++)  {
-        
+
+    if (result > 0) {
+        dprintf("cm_drain_scq,found,result=%d\n", result);
+    }
+    for (i = 0; i < result; i++) {
+
 #ifdef DCFA
-        if(cqe[i].status != IBV_WC_SUCCESS) { dprintf("cm_drain_scq,status=%08x\n", cqe[i].status); }
+        if (cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("cm_drain_scq,status=%08x\n", cqe[i].status);
+        }
 #else
-        if(cqe[i].status != IBV_WC_SUCCESS) { dprintf("cm_drain_scq,status=%08x,%s\n", cqe[i].status, ibv_wc_status_str(cqe[i].status)); }
+        if (cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("cm_drain_scq,status=%08x,%s\n", cqe[i].status,
+                    ibv_wc_status_str(cqe[i].status));
+        }
 #endif
-        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_drain_scq");
+        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER,
+                            "**MPID_nem_dcfa_cm_drain_scq");
         /* TODO retry a connection request when it timed out */
 
-        if(cqe.wr_id == MPID_NEM_DCFA_SYN ||
-           cqe.wr_id == MPID_NEM_DCFA_SYNACK) {
+        if (cqe[i].wr_id == MPID_NEM_DCFA_SYN || cqe[i].wr_id == MPID_NEM_DCFA_SYNACK) {
 
             MPID_nem_dcfa_conn_ud_ibcom->ncom_lmt_put -= 1;
             MPID_nem_dcfa_ncqe_connect -= 1;
-            
+
             /* Try to send from sendq_connect */
-            if(!MPID_nem_dcfa_sendq_empty(sendq_connect) &&
-               MPID_nem_dcfa_ncom_lmt_put < IBCOM_MAX_SQ_CAPACITY &&
-               MPID_nem_dcfa_ncqe_lmt_put < IBCOM_MAX_CQ_CAPACITY) {
-                MPID_nem_dcfa_send_progress_connect(); 
+            if (!MPID_nem_dcfa_sendq_empty(sendq_connect) &&
+                MPID_nem_dcfa_ncom_lmt_put < IBCOM_MAX_SQ_CAPACITY &&
+                MPID_nem_dcfa_ncqe_lmt_put < IBCOM_MAX_CQ_CAPACITY) {
+                MPID_nem_dcfa_send_progress_connect();
             }
-        } else {
+        }
+        else {
            printf("unknown command=%d\n", (int) cqe[i].wr_id);
             MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_drain_scq");
         }
     }
-    
- fn_exit:
+
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CM_DRAIN_SCQ);
     return mpi_errno;
- fn_fail:
-   goto fn_exit;   
+  fn_fail:
+    goto fn_exit;
 }
 
 #undef FUNCNAME
@@ -1681,17 +1925,21 @@ int MPID_nem_dcfa_cm_poll()
     dprintf("cm_poll,enter\n");
 
 
-    volatile uint32_t* owner = (uint32_t*)(ibcom_scratch_pad->icom_mem[IBCOM_SCRATCH_PAD_TO]);
-    if(*owner == (uint32_t)-1) { goto fn_exit; } /* not acquired */
+    volatile uint32_t *owner = (uint32_t *) (ibcom_scratch_pad->icom_mem[IBCOM_SCRATCH_PAD_TO]);
+    if (*owner == (uint32_t) - 1) {
+        goto fn_exit;
+    }   /* not acquired */
 
-    IbCom* ibcom_scratch_pad;
-    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[*owner], &ibcom_scratch_pad); 
+    IbCom *ibcom_scratch_pad;
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[*owner], &ibcom_scratch_pad);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
 
-    MPID_nem_dcfa_cm_cmd_t* received = (MPID_nem_dcfa_cm_cmd_t*)(ibcom_scratch_pad->icom_mem[IBCOM_SCRATCH_PAD_TO] + sizeof(uint32_t));
+    MPID_nem_dcfa_cm_cmd_t *received =
+        (MPID_nem_dcfa_cm_cmd_t *) (ibcom_scratch_pad->icom_mem[IBCOM_SCRATCH_PAD_TO] +
+                                    sizeof(uint32_t));
     MPID_nem_dcfa_cm_cmd_t cmd;
     MPID_nem_dcfa_vc_area *vc_dcfa;
-    switch(received->type) {
+    switch (received->type) {
     case MPID_NEM_DCFA_CM_SYN:
         ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_RC, &MPID_nem_dcfa_conns[*owner].fd);
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
@@ -1699,43 +1947,56 @@ int MPID_nem_dcfa_cm_poll()
         goto common_tail;
         break;
     case MPID_NEM_DCFA_CM_BUSINESSCARD:
-        ibcom_errno = ibcom_rts(MPID_nem_dcfa_conns[*owner].fd, received->qpnum, received->lid, &(received->gid));
+        ibcom_errno =
+            ibcom_rts(MPID_nem_dcfa_conns[*owner].fd, received->qpnum, received->lid,
+                      &(received->gid));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
-        ibcom_errno = ibcom_reg_mr_connect(MPID_nem_dcfa_conns[*owner].fd, received->rmem, received->rkey);
+        ibcom_errno =
+            ibcom_reg_mr_connect(MPID_nem_dcfa_conns[*owner].fd, received->rmem, received->rkey);
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_reg_mr_connect");
-        VC_FIELD(MPID_nem_dcfa_conns[owner].vc,is_connected) = 1;
+        VC_FIELD(MPID_nem_dcfa_conns[*owner].vc, is_connected) = 1;
 
         cmd.type = MPID_NEM_DCFA_CM_ACK;
-        common_tail:        
-        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[*owner].fd, IBCOM_INFOKEY_PORT_LID, &(cmd.lid), sizeof(uint16_t));
+      common_tail:
+        ibcom_errno =
+            ibcom_get_info_conn(MPID_nem_dcfa_conns[*owner].fd, IBCOM_INFOKEY_PORT_LID, &(cmd.lid),
+                                sizeof(uint16_t));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[*owner].fd, IBCOM_INFOKEY_PORT_GID, &(cmd.gid), sizeof(union ibv_gid));
+        ibcom_errno =
+            ibcom_get_info_conn(MPID_nem_dcfa_conns[*owner].fd, IBCOM_INFOKEY_PORT_GID, &(cmd.gid),
+                                sizeof(union ibv_gid));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[*owner].fd, IBCOM_INFOKEY_QP_QPN, &(cmd.qpnum), sizeof(uint32_t));
+        ibcom_errno =
+            ibcom_get_info_conn(MPID_nem_dcfa_conns[*owner].fd, IBCOM_INFOKEY_QP_QPN, &(cmd.qpnum),
+                                sizeof(uint32_t));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[*owner].fd, IBCOM_SCRATCH_PAD_TO, IBCOM_INFOKEY_MR_ADDR, &(cmd.rmem), sizeof(void*));
+        ibcom_errno =
+            ibcom_get_info_mr(MPID_nem_dcfa_conns[*owner].fd, IBCOM_SCRATCH_PAD_TO,
+                              IBCOM_INFOKEY_MR_ADDR, &(cmd.rmem), sizeof(void *));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
 
-        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[*owner].fd, IBCOM_SCRATCH_PAD_TO, IBCOM_INFOKEY_MR_RKEY, &(cmd.rkey), sizeof(int));
+        ibcom_errno =
+            ibcom_get_info_mr(MPID_nem_dcfa_conns[*owner].fd, IBCOM_SCRATCH_PAD_TO,
+                              IBCOM_INFOKEY_MR_RKEY, &(cmd.rkey), sizeof(int));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
 
-        *owner = (uint32_t)-1; /* release */
+        *owner = (uint32_t) - 1;        /* release */
 
         mpi_errno = MPID_nem_dcfa_cm_send_core(rank, &cmd);
-        MPIU_ERR_CHKANDJUMP(mp_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_send_core"); 
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_send_core");
         break;
     default:
         printf("unknown connection command\n");
         MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_poll");
     }
 
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CM_POLL);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
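
The scratch-pad protocol polled by MPID_nem_dcfa_cm_poll above works as follows: the pad starts with a 32-bit owner word, (uint32_t)-1 means the pad is free, any other value is the rank that deposited a command right after the owner word, and the poller writes -1 back to release it. The sketch below shows that convention on a plain in-process buffer; cm_post, cm_cmd_t and the field names are illustrative, and the real code performs these accesses through RDMA into icom_mem[IBCOM_SCRATCH_PAD_TO].

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define PAD_FREE ((uint32_t) -1)

    /* scratch-pad layout: a 32-bit owner word followed by the command */
    typedef struct { int type; int qpnum; } cm_cmd_t;   /* illustrative command */

    static unsigned char scratch_pad[sizeof(uint32_t) + sizeof(cm_cmd_t)];

    /* requester side: deposit a command, then publish ownership last */
    static void cm_post(uint32_t my_rank, const cm_cmd_t *cmd)
    {
        memcpy(scratch_pad + sizeof(uint32_t), cmd, sizeof(*cmd));
        memcpy(scratch_pad, &my_rank, sizeof(my_rank));
    }

    /* responder side: one iteration of the polling loop */
    static void cm_poll_once(void)
    {
        uint32_t owner, free_mark = PAD_FREE;
        cm_cmd_t cmd;

        memcpy(&owner, scratch_pad, sizeof(owner));
        if (owner == PAD_FREE)
            return;                         /* pad not acquired, nothing to do */

        memcpy(&cmd, scratch_pad + sizeof(uint32_t), sizeof(cmd));
        printf("command type=%d from rank %u\n", cmd.type, (unsigned) owner);

        memcpy(scratch_pad, &free_mark, sizeof(free_mark));    /* release the pad */
    }

    int main(void)
    {
        uint32_t free_mark = PAD_FREE;
        cm_cmd_t syn = { 1, 42 };

        memcpy(scratch_pad, &free_mark, sizeof(free_mark));
        cm_poll_once();                     /* nothing posted yet */
        cm_post(3, &syn);
        cm_poll_once();                     /* processes the command and releases the pad */
        return 0;
    }
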
 
@@ -1743,7 +2004,8 @@ int MPID_nem_dcfa_cm_poll()
 #define FUNCNAME MPID_nem_dcfa_cm_accept
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_cm_accept() {
+int MPID_nem_dcfa_cm_accept()
+{
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
     int result;
@@ -1756,58 +2018,68 @@ int MPID_nem_dcfa_cm_accept() {
     result = ibv_poll_cq(ud_shared_rcq, IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe);
     MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
 
-    if(result > 0) {
-        dprintf("accept,result=%d\n", result); 
+    if (result > 0) {
+        dprintf("accept,result=%d\n", result);
     }
-    for(i = 0; i < result; i++)  {
+    for (i = 0; i < result; i++) {
         dprintf("accept,i=%d\n", i);
 
-        MPIU_ERR_CHKANDJUMP(cqe.status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_accept");
-        
-        void* rbuf;
+        MPIU_ERR_CHKANDJUMP(cqe.status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER,
+                            "**MPID_nem_dcfa_cm_accept");
+
+        void *rbuf;
         ibcom_errno = ibcom_mem_udwr_to(MPID_nem_dcfa_conn_ud_fd, &rbuf);
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_mem_udwr_to");
-        MPID_nem_dcfa_conn_pkt_t* rpkt = (MPID_nem_dcfa_conn_pkt_t*)(rbuf + 40);
-        if(rpkt->type == MPID_NEM_DCFA_SYN) {
+        MPID_nem_dcfa_conn_pkt_t *rpkt = (MPID_nem_dcfa_conn_pkt_t *) (rbuf + 40);
+        if (rpkt->type == MPID_NEM_DCFA_SYN) {
 
-            dprintf("accept,%d<-%d,type=%08x\n", MPID_nem_dcfa_myrank, rpkt->remote_rank, rpkt->type);                            
+            dprintf("accept,%d<-%d,type=%08x\n", MPID_nem_dcfa_myrank, rpkt->remote_rank,
+                    rpkt->type);
 
-            void* sbuf;
+            void *sbuf;
             ibcom_errno = ibcom_mem_udwr_from(MPID_nem_dcfa_conn_ud_fd, &sbuf);
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_mem_udwr_from");
-            MPID_nem_dcfa_conn_pkt_t* spkt = (MPID_nem_dcfa_conn_pkt_t*)(sbuf + 40);
+            MPID_nem_dcfa_conn_pkt_t *spkt = (MPID_nem_dcfa_conn_pkt_t *) (sbuf + 40);
             spkt->remote_rank = MPID_nem_dcfa_myrank;
             spkt->type = MPID_NEM_DCFA_SYNACK;
 
-            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[rpkt->remote_rank].fd, IBCOM_INFOKEY_QP_QPN, &spkt->qpn, sizeof(uint32_t));
+            ibcom_errno =
+                ibcom_get_info_conn(MPID_nem_dcfa_conns[rpkt->remote_rank].fd, IBCOM_INFOKEY_QP_QPN,
+                                    &spkt->qpn, sizeof(uint32_t));
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
 
-            ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[remote_rank].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_ADDR, &spkt->rmem, sizeof(void*));
+            ibcom_errno =
+                ibcom_get_info_mr(MPID_nem_dcfa_conns[remote_rank].fd, IBCOM_RDMAWR_TO,
+                                  IBCOM_INFOKEY_MR_ADDR, &spkt->rmem, sizeof(void *));
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
 
-            ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[remote_rank].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_RKEY, &spkt->rkey, sizeof(int));
+            ibcom_errno =
+                ibcom_get_info_mr(MPID_nem_dcfa_conns[remote_rank].fd, IBCOM_RDMAWR_TO,
+                                  IBCOM_INFOKEY_MR_RKEY, &spkt->rkey, sizeof(int));
             MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
-            
+
             /* up to here; add udsend_core(synack) */
-            if(MPID_nem_dcfa_conn_ibcom->ncom < &&
-               MPID_nem_dcfa_ncqe_connect < ) {
+            if (MPID_nem_dcfa_conn_ibcom->ncom < &&MPID_nem_dcfa_ncqe_connect <) {
                 MPID_nem_dcfa_conn_send_core(rpkt->remote_rank);
-            } else {
-                MPID_nem_dcfa_sendq_conn_entry_t* entry = MPIU_Malloc(sizeof(MPID_nem_dcfa_sendq_conn_entry_t));
+            }
+            else {
+                MPID_nem_dcfa_sendq_conn_entry_t *entry =
+                    MPIU_Malloc(sizeof(MPID_nem_dcfa_sendq_conn_entry_t));
                 MPIU_ERR_CHKANDJUMP(!entry, mpi_errno, MPI_ERR_OTHER, "**outofmemory");
                 entry->pending_pkt = *spkt;
                 MPID_nem_dcfa_conn_sendq_enqueue(MPID_nem_dcfa_conn_sendq, entry);
             }
 
 
-        } else {
-            dprintf("accept,unknown type=%08x\n", *((uint32_t*)(rbuf + 44)));
-        }                        
+        }
+        else {
+            dprintf("accept,unknown type=%08x\n", *((uint32_t *) (rbuf + 44)));
+        }
     }
- fn_exit:
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CM_ACCEPT);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 #endif
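
The accept path above implements a small UD-based handshake: on receiving a SYN the target packs its rank, its RC queue-pair number, and the address/rkey of its RDMA-write-to buffer into a SYNACK, then either sends it immediately or parks it on a software send queue when the IB send resources are saturated. Below is a minimal standalone sketch of that pattern; conn_pkt, conn_t, ud_send(), sendq_push() and MAX_INFLIGHT are illustrative stand-ins, not the ibcom/verbs API used in the patch.

    /* Hypothetical, self-contained model of the SYN/SYNACK exchange in
     * MPID_nem_dcfa_cm_accept(); the real code uses ibcom_* helpers and
     * libibverbs queue pairs. */
    #include <stdint.h>
    #include <stdio.h>

    enum { PKT_SYN = 1, PKT_SYNACK = 2 };

    typedef struct {
        int      type;         /* PKT_SYN or PKT_SYNACK */
        int      remote_rank;  /* sender's rank */
        uint32_t qpn;          /* sender's RC queue-pair number */
        void    *rmem;         /* base of sender's RDMA-write-to buffer */
        uint32_t rkey;         /* rkey registered for that buffer */
    } conn_pkt;

    typedef struct {
        int inflight;          /* outstanding sends (ncom/ncqe in the real code) */
    } conn_t;

    #define MAX_INFLIGHT 16

    /* placeholders for the real transport and overflow-queue operations */
    static void ud_send(int rank, const conn_pkt *p) { printf("send to %d type=%d\n", rank, p->type); }
    static void sendq_push(const conn_pkt *p)        { printf("queued type=%d\n", p->type); }

    /* On receiving a SYN, answer with a SYNACK carrying everything the
     * initiator needs to start RDMA-writing into our ring buffer. */
    static void on_syn(conn_t *c, int my_rank, const conn_pkt *syn,
                       uint32_t my_qpn, void *my_rmem, uint32_t my_rkey)
    {
        conn_pkt ack = { PKT_SYNACK, my_rank, my_qpn, my_rmem, my_rkey };

        if (c->inflight < MAX_INFLIGHT) {
            ud_send(syn->remote_rank, &ack);   /* fast path: room in the send queue */
            c->inflight++;
        } else {
            sendq_push(&ack);                  /* overflow path: drained later by the poll loop */
        }
    }

    int main(void)
    {
        conn_t c = { 0 };
        conn_pkt syn = { PKT_SYN, 3, 0, NULL, 0 };
        char ringbuf[4096];
        on_syn(&c, 0, &syn, 0x1234u, ringbuf, 0xabcdu);
        return 0;
    }
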
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c
index 160c49d..a8c020a 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c
@@ -28,9 +28,11 @@
 /* arena allocator */
 
 #define NIALLOCID 32
-typedef struct { char* next; } free_list_t;
-static char* free_list_front[NIALLOCID] = { 0 };
-static char* arena_flist[NIALLOCID] = { 0 };
+typedef struct {
+    char *next;
+} free_list_t;
+static char *free_list_front[NIALLOCID] = { 0 };
+static char *arena_flist[NIALLOCID] = { 0 };
 
 #define SZARENA 4096
 #define CLUSTER_SIZE (SZARENA/sz)
@@ -38,30 +40,35 @@ static char* arena_flist[NIALLOCID] = { 0 };
 #define NCLUST_SLAB 1
 #define IBCOM_AALLOC_ID_MRCACHE 0
 
-static inline void* aalloc(size_t sz, int id) {
-#if 1 /* debug */
+static inline void *aalloc(size_t sz, int id)
+{
+#if 1   /* debug */
     return malloc(sz);
 #else
-    char* p = free_list_front[id];
-    if((unsigned long)p & (SZARENA-1)) {
-        free_list_front[id] += sz; return p;
-    } else {
-        char* q, r;
-        if(arena_flist[id]) {
+    char *p = free_list_front[id];
+    if ((unsigned long) p & (SZARENA - 1)) {
+        free_list_front[id] += sz;
+        return p;
+    }
+    else {
+        char *q, r;
+        if (arena_flist[id]) {
             q = arena_flist[id];
-            arena_flist[id] = ((free_list_t*)arena_flist[id])->next;
-        } else {
-            q = mmap(NULL, ROUNDUP64(SZARENA*NCLUST_SLAB, 4096), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+            arena_flist[id] = ((free_list_t *) arena_flist[id])->next;
+        }
+        else {
+            q = mmap(NULL, ROUNDUP64(SZARENA * NCLUST_SLAB, 4096), PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 #if NCLUST_SLAB > 1
             arena_flist[id] = q + SZARENA;
-            for(p = arena_flist[id]; p < q + (NCLUST_SLAB-1) * SZARENA; p += SZARENA) {
-                ((free_list_t*)p)->next = p + SZARENA;
+            for (p = arena_flist[id]; p < q + (NCLUST_SLAB - 1) * SZARENA; p += SZARENA) {
+                ((free_list_t *) p)->next = p + SZARENA;
             }
-            ((free_list_t*)p)->next = 0;
+            ((free_list_t *) p)->next = 0;
 #endif
         }
-        *((int*)q) = CLUSTER_SIZE-1;
-        //	dprintf("q=%llx\n", q);
+        *((int *) q) = CLUSTER_SIZE - 1;
+        //      dprintf("q=%llx\n", q);
         q += sz + (SZARENA % sz);
         free_list_front[id] = q + sz;
         return q;
@@ -69,210 +76,237 @@ static inline void* aalloc(size_t sz, int id) {
 #endif
 }
 
-static inline void afree(const void* p, int id) {
-#if 1 /* debug */
-    return free((void*)p);
+static inline void afree(const void *p, int id)
+{
+#if 1   /* debug */
+    return free((void *) p);
 #else
-    p = (void*)((unsigned long)p & ~(SZARENA-1));
-    if(!(--(*((int*)p)))) {
-        ((free_list_t*)p)->next = arena_flist[id];
-        arena_flist[id] = (char*)p;
+    p = (void *) ((unsigned long) p & ~(SZARENA - 1));
+    if (!(--(*((int *) p)))) {
+        ((free_list_t *) p)->next = arena_flist[id];
+        arena_flist[id] = (char *) p;
     }
 #endif
 }
 
 struct ibcom_reg_mr_listnode_t {
-	struct ibcom_reg_mr_listnode_t *lru_next;
-	struct ibcom_reg_mr_listnode_t *lru_prev;
+    struct ibcom_reg_mr_listnode_t *lru_next;
+    struct ibcom_reg_mr_listnode_t *lru_prev;
 };
 
 struct ibcom_reg_mr_cache_entry_t {
     /* : public ibcom_reg_mr_listnode_t */
-	struct ibcom_reg_mr_listnode_t *lru_next;
-	struct ibcom_reg_mr_listnode_t *lru_prev;
+    struct ibcom_reg_mr_listnode_t *lru_next;
+    struct ibcom_reg_mr_listnode_t *lru_prev;
 
-	struct ibv_mr *mr;
-	void* addr;
-	int len;
-	int refc;
+    struct ibv_mr *mr;
+    void *addr;
+    int len;
+    int refc;
 };
 
 static struct ibcom_reg_mr_listnode_t ibcom_reg_mr_cache[IBCOM_REG_MR_NLINE];
 
-__inline__ int ibcom_hash_func(char *addr) {
-	unsigned int v = (unsigned int) (unsigned long) addr;
-	//v = v >> IBCOM_REG_MR_LOGSZPAGE; /* assume it is page aligned */
-	v = v & (IBCOM_REG_MR_NLINE - 1);
-	return (int) v;
+__inline__ int ibcom_hash_func(char *addr)
+{
+    unsigned int v = (unsigned int) (unsigned long) addr;
+    //v = v >> IBCOM_REG_MR_LOGSZPAGE; /* assume it is page aligned */
+    v = v & (IBCOM_REG_MR_NLINE - 1);
+    return (int) v;
 }
 
-void ibcom_reg_mr_insert(struct ibcom_reg_mr_listnode_t *c, struct ibcom_reg_mr_listnode_t *e) {
-	struct ibcom_reg_mr_listnode_t *next;
+void ibcom_reg_mr_insert(struct ibcom_reg_mr_listnode_t *c, struct ibcom_reg_mr_listnode_t *e)
+{
+    struct ibcom_reg_mr_listnode_t *next;
     struct ibcom_reg_mr_listnode_t *prev;
-	prev = c;
-	next = prev->lru_next;
-	e->lru_next = next;
-	e->lru_prev = prev;
-	next->lru_prev = e;
+    prev = c;
+    next = prev->lru_next;
+    e->lru_next = next;
+    e->lru_prev = prev;
+    next->lru_prev = e;
     prev->lru_next = e;
 }
 
-void ibcom_reg_mr_unlink(struct ibcom_reg_mr_listnode_t *e) {
-	struct ibcom_reg_mr_listnode_t *next, *prev;
-	next = e->lru_next;
-	prev = e->lru_prev;
-	next->lru_prev = prev;
-	prev->lru_next = next;
+void ibcom_reg_mr_unlink(struct ibcom_reg_mr_listnode_t *e)
+{
+    struct ibcom_reg_mr_listnode_t *next, *prev;
+    next = e->lru_next;
+    prev = e->lru_prev;
+    next->lru_prev = prev;
+    prev->lru_next = next;
 }
 
-static inline void __lru_queue_display() {
-	struct ibcom_reg_mr_cache_entry_t *p;
-	int i = 0;
-	for (i = 0; i < IBCOM_REG_MR_NLINE; i++) {
-		dprintf("---- hash %d\n", i);
-		for (p = (struct ibcom_reg_mr_cache_entry_t*)ibcom_reg_mr_cache[i].lru_next; p != (struct ibcom_reg_mr_cache_entry_t*)&ibcom_reg_mr_cache[i]; p = (struct ibcom_reg_mr_cache_entry_t*)p->lru_next) {
-			if (p && p->addr) {
-				dprintf("-------- p=%p,addr=%p,len=%d,refc=%d,lru_next=%p\n", p, p->addr, p->len, p->refc, p->lru_next);
-			} else {
-				dprintf("-------- p=%p,lru_next=%p\n", p, p->lru_next);
-			}
-		}
-	}
+static inline void __lru_queue_display()
+{
+    struct ibcom_reg_mr_cache_entry_t *p;
+    int i = 0;
+    for (i = 0; i < IBCOM_REG_MR_NLINE; i++) {
+        dprintf("---- hash %d\n", i);
+        for (p = (struct ibcom_reg_mr_cache_entry_t *) ibcom_reg_mr_cache[i].lru_next;
+             p != (struct ibcom_reg_mr_cache_entry_t *) &ibcom_reg_mr_cache[i];
+             p = (struct ibcom_reg_mr_cache_entry_t *) p->lru_next) {
+            if (p && p->addr) {
+                dprintf("-------- p=%p,addr=%p,len=%d,refc=%d,lru_next=%p\n", p, p->addr, p->len,
+                        p->refc, p->lru_next);
+            }
+            else {
+                dprintf("-------- p=%p,lru_next=%p\n", p, p->lru_next);
+            }
+        }
+    }
 }
 
-struct ibv_mr *ibcom_reg_mr_fetch(void *addr, int len) {
-#if 0 /* debug */
+struct ibv_mr *ibcom_reg_mr_fetch(void *addr, int len)
+{
+#if 0   /* debug */
     struct ibv_mr *mr;
-	int ibcom_errno = ibcom_reg_mr(addr, len, &mr);
-    printf("mrcache,ibcom_reg_mr,error,addr=%p,len=%d,lkey=%08x,rkey=%08x\n", addr, len, mr->lkey, mr->rkey);
-    if(ibcom_errno != 0) {
+    int ibcom_errno = ibcom_reg_mr(addr, len, &mr);
+    printf("mrcache,ibcom_reg_mr,error,addr=%p,len=%d,lkey=%08x,rkey=%08x\n", addr, len, mr->lkey,
+           mr->rkey);
+    if (ibcom_errno != 0) {
         goto fn_fail;
     }
- fn_exit:
+  fn_exit:
     return mr;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 #else
     int ibcom_errno;
-	int key;
-	struct ibcom_reg_mr_cache_entry_t *e;
+    int key;
+    struct ibcom_reg_mr_cache_entry_t *e;
 
-#if 1 /*def DCFA*/
+#if 1   /*def DCFA */
     /* we can't change addr because ibv_post_send assumes mr->host_addr (output of this function)
-       must have an exact mirror value of addr (input of this function) */
-    void* addr_aligned = addr;
+     * must have an exact mirror value of addr (input of this function) */
+    void *addr_aligned = addr;
     int len_aligned = len;
 #else
-    void* addr_aligned = (void*)((unsigned long)addr & ~(IBCOM_REG_MR_SZPAGE-1));
-    int len_aligned = ((((unsigned long)addr + len) - (unsigned long)addr_aligned + IBCOM_REG_MR_SZPAGE-1) & ~(IBCOM_REG_MR_SZPAGE-1));
+    void *addr_aligned = (void *) ((unsigned long) addr & ~(IBCOM_REG_MR_SZPAGE - 1));
+    int len_aligned =
+        ((((unsigned long) addr + len) - (unsigned long) addr_aligned + IBCOM_REG_MR_SZPAGE -
+          1) & ~(IBCOM_REG_MR_SZPAGE - 1));
 #endif
-	key = ibcom_hash_func(addr);
-
-	dprintf("[MrCache] addr=%p, len=%d\n", addr, len);
-	dprintf("[MrCache] aligned addr=%p, len=%d\n", addr_aligned, len_aligned);
-
-	//__lru_queue_display();
-    int way = 0; 
-	for(e = (struct ibcom_reg_mr_cache_entry_t*)ibcom_reg_mr_cache[key].lru_next; e != (struct ibcom_reg_mr_cache_entry_t*)&ibcom_reg_mr_cache[key]; e = (struct ibcom_reg_mr_cache_entry_t*)e->lru_next, way++) {
-		//dprintf("e=%p, e->hash_next=%p\n", e, e->lru_next);
-
-		if(e->addr <= addr_aligned && addr_aligned + len_aligned <= e->addr + e->len) {
-			dprintf("ibcom_reg_mr_fetch,hit,entry addr=%p,len=%d,mr addr=%p,len=%ld,requested addr=%p,len=%d\n", e->addr, e->len, e->mr->addr, e->mr->length, addr, len);
-			goto hit;
-		}
-	}
+    key = ibcom_hash_func(addr);
+
+    dprintf("[MrCache] addr=%p, len=%d\n", addr, len);
+    dprintf("[MrCache] aligned addr=%p, len=%d\n", addr_aligned, len_aligned);
+
+    //__lru_queue_display();
+    int way = 0;
+    for (e = (struct ibcom_reg_mr_cache_entry_t *) ibcom_reg_mr_cache[key].lru_next;
+         e != (struct ibcom_reg_mr_cache_entry_t *) &ibcom_reg_mr_cache[key];
+         e = (struct ibcom_reg_mr_cache_entry_t *) e->lru_next, way++) {
+        //dprintf("e=%p, e->hash_next=%p\n", e, e->lru_next);
+
+        if (e->addr <= addr_aligned && addr_aligned + len_aligned <= e->addr + e->len) {
+            dprintf
+                ("ibcom_reg_mr_fetch,hit,entry addr=%p,len=%d,mr addr=%p,len=%ld,requested addr=%p,len=%d\n",
+                 e->addr, e->len, e->mr->addr, e->mr->length, addr, len);
+            goto hit;
+        }
+    }
 
     // miss
 
     // evict an entry and de-register its MR when the cache-set is full
-    if(way > IBCOM_REG_MR_NWAY) {
-        struct ibcom_reg_mr_cache_entry_t* victim = (struct ibcom_reg_mr_cache_entry_t*)e->lru_prev;
-        ibcom_reg_mr_unlink((struct ibcom_reg_mr_listnode_t*)victim);
+    if (way > IBCOM_REG_MR_NWAY) {
+        struct ibcom_reg_mr_cache_entry_t *victim =
+            (struct ibcom_reg_mr_cache_entry_t *) e->lru_prev;
+        ibcom_reg_mr_unlink((struct ibcom_reg_mr_listnode_t *) victim);
 
-        dprintf("ibcom_reg_mr,evict,entry addr=%p,len=%d,mr addr=%p,len=%ld\n", e->addr, e->len, e->mr->addr, e->mr->length);
+        dprintf("ibcom_reg_mr,evict,entry addr=%p,len=%d,mr addr=%p,len=%ld\n", e->addr, e->len,
+                e->mr->addr, e->mr->length);
         int ibcom_errno = ibcom_dereg_mr(victim->mr);
-        if(ibcom_errno) {
+        if (ibcom_errno) {
             printf("mrcache,ibcom_dereg_mr\n");
             goto fn_fail;
         }
         afree(victim, IBCOM_AALLOC_ID_MRCACHE);
     }
 
-	e = aalloc(sizeof(struct ibcom_reg_mr_cache_entry_t), IBCOM_AALLOC_ID_MRCACHE);
+    e = aalloc(sizeof(struct ibcom_reg_mr_cache_entry_t), IBCOM_AALLOC_ID_MRCACHE);
     /* reference counter is used when evicting entry */
     e->refc = 1;
 
-	dprintf("ibcom_reg_mr_fetch,miss,addr=%p,len=%d\n", addr_aligned, len_aligned);
-	/* register memory */
-	ibcom_errno = ibcom_reg_mr(addr_aligned, len_aligned, &e->mr);
-    if(ibcom_errno != 0) {
+    dprintf("ibcom_reg_mr_fetch,miss,addr=%p,len=%d\n", addr_aligned, len_aligned);
+    /* register memory */
+    ibcom_errno = ibcom_reg_mr(addr_aligned, len_aligned, &e->mr);
+    if (ibcom_errno != 0) {
         fprintf(stderr, "mrcache,ibcom_reg_mr\n");
         goto fn_fail;
     }
-	e->addr = addr_aligned;
-	e->len = len_aligned;
+    e->addr = addr_aligned;
+    e->len = len_aligned;
 
-	dprintf("ibcom_reg_mr_fetch,fill,e=%p,key=%d,mr=%p,mr addr=%p,len=%ld,lkey=%08x,rkey=%08x\n", e, key, e->mr, e->mr->addr, e->mr->length, e->mr->lkey, e->mr->rkey);
+    dprintf("ibcom_reg_mr_fetch,fill,e=%p,key=%d,mr=%p,mr addr=%p,len=%ld,lkey=%08x,rkey=%08x\n", e,
+            key, e->mr, e->mr->addr, e->mr->length, e->mr->lkey, e->mr->rkey);
 
-	/* register to cache */
-    ibcom_reg_mr_insert(&ibcom_reg_mr_cache[key], (struct ibcom_reg_mr_listnode_t*)e);
+    /* register to cache */
+    ibcom_reg_mr_insert(&ibcom_reg_mr_cache[key], (struct ibcom_reg_mr_listnode_t *) e);
 
-	//__lru_queue_display();
+    //__lru_queue_display();
 
-	goto fn_exit;
+    goto fn_exit;
 
- hit:
+  hit:
 
     /* reference counter is used when evicting entry */
     e->refc++;
-#if 0 /* disable for debug */
+#if 0   /* disable for debug */
     /* move to head of the list */
-	if(e != (struct ibcom_reg_mr_cache_entry_t*)ibcom_reg_mr_cache[key].lru_next) {
-        ibcom_reg_mr_unlink((struct ibcom_reg_mr_listnode_t*)e);
-        ibcom_reg_mr_insert(&ibcom_reg_mr_cache[key], (struct ibcom_reg_mr_listnode_t*)e);
+    if (e != (struct ibcom_reg_mr_cache_entry_t *) ibcom_reg_mr_cache[key].lru_next) {
+        ibcom_reg_mr_unlink((struct ibcom_reg_mr_listnode_t *) e);
+        ibcom_reg_mr_insert(&ibcom_reg_mr_cache[key], (struct ibcom_reg_mr_listnode_t *) e);
     }
 #endif
-	dprintf("[MrCache] reuse e=%p,key=%d,mr=%p,refc=%d,addr=%p,len=%ld,lkey=%08x,rkey=%08x\n", e, key, e->mr, e->refc, e->mr->addr, e->mr->length, e->mr->lkey, e->mr->rkey);
+    dprintf("[MrCache] reuse e=%p,key=%d,mr=%p,refc=%d,addr=%p,len=%ld,lkey=%08x,rkey=%08x\n", e,
+            key, e->mr, e->refc, e->mr->addr, e->mr->length, e->mr->lkey, e->mr->rkey);
+
+    //__lru_queue_display();
 
-	//__lru_queue_display();
-    
- fn_exit:
-	return e->mr;
- fn_fail:
+  fn_exit:
+    return e->mr;
+  fn_fail:
     goto fn_exit;
 #endif
 }
 
-void ibcom_reg_mr_dereg(struct ibv_mr *mr) {
+void ibcom_reg_mr_dereg(struct ibv_mr *mr)
+{
 
-	struct ibcom_reg_mr_cache_entry_t *e;
-	struct ibcom_reg_mr_cache_entry_t *zero = 0;
-	unsigned long offset = (unsigned long)zero->mr;
-	e = (struct ibcom_reg_mr_cache_entry_t *) ((unsigned long)mr - offset);
-	e->refc--;
+    struct ibcom_reg_mr_cache_entry_t *e;
+    struct ibcom_reg_mr_cache_entry_t *zero = 0;
+    unsigned long offset = (unsigned long) zero->mr;
+    e = (struct ibcom_reg_mr_cache_entry_t *) ((unsigned long) mr - offset);
+    e->refc--;
 
-	dprintf("ibcom_reg_mr_dereg,entry=%p,mr=%p,addr=%p,refc=%d,offset=%lx\n", e, mr, e->mr->addr, e->refc, offset);
+    dprintf("ibcom_reg_mr_dereg,entry=%p,mr=%p,addr=%p,refc=%d,offset=%lx\n", e, mr, e->mr->addr,
+            e->refc, offset);
 }
 
-void ibcom_RegisterCacheInit() {
-	int i;
+void ibcom_RegisterCacheInit()
+{
+    int i;
 
     /* Using the address to the start node to express the end of the list
-       instead of using NULL */
-    for(i = 0; i < IBCOM_REG_MR_NLINE; i++) {
-        ibcom_reg_mr_cache[i].lru_next = (struct ibcom_reg_mr_listnode_t*)&ibcom_reg_mr_cache[i];
-        ibcom_reg_mr_cache[i].lru_prev = (struct ibcom_reg_mr_listnode_t*)&ibcom_reg_mr_cache[i];
+     * instead of using NULL */
+    for (i = 0; i < IBCOM_REG_MR_NLINE; i++) {
+        ibcom_reg_mr_cache[i].lru_next = (struct ibcom_reg_mr_listnode_t *) &ibcom_reg_mr_cache[i];
+        ibcom_reg_mr_cache[i].lru_prev = (struct ibcom_reg_mr_listnode_t *) &ibcom_reg_mr_cache[i];
     }
-	
-	dprintf("[MrCache] cache initializes %d entries\n", IBCOM_REG_MR_NLINE);
+
+    dprintf("[MrCache] cache initializes %d entries\n", IBCOM_REG_MR_NLINE);
 }
 
-void ibcom_RegisterCacheDestroy() {
-	struct ibcom_reg_mr_cache_entry_t *p;
-	int i = 0, cnt = 0;
+void ibcom_RegisterCacheDestroy()
+{
+    struct ibcom_reg_mr_cache_entry_t *p;
+    int i = 0, cnt = 0;
 
-	for(i = 0; i < IBCOM_REG_MR_NLINE; i++) {
-        for(p = (struct ibcom_reg_mr_cache_entry_t*)ibcom_reg_mr_cache[i].lru_next; p != (struct ibcom_reg_mr_cache_entry_t*)&ibcom_reg_mr_cache[i]; p = (struct ibcom_reg_mr_cache_entry_t*)p->lru_next) {
+    for (i = 0; i < IBCOM_REG_MR_NLINE; i++) {
+        for (p = (struct ibcom_reg_mr_cache_entry_t *) ibcom_reg_mr_cache[i].lru_next;
+             p != (struct ibcom_reg_mr_cache_entry_t *) &ibcom_reg_mr_cache[i];
+             p = (struct ibcom_reg_mr_cache_entry_t *) p->lru_next) {
             if (p && p->addr > 0) {
                 ibcom_dereg_mr(p->mr);
                 afree(p, IBCOM_AALLOC_ID_MRCACHE);
@@ -281,7 +315,7 @@ void ibcom_RegisterCacheDestroy() {
         }
     }
 
-	//__lru_queue_display();
+    //__lru_queue_display();
 
-	dprintf("[MrCache] cache destroyed %d entries\n", cnt);
+    dprintf("[MrCache] cache destroyed %d entries\n", cnt);
 }
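
dcfa_reg_mr.c above caches ibv_mr registrations: the buffer address is hashed into one of IBCOM_REG_MR_NLINE sentinel-headed circular LRU lists, a hit is any cached entry whose registered range contains the requested one (bumping a reference count), and once a set grows past IBCOM_REG_MR_NWAY the least recently used entry is unlinked and de-registered before a fresh registration is inserted at the head. The following self-contained sketch shows that structure with reg_mr()/dereg_mr() as printf stubs standing in for ibv_reg_mr()/ibv_dereg_mr(); the bucket/way counts and the page-granularity hash are illustrative choices, and plain malloc/free replaces the arena allocator, as the committed code itself does under its "#if 1 /* debug */".

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    #define NLINE 8           /* number of hash buckets (power of two) */
    #define NWAY  4           /* max entries kept per bucket */

    typedef struct node { struct node *next, *prev; } node_t;

    typedef struct {
        node_t  link;         /* must be first: the lists link a node_t */
        void   *addr;         /* registered range */
        size_t  len;
        int     refc;         /* reference count, used when evicting */
        int     handle;       /* stands in for struct ibv_mr* */
    } entry_t;

    static node_t cache[NLINE];

    static int  reg_mr(void *a, size_t l) { (void)a; (void)l; static int h; return ++h; }
    static void dereg_mr(int h)           { printf("deregistered mr %d\n", h); }

    /* page-granularity hash; the committed code masks the raw address */
    static unsigned hash(void *a) { return ((uintptr_t)a >> 12) & (NLINE - 1); }

    static void insert_head(node_t *s, node_t *e)
    { e->next = s->next; e->prev = s; s->next->prev = e; s->next = e; }

    static void unlink_node(node_t *e)
    { e->prev->next = e->next; e->next->prev = e->prev; }

    void cache_init(void)
    {
        for (int i = 0; i < NLINE; i++)           /* sentinel points to itself: empty list */
            cache[i].next = cache[i].prev = &cache[i];
    }

    int mr_fetch(void *addr, size_t len)
    {
        node_t *s = &cache[hash(addr)];
        int way = 0;
        for (node_t *p = s->next; p != s; p = p->next, way++) {
            entry_t *e = (entry_t *)p;
            if (e->addr <= addr && (char *)addr + len <= (char *)e->addr + e->len) {
                e->refc++;                        /* hit: reuse the cached registration */
                return e->handle;
            }
        }
        if (way >= NWAY) {                        /* set full: evict LRU tail */
            entry_t *victim = (entry_t *)s->prev;
            unlink_node(&victim->link);
            dereg_mr(victim->handle);
            free(victim);
        }
        entry_t *e = malloc(sizeof *e);           /* miss: register and cache at the head */
        if (!e) abort();
        e->addr = addr; e->len = len; e->refc = 1;
        e->handle = reg_mr(addr, len);
        insert_head(s, &e->link);
        return e->handle;
    }

    int main(void)
    {
        static char buf[1 << 16];
        cache_init();
        int h1 = mr_fetch(buf, 4096);
        int h2 = mr_fetch(buf, 1024);             /* shorter range at the same address: hit */
        printf("h1=%d h2=%d (same handle => cache hit)\n", h1, h2);
        return 0;
    }

The dereg side of the committed code recovers the cache entry from the ibv_mr pointer with an offsetof-style subtraction and only decrements refc; the sketch omits that, since eviction alone already illustrates the lifetime handling.
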
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c
index afd783d..bf5b580 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c
@@ -8,7 +8,7 @@
 #include "dcfa_impl.h"
 
 //#define DEBUG_DCFA_SEND
-#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#ifdef dprintf  /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
 #undef dprintf
 #endif
 #ifdef DEBUG_DCFA_SEND
@@ -29,7 +29,7 @@ static int entered_send_progress = 0;
 #define MPID_NEM_DCFA_PBODY(ptr) ((ptr) + sizeof(int)*4)
 
 #define ALLOCATE(map, key, key_length, initial) {                        \
-    if(map->length + key_length + sizeof(int)*4 > map->max_length) { \
+    if (map->length + key_length + sizeof(int)*4 > map->max_length) { \
         map->max_length = map->max_length ? map->max_length * 2 : 4096; \
         map->data = realloc(map->data, map->max_length); \
     } \
@@ -42,91 +42,97 @@ static int entered_send_progress = 0;
     map->length += sizeof(int)*4 + key_length; \
 }
 
-void MPID_nem_dcfa_cm_map_set(MPID_nem_dcfa_cm_map_t* map, char* key, int key_length, int val) {
-    char* pTree = map->data;
+void MPID_nem_dcfa_cm_map_set(MPID_nem_dcfa_cm_map_t * map, char *key, int key_length, int val)
+{
+    char *pTree = map->data;
     dprintf("MPID_nem_dcfa_cm_map_set,val=%d\n", val);
-    
-	if(!pTree) {
+
+    if (!pTree) {
         ALLOCATE(map, key, key_length, val);
         dprintf("pTree was empty\n");
         return;
     }
     int s1_minus_s2;
-    while(1) {
+    while (1) {
         int lmin = key_length < MPID_NEM_DCFA_LEN(pTree) ? key_length : MPID_NEM_DCFA_LEN(pTree);
         int residual = key_length - MPID_NEM_DCFA_LEN(pTree);
         s1_minus_s2 = memcmp(key, MPID_NEM_DCFA_PBODY(pTree), lmin);
-        
-        if(!s1_minus_s2 && !residual) {
+
+        if (!s1_minus_s2 && !residual) {
             MPID_NEM_DCFA_VAL(pTree) = val;
             dprintf("found\n");
-            return; // same string, same length
-        } else if(s1_minus_s2 < 0 || !s1_minus_s2 && residual < 0) { 
+            return;     // same string, same length
+        }
+        else if (s1_minus_s2 < 0 || !s1_minus_s2 && residual < 0) {
             // psArg is "smaller" OR same substring, psArg is shorter
-            if(MPID_NEM_DCFA_LPTR(pTree) == 0) {
-                MPID_NEM_DCFA_LPTR(pTree) = map->length; // pointer write
+            if (MPID_NEM_DCFA_LPTR(pTree) == 0) {
+                MPID_NEM_DCFA_LPTR(pTree) = map->length;        // pointer write
                 /* left child */
                 ALLOCATE(map, key, key_length, val);
                 dprintf("stored as left child\n");
                 return;
             }
-            pTree = map->data + MPID_NEM_DCFA_LPTR(pTree); // go to left child
-        } else {
+            pTree = map->data + MPID_NEM_DCFA_LPTR(pTree);      // go to left child
+        }
+        else {
             //  psArg is "larger" OR same substring, psArg is longer
-            if(MPID_NEM_DCFA_RPTR(pTree) == 0) {
-                MPID_NEM_DCFA_RPTR(pTree) = map->length; // pointer write
+            if (MPID_NEM_DCFA_RPTR(pTree) == 0) {
+                MPID_NEM_DCFA_RPTR(pTree) = map->length;        // pointer write
                 /* right child */
-                ALLOCATE(map, key, key_length, val); 
+                ALLOCATE(map, key, key_length, val);
                 dprintf("stored as right child\n");
-                return; 
+                return;
             }
-            pTree = map->data + MPID_NEM_DCFA_RPTR(pTree); // go to right child
+            pTree = map->data + MPID_NEM_DCFA_RPTR(pTree);      // go to right child
         }
     }
 }
 
-int MPID_nem_dcfa_cm_map_get(MPID_nem_dcfa_cm_map_t* map, char* key, int key_length, int *val) {
+int MPID_nem_dcfa_cm_map_get(MPID_nem_dcfa_cm_map_t * map, char *key, int key_length, int *val)
+{
     int llc_errno = LLC_SUCCESS;
-    char* pTree = map->data;
+    char *pTree = map->data;
 
     dprintf("MPID_nem_dcfa_cm_map_get,key=%s\n", key);
 
-	if(!pTree) {
+    if (!pTree) {
         llc_errno = -1;
         dprintf("pTree is empty\n");
         goto fn_fail;
     }
     int s1_minus_s2;
-    while(1) {
+    while (1) {
         int lmin = key_length < MPID_NEM_DCFA_LEN(pTree) ? key_length : MPID_NEM_DCFA_LEN(pTree);
         int residual = key_length - MPID_NEM_DCFA_LEN(pTree);
         s1_minus_s2 = memcmp(key, MPID_NEM_DCFA_PBODY(pTree), lmin);
 
-	if(!s1_minus_s2 && !residual) {
-        *val = MPID_NEM_DCFA_VAL(pTree);
-        dprintf("value found=%d\n", *val);
-	    goto fn_exit; // same string, same length
-	} else if(s1_minus_s2 < 0 || !s1_minus_s2 && residual < 0) { 
-	    // psArg is "smaller" OR same substring, psArg is shorter
-	    if(MPID_NEM_DCFA_LPTR(pTree) == 0) {
-		llc_errno = -1;
-        dprintf("left is null\n");
-        goto fn_fail;
-	    }
-	    pTree = map->data + MPID_NEM_DCFA_LPTR(pTree); // go to left child
-	} else {
-	    //  psArg is "larger" OR same substring, psArg is longer
-	    if(MPID_NEM_DCFA_RPTR(pTree) == 0) {
-		llc_errno = -1;
-        dprintf("right is null\n");
-        goto fn_fail;
-	    }
-	    pTree = map->data + MPID_NEM_DCFA_RPTR(pTree); // go to right child
-	}
+        if (!s1_minus_s2 && !residual) {
+            *val = MPID_NEM_DCFA_VAL(pTree);
+            dprintf("value found=%d\n", *val);
+            goto fn_exit;       // same string, same length
+        }
+        else if (s1_minus_s2 < 0 || !s1_minus_s2 && residual < 0) {
+            // psArg is "smaller" OR same substring, psArg is shorter
+            if (MPID_NEM_DCFA_LPTR(pTree) == 0) {
+                llc_errno = -1;
+                dprintf("left is null\n");
+                goto fn_fail;
+            }
+            pTree = map->data + MPID_NEM_DCFA_LPTR(pTree);      // go to left child
+        }
+        else {
+            //  psArg is "larger" OR same substring, psArg is longer
+            if (MPID_NEM_DCFA_RPTR(pTree) == 0) {
+                llc_errno = -1;
+                dprintf("right is null\n");
+                goto fn_fail;
+            }
+            pTree = map->data + MPID_NEM_DCFA_RPTR(pTree);      // go to right child
+        }
     }
- fn_exit:
+  fn_exit:
     return llc_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 #endif
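
MPID_nem_dcfa_cm_map_set/get above keep an unbalanced binary search tree inside a single realloc-grown byte buffer: each node is four ints (key length, value, left-child offset, right-child offset) followed by the key bytes, children are addressed by byte offsets so realloc can move the buffer freely, and ordering is memcmp on the common prefix with ties broken by key length. A compact standalone sketch of the same layout follows; the names and the int-alignment padding of the key are illustrative (the committed ALLOCATE macro packs the key tightly).

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct { char *data; int length, max_length; } map_t;

    #define NODE_LEN(p)  (((int *)(p))[0])
    #define NODE_VAL(p)  (((int *)(p))[1])
    #define NODE_LPTR(p) (((int *)(p))[2])
    #define NODE_RPTR(p) (((int *)(p))[3])
    #define NODE_KEY(p)  ((p) + 4 * sizeof(int))

    static char *append_node(map_t *m, const char *key, int klen, int val)
    {
        int need = (int)(4 * sizeof(int)) + ((klen + 3) & ~3);  /* pad key to keep ints aligned */
        if (m->length + need > m->max_length) {
            m->max_length = m->max_length ? m->max_length * 2 : 4096;
            char *d = realloc(m->data, m->max_length);
            if (!d) exit(1);
            m->data = d;
        }
        char *n = m->data + m->length;
        NODE_LEN(n) = klen; NODE_VAL(n) = val; NODE_LPTR(n) = 0; NODE_RPTR(n) = 0;
        memcpy(NODE_KEY(n), key, klen);
        m->length += need;
        return n;
    }

    /* memcmp on the common prefix, ties broken by length, as in the patch */
    static int keycmp(const char *key, int klen, char *node)
    {
        int nlen = NODE_LEN(node);
        int lmin = klen < nlen ? klen : nlen;
        int c = memcmp(key, NODE_KEY(node), lmin);
        return c ? c : klen - nlen;
    }

    void map_set(map_t *m, const char *key, int klen, int val)
    {
        if (!m->data) { append_node(m, key, klen, val); return; }
        int off = 0;                                    /* offset of current node (root = 0) */
        for (;;) {
            char *n = m->data + off;
            int c = keycmp(key, klen, n);
            if (c == 0) { NODE_VAL(n) = val; return; }  /* same key: overwrite value */
            int child = (c < 0) ? NODE_LPTR(n) : NODE_RPTR(n);
            if (child == 0) {
                int newoff = m->length;                 /* write the offset before realloc can move data */
                if (c < 0) NODE_LPTR(n) = newoff; else NODE_RPTR(n) = newoff;
                append_node(m, key, klen, val);
                return;
            }
            off = child;
        }
    }

    int map_get(map_t *m, const char *key, int klen, int *val)
    {
        if (!m->data) return -1;
        int off = 0;
        for (;;) {
            char *n = m->data + off;
            int c = keycmp(key, klen, n);
            if (c == 0) { *val = NODE_VAL(n); return 0; }
            int child = (c < 0) ? NODE_LPTR(n) : NODE_RPTR(n);
            if (child == 0) return -1;                  /* not found */
            off = child;
        }
    }

    int main(void)
    {
        map_t m = { NULL, 0, 0 };
        map_set(&m, "node-a", 6, 10);
        map_set(&m, "node-b", 6, 20);
        map_set(&m, "node-a", 6, 11);                   /* overwrite */
        int v;
        if (map_get(&m, "node-a", 6, &v) == 0) printf("node-a -> %d\n", v);
        free(m.data);
        return 0;
    }
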
@@ -136,13 +142,13 @@ int MPID_nem_dcfa_cm_map_get(MPID_nem_dcfa_cm_map_t* map, char* key, int key_len
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 static int MPID_nem_dcfa_iSendContig_core(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
-                              MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz)
+                                          MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz)
 {
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
     MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
     MPID_nem_dcfa_pkt_prefix_t pkt_netmod;
-    void* netmod_hdr;
+    void *netmod_hdr;
     int sz_netmod_hdr;
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_ISENDCONTIG_CORE);
@@ -153,27 +159,28 @@ static int MPID_nem_dcfa_iSendContig_core(MPIDI_VC_t * vc, MPID_Request * sreq,
     /* remote SR sequence number which is last sent */
     int *rsr_seq_num_tail;
     ibcom_errno = ibcom_rsr_seq_num_tail_get(vc_dcfa->sc->fd, &rsr_seq_num_tail);
-    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rsr_seq_num_tail_get"); 
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rsr_seq_num_tail_get");
 
     /* remote SR sequence number which is last sent */
     int *rsr_seq_num_tail_last_sent;
-    ibcom_errno = ibcom_rsr_seq_num_tail_last_sent_get(vc_dcfa->sc->fd, &rsr_seq_num_tail_last_sent);
-    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rsr_seq_num_tail_last_sent_get"); 
+    ibcom_errno =
+        ibcom_rsr_seq_num_tail_last_sent_get(vc_dcfa->sc->fd, &rsr_seq_num_tail_last_sent);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                        "**ibcom_rsr_seq_num_tail_last_sent_get");
 
     //dprintf("isendcontig,rsr_seq_num_tail=%d,rsr_seq_num_tail_last_sent=%d\n", *rsr_seq_num_tail, *rsr_seq_num_tail_last_sent);
 
     int notify_rate;
     ibcom_errno = ibcom_rdmabuf_occupancy_notify_rate_get(vc_dcfa->sc->fd, &notify_rate);
-    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_sq_occupancy_notify_rate_get");
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                        "**ibcom_sq_occupancy_notify_rate_get");
 
     /* send RDMA-write-to buffer occupancy information */
     /* embed SR occupancy information and remember the last one sent */
-    MPIDI_CH3_Pkt_t* ch3_hdr = (MPIDI_CH3_Pkt_t*)hdr;
-    if(
-       MPID_nem_dcfa_diff32(*rsr_seq_num_tail, *rsr_seq_num_tail_last_sent) > notify_rate
-       ) {
-#if 1 /* debug, disabling piggy-back */
-        switch(ch3_hdr->type) {
+    MPIDI_CH3_Pkt_t *ch3_hdr = (MPIDI_CH3_Pkt_t *) hdr;
+    if (MPID_nem_dcfa_diff32(*rsr_seq_num_tail, *rsr_seq_num_tail_last_sent) > notify_rate) {
+#if 1   /* debug, disabling piggy-back */
+        switch (ch3_hdr->type) {
         case MPIDI_CH3_PKT_EAGER_SEND:
             pkt_netmod.subtype = MPIDI_NEM_DCFA_PKT_EAGER_SEND;
             goto common_tail;
@@ -188,11 +195,11 @@ static int MPID_nem_dcfa_iSendContig_core(MPIDI_VC_t * vc, MPID_Request * sreq,
             goto common_tail;
         case MPIDI_CH3_PKT_GET_RESP:
             pkt_netmod.subtype = MPIDI_NEM_DCFA_PKT_GET_RESP;
-        common_tail:
+          common_tail:
             pkt_netmod.type = MPIDI_NEM_PKT_NETMOD;
             pkt_netmod.seq_num_tail = *rsr_seq_num_tail;
             *rsr_seq_num_tail_last_sent = *rsr_seq_num_tail;
-            netmod_hdr = (void*)&pkt_netmod;
+            netmod_hdr = (void *) &pkt_netmod;
             sz_netmod_hdr = sizeof(MPID_nem_dcfa_pkt_prefix_t);
             break;
         default:
@@ -204,7 +211,8 @@ static int MPID_nem_dcfa_iSendContig_core(MPIDI_VC_t * vc, MPID_Request * sreq,
         netmod_hdr = NULL;
         sz_netmod_hdr = 0;
 #endif
-    } else {
+    }
+    else {
         netmod_hdr = NULL;
         sz_netmod_hdr = 0;
     }
@@ -214,48 +222,52 @@ static int MPID_nem_dcfa_iSendContig_core(MPIDI_VC_t * vc, MPID_Request * sreq,
 
     /* send myrank as wr_id so that receiver can find vc using MPID_nem_dcfa_conns in poll */
     /* packet handler of MPIDI_CH3_PKT_EAGER_SEND uses sizeof(MPIDI_CH3_Pkt_t), so ignoring hdr_sz */
-    
+
     /* MPIDI_CH3_ReqHandler_GetSendRespComplete, drain_scq decrement it */
-    if(((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_GET_RESP) {
+    if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_GET_RESP) {
         //        MPIR_Request_add_ref(sreq);
         //printf("isendcontig_core,MPIDI_CH3_PKT_GET_RESP,ref_count=%d\n", sreq->ref_count);
     }
 
     /* increment cc because PktHandler_EagerSyncAck, ssend.c, drain_scq decrement it */
-    if(((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_EAGER_SYNC_SEND) {
+    if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_EAGER_SYNC_SEND) {
         MPIR_Request_add_ref(sreq);
     }
-    if(((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_GET) {
+    if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_GET) {
         //printf("isendcontig_core,MPIDI_CH3_PKT_GET,ref_count=%d\n", sreq->ref_count);
     }
-    if(hdr&&((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_ACCUM_IMMED) {
+    if (hdr && ((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_ACCUM_IMMED) {
         dprintf("isendcontig_core,MPIDI_CH3_PKT_ACCUM_IMMED,ref_count=%d\n", sreq->ref_count);
     }
-    if(hdr&&((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_ACCUMULATE) {
+    if (hdr && ((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_ACCUMULATE) {
         dprintf("isendcontig_core,MPIDI_CH3_PKT_ACCUMULATE,ref_count=%d\n", sreq->ref_count);
     }
 
     int msg_type = MPIDI_Request_get_msg_type(sreq);
 
-    dprintf("isendcontig_core,netmod_hdr=%p,sz_netmod_hdr=%d,hdr=%p,sz_hdr=%ld,data=%p,sz_data=%d\n", netmod_hdr, sz_netmod_hdr, hdr, hdr_sz, data, (int)data_sz);
+    dprintf
+        ("isendcontig_core,netmod_hdr=%p,sz_netmod_hdr=%d,hdr=%p,sz_hdr=%ld,data=%p,sz_data=%d\n",
+         netmod_hdr, sz_netmod_hdr, hdr, hdr_sz, data, (int) data_sz);
 
-    if(sizeof(MPIDI_CH3_Pkt_t) != hdr_sz) {
-        printf("type=%d,subtype=%d\n", ((MPID_nem_pkt_netmod_t*)hdr)->type, ((MPID_nem_pkt_netmod_t*)hdr)->subtype);
+    if (sizeof(MPIDI_CH3_Pkt_t) != hdr_sz) {
+        printf("type=%d,subtype=%d\n", ((MPID_nem_pkt_netmod_t *) hdr)->type,
+               ((MPID_nem_pkt_netmod_t *) hdr)->subtype);
     }
 
     int copied;
-    ibcom_errno = ibcom_isend(vc_dcfa->sc->fd, (uint64_t)sreq, netmod_hdr, sz_netmod_hdr, hdr, hdr_sz, data, (int)data_sz, &copied);
+    ibcom_errno =
+        ibcom_isend(vc_dcfa->sc->fd, (uint64_t) sreq, netmod_hdr, sz_netmod_hdr, hdr, hdr_sz, data,
+                    (int) data_sz, &copied);
     MPIU_ERR_CHKFATALANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_isend");
     MPID_nem_dcfa_ncqe += 1;
     //dprintf("isendcontig_core,ncqe=%d\n", MPID_nem_dcfa_ncqe);
-    dprintf("isendcontig_core,isend,kind=%d,msg_type=%d,copied=%d\n", sreq->kind, msg_type, copied);/*suspicious lines,(if1,on,on,off,if0) works*/
+    dprintf("isendcontig_core,isend,kind=%d,msg_type=%d,copied=%d\n", sreq->kind, msg_type, copied);    /*suspicious lines,(if1,on,on,off,if0) works */
 #if 0
-#define TLBPREF_AHEAD 20//20
+#define TLBPREF_AHEAD 20        //20
     int tlb_pref_ahd = 4096 * TLBPREF_AHEAD;
     __asm__ __volatile__
         ("movq %0, %%rsi;"
-         "movq 0(%%rsi), %%rax;"
-         : : "r"((uint64_t)data + tlb_pref_ahd) : "%rsi", "%rax");
+         "movq 0(%%rsi), %%rax;"::"r"((uint64_t) data + tlb_pref_ahd):"%rsi", "%rax");
 #endif
 #if 1
 #ifdef __MIC__
@@ -264,85 +276,93 @@ static int MPID_nem_dcfa_iSendContig_core(MPIDI_VC_t * vc, MPID_Request * sreq,
          "vprefetch0 0x00(%%rsi);"
          "vprefetch0 0x40(%%rsi);"
          "vprefetch0 0x80(%%rsi);"
-         "vprefetch0 0xc0(%%rsi);"
-         : : "r"((uint64_t)data + 4 * data_sz) : "%rsi");
+         "vprefetch0 0xc0(%%rsi);"::"r"((uint64_t) data + 4 * data_sz):"%rsi");
 #else
     __asm__ __volatile__
         ("movq %0, %%rsi;"
          "prefetchnta 0x00(%%rsi);"
          "prefetchnta 0x40(%%rsi);"
          "prefetchnta 0x80(%%rsi);"
-         "prefetchnta 0xc0(%%rsi);"
-         : : "r"((uint64_t)data + 4 * data_sz) : "%rsi");
+         "prefetchnta 0xc0(%%rsi);"::"r"((uint64_t) data + 4 * data_sz):"%rsi");
 #endif
 #endif
-    
+
     MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "dcfa_send, fd=%d", vc_dcfa->sc->fd));
     vc_dcfa->pending_sends += 1;
-    sreq->ch.vc = vc; /* used in poll */
+    sreq->ch.vc = vc;   /* used in poll */
 
     /* calling drain_scq from progress_send deprives of chance
-       for dcfa_poll to drain-sendq using ncqe
-       however transfers events to
-       (not to reply_seq_num because it's regulated by the rate)
-       fire on dcfa_poll using nces
-       (make SCQ full once, then put one command in sendq,
-       then send-->drain-scq to reduce CQE level under the threashold)
-       so we need to perform 
-       progress_send for all of VCs using nces in dcfa_poll
-       (if we have drain-sendq in dcfa_poll, this isn't needed. */
-#if 0 /* debug,disabling fast-dec-cc when copied */
-    if(copied && !sreq->dev.OnDataAvail) { /* skip poll scq */
-           int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
-           
-           (VC_FIELD(sreq->ch.vc, pending_sends)) -= 1;
-           
-           /* as in the template */
-           reqFn = sreq->dev.OnDataAvail;
-           if (!reqFn){
-               /* MPID_Request_release is called in 
-                  MPI_Wait (in src/mpi/pt2pt/wait.c)
-                    MPIR_Wait_impl (in src/mpi/pt2pt/wait.c)
-                      MPIR_Request_complete (in /src/mpi/pt2pt/mpir_request.c) */
-               int incomplete;
-               MPIDI_CH3U_Request_decrement_cc(sreq, &incomplete);
-               if(!incomplete) { MPIDI_CH3_Progress_signal_completion(); }
-               //dprintf("isendcontig_core,cc_ptr=%d\n", *(sreq->cc_ptr));
-               dprintf("sendcontig_core,copied,complete,req=%p,cc incremented to %d,ref_count=%d\n", sreq, MPIDI_CH3I_progress_completion_count.v, sreq->ref_count);
-               MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-           } else {
-               MPIDI_VC_t *vc = sreq->ch.vc;
-               int complete = 0;
-               mpi_errno = reqFn(vc, sreq, &complete);
-               if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-               /* not-completed case is not implemented */
-               MPIU_Assert(complete == TRUE);
-               MPIU_Assert(0); /* decrement ref_count and free sreq causes problem */
-           }
-    } else {
-        MPID_nem_dcfa_ncqe_nces += 1; /* it has different meaning, counting non-copied eager-send */
+     * for dcfa_poll to drain-sendq using ncqe
+     * however transfers events to
+     * (not to reply_seq_num because it's regulated by the rate)
+     * fire on dcfa_poll using nces
+     * (make SCQ full once, then put one command in sendq,
+     * then send-->drain-scq to reduce CQE level under the threashold)
+     * so we need to perform
+     * progress_send for all of VCs using nces in dcfa_poll
+     * (if we have drain-sendq in dcfa_poll, this isn't needed. */
+#if 0   /* debug,disabling fast-dec-cc when copied */
+    if (copied && !sreq->dev.OnDataAvail) {     /* skip poll scq */
+        int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
+
+        (VC_FIELD(sreq->ch.vc, pending_sends)) -= 1;
+
+        /* as in the template */
+        reqFn = sreq->dev.OnDataAvail;
+        if (!reqFn) {
+            /* MPID_Request_release is called in
+             * MPI_Wait (in src/mpi/pt2pt/wait.c)
+             * MPIR_Wait_impl (in src/mpi/pt2pt/wait.c)
+             * MPIR_Request_complete (in /src/mpi/pt2pt/mpir_request.c) */
+            int incomplete;
+            MPIDI_CH3U_Request_decrement_cc(sreq, &incomplete);
+            if (!incomplete) {
+                MPIDI_CH3_Progress_signal_completion();
+            }
+            //dprintf("isendcontig_core,cc_ptr=%d\n", *(sreq->cc_ptr));
+            dprintf("sendcontig_core,copied,complete,req=%p,cc incremented to %d,ref_count=%d\n",
+                    sreq, MPIDI_CH3I_progress_completion_count.v, sreq->ref_count);
+            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+        }
+        else {
+            MPIDI_VC_t *vc = sreq->ch.vc;
+            int complete = 0;
+            mpi_errno = reqFn(vc, sreq, &complete);
+            if (mpi_errno)
+                MPIU_ERR_POP(mpi_errno);
+            /* not-completed case is not implemented */
+            MPIU_Assert(complete == TRUE);
+            MPIU_Assert(0);     /* decrement ref_count and free sreq causes problem */
+        }
+    }
+    else {
+        MPID_nem_dcfa_ncqe_nces += 1;   /* it has different meaning, counting non-copied eager-send */
     }
 #else
-    MPID_nem_dcfa_ncqe_nces += 1; /* it has different meaning, counting non-copied eager-send */
+    MPID_nem_dcfa_ncqe_nces += 1;       /* it has different meaning, counting non-copied eager-send */
 #endif
 
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
-           //dprintf("isendcontig,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);  
+    //dprintf("isendcontig,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
     int *notify_rstate;
     ibcom_errno = ibcom_rdmabuf_occupancy_notify_rstate_get(vc_dcfa->sc->fd, &notify_rstate);
-    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rdmabuf_occupancy_notify_rstate_get"); 
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                        "**ibcom_rdmabuf_occupancy_notify_rstate_get");
 
-    dprintf("isendcontig,head=%d,tail=%d,hw=%d\n", vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail, IBCOM_RDMABUF_HIGH_WATER_MARK);
+    dprintf("isendcontig,head=%d,tail=%d,hw=%d\n", vc_dcfa->ibcom->sseq_num,
+            vc_dcfa->ibcom->lsr_seq_num_tail, IBCOM_RDMABUF_HIGH_WATER_MARK);
     /* if the number of slots in RMDA-write-to buffer have hit the high water-mark */
-    if(*notify_rstate == IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW &&
-       MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) > IBCOM_RDMABUF_HIGH_WATER_MARK) {
+    if (*notify_rstate == IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW &&
+        MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                             vc_dcfa->ibcom->lsr_seq_num_tail) > IBCOM_RDMABUF_HIGH_WATER_MARK) {
         dprintf("changing notify_rstate,id=%d\n", vc_dcfa->ibcom->sseq_num);
         /* remember remote notifying policy so that local can know when to change remote policy back to LW */
         *notify_rstate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW;
         /* change remote notifying policy of RDMA-write-to buf occupancy info */
-        MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc, IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW);
+        MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc,
+                                                                 IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW);
     }
-    //dprintf("isendcontig_core,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);  
+    //dprintf("isendcontig_core,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
 #endif
 
   fn_exit:
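
The hunk above makes two rate-limited flow-control decisions: it piggy-backs the local receive-ring tail (rsr_seq_num_tail) onto an outgoing CH3 packet once the peer has fallen more than notify_rate slots behind, and when the local view of the remote RDMA-write-to ring (sseq_num minus lsr_seq_num_tail) crosses IBCOM_RDMABUF_HIGH_WATER_MARK it flips the remote notification policy from the low-water to the high-water state. A small model of those two checks is sketched below; the constants are illustrative and seq_diff32() merely stands in for MPID_nem_dcfa_diff32().

    #include <stdio.h>
    #include <stdint.h>

    #define RDMABUF_NSEG     64    /* slots in the RDMA-write-to ring */
    #define HIGH_WATER_MARK  48    /* switch notify policy above this occupancy */
    #define NOTIFY_RATE       8    /* piggy-back the tail every N freed slots */

    enum { NOTIFY_LW, NOTIFY_HW };

    /* wrap-safe distance between two 32-bit sequence numbers (a - b) */
    static inline int32_t seq_diff32(uint32_t a, uint32_t b) { return (int32_t)(a - b); }

    typedef struct {
        uint32_t sseq_num;                    /* next send sequence number (ring head) */
        uint32_t lsr_seq_num_tail;            /* last slot the peer reported as freed */
        uint32_t rsr_seq_num_tail;            /* our receive tail ... */
        uint32_t rsr_seq_num_tail_last_sent;  /* ... and the last value we told the peer */
        int      notify_rstate;
    } vc_state_t;

    /* returns 1 if this send should carry a netmod header with the tail update */
    static int should_piggyback(vc_state_t *vc)
    {
        if (seq_diff32(vc->rsr_seq_num_tail, vc->rsr_seq_num_tail_last_sent) > NOTIFY_RATE) {
            vc->rsr_seq_num_tail_last_sent = vc->rsr_seq_num_tail;
            return 1;
        }
        return 0;
    }

    /* returns 1 if we should ask the peer to report occupancy more eagerly */
    static int should_raise_notify(vc_state_t *vc)
    {
        if (vc->notify_rstate == NOTIFY_LW &&
            seq_diff32(vc->sseq_num, vc->lsr_seq_num_tail) > HIGH_WATER_MARK) {
            vc->notify_rstate = NOTIFY_HW;
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        vc_state_t vc = { 60, 5, 20, 10, NOTIFY_LW };
        printf("piggyback tail? %d\n", should_piggyback(&vc));    /* 20-10 > 8  -> yes */
        printf("raise notify?   %d\n", should_raise_notify(&vc)); /* 60-5  > 48 -> yes */
        return 0;
    }
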
@@ -371,30 +391,34 @@ int MPID_nem_dcfa_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);
 
 #ifdef DCFA_ONDEMAND
-    if(!vc_dcfa->is_connected) { MPID_nem_dcfa_send_syn(vc); }
+    if (!vc_dcfa->is_connected) {
+        MPID_nem_dcfa_send_syn(vc);
+    }
 #endif
 
 #if 0
     /* aggressively perform drain_scq */
     /* try to clear the road blocks, i.e. ncom, ncqe */
-    if(vc_dcfa->ibcom->ncom >= /*IBCOM_MAX_SQ_CAPACITY*/IBCOM_MAX_SQ_HEIGHT_DRAIN ||
-       MPID_nem_dcfa_ncqe >= /*IBCOM_MAX_CQ_CAPACITY*/IBCOM_MAX_CQ_HEIGHT_DRAIN) {
+    if (vc_dcfa->ibcom->ncom >= /*IBCOM_MAX_SQ_CAPACITY */ IBCOM_MAX_SQ_HEIGHT_DRAIN ||
+        MPID_nem_dcfa_ncqe >= /*IBCOM_MAX_CQ_CAPACITY */ IBCOM_MAX_CQ_HEIGHT_DRAIN) {
         //printf("isendcontig,kick drain_scq\n");
-        ibcom_errno = MPID_nem_dcfa_drain_scq(1); /* set 1st arg to one which means asking it for not calling send_progress because it recursively call isendcontig_core */
+        ibcom_errno = MPID_nem_dcfa_drain_scq(1);       /* set 1st arg to one which means asking it for not calling send_progress because it recursively call isendcontig_core */
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
     }
 #endif
     /* set it for drain_scq */
-	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_EAGER_MSG );
+    MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
 
 #if 0
     /* anticipating received message releases RDMA-write-to buffer or IB command-queue entry */
     /* Unexpected state MPIDI_VC_STATE_CLOSED in vc 0xf1fed0 (expecting MPIDI_VC_STATE_ACTIVE)
-       Assertion failed in file src/mpid/ch3/src/ch3u_handle_connection.c at line 326: vc->state == MPIDI_VC_STATE_ACTIVE */
-    if(vc->state == MPIDI_VC_STATE_ACTIVE &&
-       MPID_nem_dcfa_tsc_poll - MPID_nem_dcfa_rdtsc() > MPID_NEM_DCFA_POLL_PERIOD_SEND) {
+     * Assertion failed in file src/mpid/ch3/src/ch3u_handle_connection.c at line 326: vc->state == MPIDI_VC_STATE_ACTIVE */
+    if (vc->state == MPIDI_VC_STATE_ACTIVE &&
+        MPID_nem_dcfa_tsc_poll - MPID_nem_dcfa_rdtsc() > MPID_NEM_DCFA_POLL_PERIOD_SEND) {
         mpi_errno = MPID_nem_dcfa_poll(0);
-        if(mpi_errno) { MPIU_ERR_POP (mpi_errno); }
+        if (mpi_errno) {
+            MPIU_ERR_POP(mpi_errno);
+        }
     }
 #endif
 
@@ -402,53 +426,63 @@ int MPID_nem_dcfa_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
     /* sequence number of (largest) completed send command */
     ibcom_errno = ibcom_lsr_seq_num_tail_get(vc_dcfa->sc->fd, &lsr_seq_num_tail);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_lsr_seq_num_tail_get");
-    
+
     int lsr_seq_num_head;
     /* sequence number of (largest) in-flight send command */
     ibcom_errno = ibcom_sseq_num_get(vc_dcfa->sc->fd, &lsr_seq_num_head);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_sseq_num_get");
-    
-    dprintf("isendcontig,%d->%d,type=%d,subtype=%d,data_sz=%ld,ldiff=%d(%d-%d),rdiff=%d(%d-%d)\n", MPID_nem_dcfa_myrank, vc->pg_rank, ((MPIDI_CH3_Pkt_t *)hdr)->type, ((MPID_nem_pkt_netmod_t*)hdr)->subtype, data_sz, 
+
+    dprintf("isendcontig,%d->%d,type=%d,subtype=%d,data_sz=%ld,ldiff=%d(%d-%d),rdiff=%d(%d-%d)\n",
+            MPID_nem_dcfa_myrank, vc->pg_rank, ((MPIDI_CH3_Pkt_t *) hdr)->type,
+            ((MPID_nem_pkt_netmod_t *) hdr)->subtype, data_sz,
             MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail),
             vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail,
-            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
-            vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent
-            );
-    dprintf("isendcontig,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail,
+                                 vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
+            vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent);
+    dprintf("isendcontig,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n",
+            MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
     /* if IB command overflow-queue is empty AND local IB command queue isn't full AND remote RDMA-write-to buf isn't getting overrun */
-    MPIDI_CH3_Pkt_t* ch3_hdr = (MPIDI_CH3_Pkt_t*)hdr;
-    MPID_nem_pkt_netmod_t* netmod_hdr = (MPID_nem_pkt_netmod_t *)hdr;
-    /* reserve one slot for control packet bringing sequence number 
-       to avoid dead-lock */
-    int slack =  (
-                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_REQ_SEQ_NUM) &&
-                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_REPLY_SEQ_NUM) &&
-                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_PKT_LMT_GET_DONE) &&
-                  ch3_hdr->type != MPIDI_NEM_PKT_LMT_RTS && 
-                  ch3_hdr->type != MPIDI_NEM_PKT_LMT_CTS
-                  ) ? IBCOM_AMT_SLACK : 0;
-    /* make control packet bringing sequence number go ahead of 
-       queued packets to avoid dead-lock */
-    int goahead = 
-        (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD && netmod_hdr->subtype == MPIDI_NEM_DCFA_REQ_SEQ_NUM) ||
-        (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD && netmod_hdr->subtype == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) ||
-        (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD && netmod_hdr->subtype == MPIDI_NEM_DCFA_PKT_LMT_GET_DONE)
+    MPIDI_CH3_Pkt_t *ch3_hdr = (MPIDI_CH3_Pkt_t *) hdr;
+    MPID_nem_pkt_netmod_t *netmod_hdr = (MPID_nem_pkt_netmod_t *) hdr;
+    /* reserve one slot for control packet bringing sequence number
+     * to avoid dead-lock */
+    int slack = ((ch3_hdr->type != MPIDI_NEM_PKT_NETMOD ||
+                  netmod_hdr->subtype != MPIDI_NEM_DCFA_REQ_SEQ_NUM) &&
+                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD ||
+                  netmod_hdr->subtype != MPIDI_NEM_DCFA_REPLY_SEQ_NUM) &&
+                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD ||
+                  netmod_hdr->subtype != MPIDI_NEM_DCFA_PKT_LMT_GET_DONE) &&
+                 ch3_hdr->type != MPIDI_NEM_PKT_LMT_RTS &&
+                 ch3_hdr->type != MPIDI_NEM_PKT_LMT_CTS) ? IBCOM_AMT_SLACK : 0;
+    /* make control packet bringing sequence number go ahead of
+     * queued packets to avoid dead-lock */
+    int goahead =
+        (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD && netmod_hdr->subtype == MPIDI_NEM_DCFA_REQ_SEQ_NUM)
+        || (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD &&
+            netmod_hdr->subtype == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) ||
+        (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD &&
+         netmod_hdr->subtype == MPIDI_NEM_DCFA_PKT_LMT_GET_DONE)
         ? 1 : 0;
     dprintf("isendcontig,slack=%d,goahead=%d\n", slack, goahead);
 
-    if(
+    if (
 #ifdef DCFA_ONDEMAND
-       vc_dcfa->is_connected &&
+           vc_dcfa->is_connected &&
 #endif
-       (goahead || MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) &&
-       vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack &&
-       MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack &&
-       MPID_nem_dcfa_diff32(lsr_seq_num_head, *lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG - slack) {
+           (goahead || MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) &&
+           vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack &&
+           MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack &&
+           MPID_nem_dcfa_diff32(lsr_seq_num_head, *lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG - slack) {
 
         mpi_errno = MPID_nem_dcfa_iSendContig_core(vc, sreq, hdr, hdr_sz, data, data_sz);
-        if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        if (mpi_errno) {
+            MPIU_ERR_POP(mpi_errno);
+        }
 
-    } else {
+    }
+    else {
 
         /* enqueue command into send_queue */
         dprintf("isendcontig,enqueuing,sendq=%d,ncom=%d,ncqe=%d,ldiff=%d(%d-%d),slack=%d\n",
@@ -456,70 +490,73 @@ int MPID_nem_dcfa_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
                 vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack,
                 MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack,
                 MPID_nem_dcfa_diff32(lsr_seq_num_head, *lsr_seq_num_tail),
-                vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail,
-                slack
-               );
+                vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail, slack);
 
         /* store required info. see MPIDI_CH3_iSendv in src/mpid/ch3/channels/nemesis/src/ch3_isendv.c */
         sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
-        sreq->dev.iov[0].MPID_IOV_BUF = (char *)&sreq->dev.pending_pkt;
+        sreq->dev.iov[0].MPID_IOV_BUF = (char *) &sreq->dev.pending_pkt;
         sreq->dev.iov[0].MPID_IOV_LEN = hdr_sz;
-        sreq->dev.iov[1].MPID_IOV_BUF = (char *)data;
+        sreq->dev.iov[1].MPID_IOV_BUF = (char *) data;
         sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
-        
+
         sreq->dev.iov_count = 2;
         sreq->dev.iov_offset = 0;
-        sreq->ch.noncontig = FALSE; /* used in send_progress */
+        sreq->ch.noncontig = FALSE;     /* used in send_progress */
         sreq->ch.vc = vc;
 
-        if(data_sz > 0) {
-            dprintf("isendcontig,hdr=%p,hdr_sz=%ld,data=%p,data_sz=%ld,*(sreq->dev.iov[1].MPID_IOV_BUF)=%08x,sz=%ld,sz=%ld\n", hdr, hdr_sz, data, data_sz, *((uint32_t*)sreq->dev.iov[1].MPID_IOV_BUF), sizeof(sreq->dev.pending_pkt), sizeof(MPIDI_CH3_Pkt_t));
+        if (data_sz > 0) {
+            dprintf
+                ("isendcontig,hdr=%p,hdr_sz=%ld,data=%p,data_sz=%ld,*(sreq->dev.iov[1].MPID_IOV_BUF)=%08x,sz=%ld,sz=%ld\n",
+                 hdr, hdr_sz, data, data_sz, *((uint32_t *) sreq->dev.iov[1].MPID_IOV_BUF),
+                 sizeof(sreq->dev.pending_pkt), sizeof(MPIDI_CH3_Pkt_t));
         }
 
         /* enqueue control message telling tail position of ring buffer for eager-send
-           at the head of software MPI command queue. We explain the reason. Consider this case.
-           rank-0 performs 64 eager-sends and 48 of them are enqueued.
-           rank-1 consumes 2 of them and send the control message.
-           rank-0 drains 2 commands from the command queue.
-           ...
-           rank-0 finds that head of ring buffer for receiving messages from rank-1 is
-              growing by the control message from rank-1 and try to send the control message,
-              but the command is queued at the tail.
-           rank-1 stops sending the control message to rank-1 because the ring buffer is full
-           rank-0 stops draining command queue.
-        */
-        dprintf("isendcontig,enqueuing,type=%d,\n", ((MPIDI_CH3_Pkt_t *)hdr)->type);
+         * at the head of software MPI command queue. We explain the reason. Consider this case.
+         * rank-0 performs 64 eager-sends and 48 of them are enqueued.
+         * rank-1 consumes 2 of them and send the control message.
+         * rank-0 drains 2 commands from the command queue.
+         * ...
+         * rank-0 finds that head of ring buffer for receiving messages from rank-1 is
+         * growing by the control message from rank-1 and try to send the control message,
+         * but the command is queued at the tail.
+         * rank-1 stops sending the control message to rank-1 because the ring buffer is full
+         * rank-0 stops draining command queue.
+         */
+        dprintf("isendcontig,enqueuing,type=%d,\n", ((MPIDI_CH3_Pkt_t *) hdr)->type);
 #if 0
-        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) {
-            printf("enqueuing REPLY_SEQ_NUM\ %d->%d,%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_ncqe);
+        if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) {
+            printf("enqueuing REPLY_SEQ_NUM\ %d->%d,%d\n", MPID_nem_dcfa_myrank, vc->pg_rank,
+                   MPID_nem_dcfa_ncqe);
         }
-        //if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_CH3_PKT_ACCUMULATE) {
+        //if (((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_CH3_PKT_ACCUMULATE) {
         //printf("enqueuing ACCUMULATE\n");
         //}
-        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_CH3_PKT_GET_RESP) {
+        if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_GET_RESP) {
             printf("enqueuing GET_RESP\n");
         }
-        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_CH3_PKT_GET) {
+        if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_GET) {
             printf("enqueuing GET\n");
         }
-        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_CH3_PKT_PUT) {
+        if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_PUT) {
             printf("enqueuing PUT\n");
         }
 #endif
-        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_NEM_PKT_LMT_DONE) {
+        if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_NEM_PKT_LMT_DONE) {
             dprintf("isendcontig,enqueue,DONE\n");
         }
-        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) {
+        if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) {
             dprintf("isendcontig,REPLY_SEQ_NUM,enqueue_at_head\n");
             MPID_nem_dcfa_sendq_enqueue_at_head(&vc_dcfa->sendq, sreq);
-        } else {
+        }
+        else {
             MPID_nem_dcfa_sendq_enqueue(&vc_dcfa->sendq, sreq);
         }
         /* we don't need to perform send_progress() here because
-           the events where RDMA-write-to buffer release is detected or release of IB command queue id detected happens
-           only after dcfa_poll is called. it's different than the case where write(2) is used */
+         * the events where RDMA-write-to buffer release is detected or release of IB command queue id detected happens
+         * only after dcfa_poll is called. it's different than the case where write(2) is used */
     }
-    
+
   fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_ISENDCONTIG);
     return mpi_errno;
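
The send path above avoids the deadlock described in the long comment by reserving one slot of slack in the command queue, the completion queue and the remote ring for sequence-number control traffic, letting those control packets overtake the FIFO check (goahead), and, when even they must wait, enqueueing them at the head of the software send queue. The sketch below models just that decision; the packet kinds, capacities and queue type are simplified stand-ins for the MPID_nem_dcfa/ibcom structures, and LMT RTS/CTS handling is folded into the ordinary case.

    #include <stdio.h>
    #include <string.h>

    enum pkt_kind { PKT_EAGER, PKT_REPLY_SEQ_NUM };

    #define SQ_CAPACITY   16
    #define RDMABUF_NSEG  64
    #define SLACK          1

    typedef struct { enum pkt_kind q[128]; int n; } sendq_t;

    static void enqueue_tail(sendq_t *s, enum pkt_kind k) { s->q[s->n++] = k; }
    static void enqueue_head(sendq_t *s, enum pkt_kind k)
    { memmove(s->q + 1, s->q, s->n * sizeof s->q[0]); s->q[0] = k; s->n++; }

    /* returns 1 if the packet was issued immediately, 0 if it was queued */
    int try_send(sendq_t *sq, enum pkt_kind kind, int inflight, int ring_occupancy)
    {
        int is_ctrl = (kind == PKT_REPLY_SEQ_NUM);
        int slack   = is_ctrl ? 0 : SLACK;   /* ordinary traffic leaves room for control */
        int goahead = is_ctrl;               /* control may overtake queued packets */

        if ((goahead || sq->n == 0) &&
            inflight       < SQ_CAPACITY  - slack &&
            ring_occupancy < RDMABUF_NSEG - slack) {
            return 1;                        /* issue now (the *_core send path) */
        }
        if (is_ctrl)
            enqueue_head(sq, kind);          /* control must not starve behind data */
        else
            enqueue_tail(sq, kind);
        return 0;
    }

    int main(void)
    {
        sendq_t sq = { {0}, 0 };
        /* ring almost full: an eager send has to wait, but the control packet
         * that frees the ring still goes out thanks to the reserved slack */
        printf("eager issued?   %d\n", try_send(&sq, PKT_EAGER, 3, RDMABUF_NSEG - 1));
        printf("control issued? %d\n", try_send(&sq, PKT_REPLY_SEQ_NUM, 3, RDMABUF_NSEG - 1));
        return 0;
    }
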
@@ -549,17 +586,17 @@ int MPID_nem_dcfa_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr
 
     /* FIXME: avoid creating a request when not queued */
 
-    if(hdr&&((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_GET) {
+    if (hdr && ((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_GET) {
         //printf("istarctontig,MPIDI_CH3_PKT_GET,ref_count=%d\n", sreq->ref_count);
         /* sreq here is released by drain_scq, caller
-           request in MPIDI_CH3I_Recv_rma_msg is
-           released by PKT_GET_RESP, MPIDI_CH3I_RMAListComplete*/
+         * request in MPIDI_CH3I_Recv_rma_msg is
+         * released by PKT_GET_RESP, MPIDI_CH3I_RMAListComplete */
     }
 
     //tscs = MPID_nem_dcfa_rdtsc();
     sreq = MPID_Request_create();
-    MPIU_Assert (sreq != NULL);
-    MPIU_Object_set_ref (sreq, 2);
+    MPIU_Assert(sreq != NULL);
+    MPIU_Object_set_ref(sreq, 2);
     sreq->kind = MPID_REQUEST_SEND;
     sreq->dev.OnDataAvail = 0;
     //tsce = MPID_nem_dcfa_rdtsc(); printf("rc,%ld\n", tsce - tscs); // 124.15 cycles
@@ -568,18 +605,23 @@ int MPID_nem_dcfa_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr
     ibcom_errno = ibcom_sseq_num_get(vc_dcfa->sc->fd, &sseq_num);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_sseq_num_get");
 
-    if(hdr) {
+    if (hdr) {
 
-        MPIDI_CH3_Pkt_t *pkt = (MPIDI_CH3_Pkt_t*)hdr;
-        MPIDI_CH3_Pkt_close_t * close_pkt = &pkt->close;
-        dprintf("isend(istartcontig),%d->%d,seq_num=%d,type=%d,ack=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, sseq_num, close_pkt->type, close_pkt->ack);
-    } else {
-        dprintf("isend(istartcontig),%d->%d,seq_num=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, sseq_num);
+        MPIDI_CH3_Pkt_t *pkt = (MPIDI_CH3_Pkt_t *) hdr;
+        MPIDI_CH3_Pkt_close_t *close_pkt = &pkt->close;
+        dprintf("isend(istartcontig),%d->%d,seq_num=%d,type=%d,ack=%d\n", MPID_nem_dcfa_myrank,
+                vc->pg_rank, sseq_num, close_pkt->type, close_pkt->ack);
+    }
+    else {
+        dprintf("isend(istartcontig),%d->%d,seq_num=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank,
+                sseq_num);
     }
 #endif
 
     mpi_errno = MPID_nem_dcfa_iSendContig(vc, sreq, hdr, hdr_sz, data, data_sz);
-    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+    if (mpi_errno) {
+        MPIU_ERR_POP(mpi_errno);
+    }
 
   fn_exit:
     *sreq_ptr = sreq;
@@ -594,7 +636,9 @@ int MPID_nem_dcfa_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr
 #define FUNCNAME MPID_nem_dcfa_SendNoncontig_core
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_SendNoncontig_core(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz) {
+int MPID_nem_dcfa_SendNoncontig_core(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
+                                     MPIDI_msg_sz_t hdr_sz)
+{
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
     MPIDI_msg_sz_t last;
@@ -605,85 +649,100 @@ int MPID_nem_dcfa_SendNoncontig_core(MPIDI_VC_t * vc, MPID_Request * sreq, void
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_SENDNONCONTIG_CORE);
 
     MPIU_Assert(sreq->dev.segment_first == 0);
-    last = sreq->dev.segment_size; /* segment_size is byte offset */
+    last = sreq->dev.segment_size;      /* segment_size is byte offset */
     if (last > 0) {
-        REQ_FIELD(sreq, lmt_pack_buf) = MPIU_Malloc((size_t)sreq->dev.segment_size);
-        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(sreq, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
-        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, (char *)REQ_FIELD(sreq, lmt_pack_buf));
+        REQ_FIELD(sreq, lmt_pack_buf) = MPIU_Malloc((size_t) sreq->dev.segment_size);
+        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(sreq, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER,
+                            "**outofmemory");
+        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
+                          (char *) REQ_FIELD(sreq, lmt_pack_buf));
         MPIU_Assert(last == sreq->dev.segment_size);
     }
-        
+
     /* increment cc because PktHandler_EagerSyncAck, ssend.c, drain_scq decrement it */
-    if(((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_EAGER_SYNC_SEND) {
+    if (((MPIDI_CH3_Pkt_t *) hdr)->type == MPIDI_CH3_PKT_EAGER_SYNC_SEND) {
         MPIR_Request_add_ref(sreq);
     }
 
     ibcom_errno = ibcom_sseq_num_get(vc_dcfa->sc->fd, &sseq_num);
     MPIU_ERR_CHKANDJUMP(ibcom_errno != 0, mpi_errno, MPI_ERR_OTHER, "**ibcom_sseq_num_get");
-        
+
     int copied;
-    dprintf("sendnoncontig_core,isend,%d->%d,seq_num=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, sseq_num);
-    ibcom_errno = ibcom_isend(vc_dcfa->sc->fd, (uint64_t)sreq, NULL, 0, hdr, sizeof(MPIDI_CH3_Pkt_t), (void*)REQ_FIELD(sreq, lmt_pack_buf), (int)last, &copied);
+    dprintf("sendnoncontig_core,isend,%d->%d,seq_num=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank,
+            sseq_num);
+    ibcom_errno =
+        ibcom_isend(vc_dcfa->sc->fd, (uint64_t) sreq, NULL, 0, hdr, sizeof(MPIDI_CH3_Pkt_t),
+                    (void *) REQ_FIELD(sreq, lmt_pack_buf), (int) last, &copied);
     MPIU_ERR_CHKANDJUMP(ibcom_errno != 0, mpi_errno, MPI_ERR_OTHER, "**ibcom_isend");
     MPID_nem_dcfa_ncqe += 1;
     dprintf("sendnoncontig_core,ncqe=%d\n", MPID_nem_dcfa_ncqe);
 
     vc_dcfa->pending_sends += 1;
-    sreq->ch.vc = vc; /* used in poll */
-
-#if 0 /* see contig */
-    if(copied) { /* skip poll scq */
-           int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
-           
-           (VC_FIELD(sreq->ch.vc, pending_sends)) -= 1;
-           
-           /* as in the template */
-           reqFn = sreq->dev.OnDataAvail;
-           if (!reqFn){
-               /* MPID_Request_release is called in 
-                  MPI_Wait (in src/mpi/pt2pt/wait.c)
-                    MPIR_Wait_impl (in src/mpi/pt2pt/wait.c)
-                      MPIR_Request_complete (in /src/mpi/pt2pt/mpir_request.c) */
-               int incomplete;
-               MPIDI_CH3U_Request_decrement_cc(sreq, &incomplete);
-               if(!incomplete) { MPIDI_CH3_Progress_signal_completion(); }
-               //dprintf("isendcontig_core,cc_ptr=%d\n", *(sreq->cc_ptr));
-               dprintf("sendcontig_core,complete,req=%p,cc incremented to %d\n", sreq, MPIDI_CH3I_progress_completion_count.v);
-               MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-           } else {
-               MPIDI_VC_t *vc = sreq->ch.vc;
-               int complete = 0;
-               mpi_errno = reqFn(vc, sreq, &complete);
-               if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-               /* not-completed case is not implemented */
-               MPIU_Assert(complete == TRUE);
-               MPIU_Assert(0); /* decrement ref_count and free sreq causes problem */
-           }
-    } else {
-        MPID_nem_dcfa_ncqe_nces += 1; /* it has different meaning, counting non-copied eager-short */
+    sreq->ch.vc = vc;   /* used in poll */
+
+#if 0   /* see contig */
+    if (copied) {       /* skip poll scq */
+        int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
+
+        (VC_FIELD(sreq->ch.vc, pending_sends)) -= 1;
+
+        /* as in the template */
+        reqFn = sreq->dev.OnDataAvail;
+        if (!reqFn) {
+            /* MPID_Request_release is called in
+             * MPI_Wait (in src/mpi/pt2pt/wait.c)
+             * MPIR_Wait_impl (in src/mpi/pt2pt/wait.c)
+             * MPIR_Request_complete (in /src/mpi/pt2pt/mpir_request.c) */
+            int incomplete;
+            MPIDI_CH3U_Request_decrement_cc(sreq, &incomplete);
+            if (!incomplete) {
+                MPIDI_CH3_Progress_signal_completion();
+            }
+            //dprintf("isendcontig_core,cc_ptr=%d\n", *(sreq->cc_ptr));
+            dprintf("sendcontig_core,complete,req=%p,cc incremented to %d\n", sreq,
+                    MPIDI_CH3I_progress_completion_count.v);
+            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+        }
+        else {
+            MPIDI_VC_t *vc = sreq->ch.vc;
+            int complete = 0;
+            mpi_errno = reqFn(vc, sreq, &complete);
+            if (mpi_errno)
+                MPIU_ERR_POP(mpi_errno);
+            /* not-completed case is not implemented */
+            MPIU_Assert(complete == TRUE);
+            MPIU_Assert(0);     /* decrement ref_count and free sreq causes problem */
+        }
+    }
+    else {
+        MPID_nem_dcfa_ncqe_nces += 1;   /* it has different meaning, counting non-copied eager-short */
     }
 #else
-        MPID_nem_dcfa_ncqe_nces += 1; /* it has different meaning, counting non-copied eager-short */
+    MPID_nem_dcfa_ncqe_nces += 1;       /* it has different meaning, counting non-copied eager-short */
 #endif
 
 #ifndef DISABLE_VAR_OCC_NOTIFY_RATE
 #if 1
-           //dprintf("isendcontig,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);  
+    //dprintf("isendcontig,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
     int *notify_rstate;
     ibcom_errno = ibcom_rdmabuf_occupancy_notify_rstate_get(vc_dcfa->sc->fd, &notify_rstate);
-    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rdmabuf_occupancy_notify_rstate_get"); 
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                        "**ibcom_rdmabuf_occupancy_notify_rstate_get");
 
-    dprintf("isendcontig,head=%d,tail=%d,hw=%d\n", vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail, IBCOM_RDMABUF_HIGH_WATER_MARK);
+    dprintf("isendcontig,head=%d,tail=%d,hw=%d\n", vc_dcfa->ibcom->sseq_num,
+            vc_dcfa->ibcom->lsr_seq_num_tail, IBCOM_RDMABUF_HIGH_WATER_MARK);
     /* if the number of used slots in the RDMA-write-to buffer has hit the high water mark */
-    if(*notify_rstate == IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW &&
-       MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) > IBCOM_RDMABUF_HIGH_WATER_MARK) {
+    if (*notify_rstate == IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW &&
+        MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                             vc_dcfa->ibcom->lsr_seq_num_tail) > IBCOM_RDMABUF_HIGH_WATER_MARK) {
         dprintf("changing notify_rstate,id=%d\n", vc_dcfa->ibcom->sseq_num);
         /* remember remote notifying policy so that local can know when to change remote policy back to LW */
         *notify_rstate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW;
         /* change remote notifying policy of RDMA-write-to buf occupancy info */
-        MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc, IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW);
+        MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc,
+                                                                 IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW);
     }
-    //dprintf("isendcontig_core,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);  
+    //dprintf("isendcontig_core,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);
 #endif
 #endif
   fn_exit:
@@ -698,7 +757,9 @@ int MPID_nem_dcfa_SendNoncontig_core(MPIDI_VC_t * vc, MPID_Request * sreq, void
 #define FUNCNAME MPID_nem_dcfa_SendNoncontig
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz) {
+int MPID_nem_dcfa_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
+                                MPIDI_msg_sz_t hdr_sz)
+{
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
     MPIDI_msg_sz_t last;
@@ -710,40 +771,46 @@ int MPID_nem_dcfa_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
     MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
     MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "dcfa_SendNoncontig");
 
-    dprintf("sendnoncontig,%d->%d,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    dprintf("sendnoncontig,%d->%d,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank,
+            vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
 #if 0
     /* aggressively perform drain_scq */
     /* try to clear the road blocks, i.e. ncom, ncqe */
-    if(vc_dcfa->ibcom->ncom >= /*IBCOM_MAX_SQ_CAPACITY*/IBCOM_MAX_SQ_HEIGHT_DRAIN ||
-       MPID_nem_dcfa_ncqe >= /*IBCOM_MAX_CQ_CAPACITY*/IBCOM_MAX_CQ_HEIGHT_DRAIN) {
+    if (vc_dcfa->ibcom->ncom >= /*IBCOM_MAX_SQ_CAPACITY */ IBCOM_MAX_SQ_HEIGHT_DRAIN ||
+        MPID_nem_dcfa_ncqe >= /*IBCOM_MAX_CQ_CAPACITY */ IBCOM_MAX_CQ_HEIGHT_DRAIN) {
         //printf("isendcontig,kick drain_scq\n");
-        ibcom_errno = MPID_nem_dcfa_drain_scq(1); /* set 1st arg to one which means asking it for not calling send_progress because it recursively call isendcontig_core */
+        ibcom_errno = MPID_nem_dcfa_drain_scq(1);       /* pass 1 to ask drain_scq not to call send_progress, because send_progress recursively calls isendcontig_core */
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
     }
 #endif
     /* set it for drain_scq */
-	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_EAGER_MSG );
+    MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
 
     /* if IB command overflow-queue is empty AND local IB command queue isn't full AND remote RDMA-write-to buf isn't getting overrun */
     /* set it for drain_scq */
-    int slack = IBCOM_AMT_SLACK; /* slack for control packet bringing sequence number */
-    if(MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) &&
-       vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack &&
-       MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack &&
-       MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG - slack) {
+    int slack = IBCOM_AMT_SLACK;        /* slack for control packet bringing sequence number */
+    if (MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) &&
+        vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack &&
+        MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack &&
+        MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                             vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG - slack) {
 
         mpi_errno = MPID_nem_dcfa_SendNoncontig_core(vc, sreq, hdr, hdr_sz);
-        if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        if (mpi_errno) {
+            MPIU_ERR_POP(mpi_errno);
+        }
 
-    } else {
+    }
+    else {
         /* enqueue command into send_queue */
         dprintf("sendnoncontig, enqueuing");
 
         /* store required info. see MPIDI_CH3_iSendv in src/mpid/ch3/channels/nemesis/src/ch3_isendv.c */
         sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
-        sreq->dev.iov[0].MPID_IOV_BUF = (char *)&sreq->dev.pending_pkt;
+        sreq->dev.iov[0].MPID_IOV_BUF = (char *) &sreq->dev.pending_pkt;
         sreq->dev.iov[0].MPID_IOV_LEN = hdr_sz;
-        
+
         sreq->dev.iov_count = 1;
         sreq->dev.iov_offset = 0;
         sreq->ch.noncontig = TRUE;
@@ -764,7 +831,7 @@ int MPID_nem_dcfa_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
 #define FUNCNAME MPID_nem_dcfa_send_progress
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_send_progress(MPID_nem_dcfa_vc_area *vc_dcfa)
+int MPID_nem_dcfa_send_progress(MPID_nem_dcfa_vc_area * vc_dcfa)
 {
     int mpi_errno = MPI_SUCCESS;
     int ibcom_errno;
@@ -780,177 +847,241 @@ int MPID_nem_dcfa_send_progress(MPID_nem_dcfa_vc_area *vc_dcfa)
     //dprintf("send_progress,enter\n");
 
     /* prevent a call path send_progress -> drain_scq -> send_progress */
-    if(entered_send_progress) { goto fn_exit; }
+    if (entered_send_progress) {
+        goto fn_exit;
+    }
     entered_send_progress = 1;
 
     sreq = MPID_nem_dcfa_sendq_head(vc_dcfa->sendq);
-    if(sreq) {
-       prev_sreq = NULL;
-       do {
+    if (sreq) {
+        prev_sreq = NULL;
+        do {
 #if 0
-    /* aggressively perform drain_scq */
-    /* try to clear the road blocks, i.e. ncom, ncqe */
-    if(vc_dcfa->ibcom->ncom >= /*IBCOM_MAX_SQ_CAPACITY*/IBCOM_MAX_SQ_HEIGHT_DRAIN ||
-       MPID_nem_dcfa_ncqe >= /*IBCOM_MAX_CQ_CAPACITY*/IBCOM_MAX_CQ_HEIGHT_DRAIN) {
-        dprintf("send_progress,kick drain_scq\n");
-        ibcom_errno = MPID_nem_dcfa_drain_scq(1); /* set 1st arg to one which means asking it for not calling send_progress because it recursively call isendcontig_core */
-        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
-    }
+            /* aggressively perform drain_scq */
+            /* try to clear the road blocks, i.e. ncom, ncqe */
+            if (vc_dcfa->ibcom->ncom >= /*IBCOM_MAX_SQ_CAPACITY */ IBCOM_MAX_SQ_HEIGHT_DRAIN ||
+                MPID_nem_dcfa_ncqe >= /*IBCOM_MAX_CQ_CAPACITY */ IBCOM_MAX_CQ_HEIGHT_DRAIN) {
+                dprintf("send_progress,kick drain_scq\n");
+                ibcom_errno = MPID_nem_dcfa_drain_scq(1);       /* pass 1 to ask drain_scq not to call send_progress, because send_progress recursively calls isendcontig_core */
+                MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER,
+                                    "**MPID_nem_dcfa_drain_scq");
+            }
 #endif
-    msg_type = MPIDI_Request_get_msg_type(sreq);
-
-           MPIDI_CH3_Pkt_t* ch3_hdr = (MPIDI_CH3_Pkt_t *)sreq->dev.iov[0].MPID_IOV_BUF;
-           MPID_nem_pkt_netmod_t* netmod_hdr = (MPID_nem_pkt_netmod_t *)sreq->dev.iov[0].MPID_IOV_BUF;
-           int slack = (msg_type == MPIDI_REQUEST_EAGER_MSG) ? /* guard from RDMA-read or RDMA-write */
-               (
-                (
-                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_REQ_SEQ_NUM) &&
-                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_REPLY_SEQ_NUM) &&
-                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_PKT_LMT_GET_DONE) &&
-                 ch3_hdr->type != MPIDI_NEM_PKT_LMT_RTS && 
-                 ch3_hdr->type != MPIDI_NEM_PKT_LMT_CTS
-                 ) ? IBCOM_AMT_SLACK : 0
-                ) : IBCOM_AMT_SLACK;
-           if(vc_dcfa->ibcom->ncom >= IBCOM_MAX_SQ_CAPACITY - slack ||
-              MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_CAPACITY - slack ||
-              MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) >= IBCOM_RDMABUF_NSEG - slack) {
-               break; 
-           }
-
-
-           if(vc_dcfa != MPID_nem_dcfa_debug_current_vc_dcfa) {
-               dprintf("send_progress,vc_dcfa != MPID_nem_dcfa_debug_current_vc_dcfa\n");
-           }
-           dprintf("send_progress,kind=%d,msg_type=%d\n", sreq->kind, msg_type);
-           if(msg_type == MPIDI_REQUEST_EAGER_MSG) {
-               dprintf("send_progress,type=%d\n", ch3_hdr->type);
-           }
-           dprintf("send_progress,%d->%d,rdiff=%d(%d-%d),ldiff=%d(%d-%d),slack=%d\n", MPID_nem_dcfa_myrank, sreq->ch.vc->pg_rank, 
-                   MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
-                   vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent,
-                   MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail),
-                   vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail,
-                   slack
-                   );
-    if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_EAGER_MSG) {
-        if(!sreq->ch.noncontig) {
-            dprintf("send_progress,contig,type=%d,sseq_num=%d,MPIDI_NEM_PKT_LMT_RTS=%d,MPIDI_NEM_DCFA_PKT_LMT_GET_DONE=%d\n", ch3_hdr->type, vc_dcfa->ibcom->sseq_num, MPIDI_NEM_PKT_LMT_RTS, MPIDI_NEM_DCFA_PKT_LMT_GET_DONE);
-            if(sreq->dev.iov[1].MPID_IOV_LEN > 0) { dprintf("send_progress,send,contig,sreq->dev.iov[1].MPID_IOV_BUF)=%p,*(sreq->dev.iov[1].MPID_IOV_BUF)=%08x\n", sreq->dev.iov[1].MPID_IOV_BUF, *((uint32_t*)sreq->dev.iov[1].MPID_IOV_BUF)); }
-            MPIU_Assert(sreq->dev.iov_count > 0);
-            
-            switch(ch3_hdr->type) {
-            /* send current rsr_seq_num_tail because message from target to initiator
-               might have happened while being queued */
-            case MPIDI_NEM_PKT_LMT_RTS: {
-                MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = (MPID_nem_dcfa_lmt_cookie_t*)sreq->dev.iov[1].MPID_IOV_BUF;
-                dprintf("send_progress,MPIDI_NEM_PKT_LMT_RTS,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
-                /* embed RDMA-write-to buffer occupancy information */
-                s_cookie_buf->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
-                /* remember the last one sent */
-                vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail;
-                break; }
-                
-            case MPIDI_NEM_PKT_LMT_CTS: {
-                MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = (MPID_nem_dcfa_lmt_cookie_t*)sreq->dev.iov[1].MPID_IOV_BUF;
-                dprintf("send_progress,MPIDI_NEM_PKT_LMT_CTS,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
-                /* embed RDMA-write-to buffer occupancy information */
-                s_cookie_buf->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
-                /* remember the last one sent */
-                vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail;
-                break; }
-
-            default:;
+            msg_type = MPIDI_Request_get_msg_type(sreq);
+
+            MPIDI_CH3_Pkt_t *ch3_hdr = (MPIDI_CH3_Pkt_t *) sreq->dev.iov[0].MPID_IOV_BUF;
+            MPID_nem_pkt_netmod_t *netmod_hdr =
+                (MPID_nem_pkt_netmod_t *) sreq->dev.iov[0].MPID_IOV_BUF;
+            int slack = (msg_type == MPIDI_REQUEST_EAGER_MSG) ? /* guard from RDMA-read or RDMA-write */
+                (((ch3_hdr->type != MPIDI_NEM_PKT_NETMOD ||
+                   netmod_hdr->subtype != MPIDI_NEM_DCFA_REQ_SEQ_NUM) &&
+                  (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD ||
+                   netmod_hdr->subtype != MPIDI_NEM_DCFA_REPLY_SEQ_NUM) &&
+                  (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD ||
+                   netmod_hdr->subtype != MPIDI_NEM_DCFA_PKT_LMT_GET_DONE) &&
+                  ch3_hdr->type != MPIDI_NEM_PKT_LMT_RTS &&
+                  ch3_hdr->type != MPIDI_NEM_PKT_LMT_CTS) ? IBCOM_AMT_SLACK : 0) : IBCOM_AMT_SLACK;
+            if (vc_dcfa->ibcom->ncom >= IBCOM_MAX_SQ_CAPACITY - slack ||
+                MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_CAPACITY - slack ||
+                MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                                     vc_dcfa->ibcom->lsr_seq_num_tail) >=
+                IBCOM_RDMABUF_NSEG - slack) {
+                break;
             }
 
-            if(ch3_hdr->type == MPIDI_NEM_PKT_NETMOD) {
-                switch(netmod_hdr->subtype) {
-                /* send current rsr_seq_num_tail because message from target to initiator
-                   might have happened while being queued */
-                case MPIDI_NEM_DCFA_PKT_LMT_GET_DONE: {
-                    MPID_nem_dcfa_pkt_lmt_get_done_t* _done_pkt = (MPID_nem_dcfa_pkt_lmt_get_done_t*)sreq->dev.iov[0].MPID_IOV_BUF;
-                    dprintf("send_progress,MPIDI_NEM_DCFA_PKT_LMT_GET_DONE,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
-                    /* embed SR occupancy information */
-                    _done_pkt->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
-                    /* remember the last one sent */
-                    vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail;
-                    break; }
-                case MPIDI_NEM_DCFA_REPLY_SEQ_NUM: {
-                    MPID_nem_dcfa_pkt_reply_seq_num_t* _pkt = (MPID_nem_dcfa_pkt_reply_seq_num_t*)sreq->dev.iov[0].MPID_IOV_BUF;
-                    dprintf("send_progress,MPIDI_NEM_DCFA_REPLY_SEQ_NUM,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
-                    /* embed SR occupancy information */
-                    _pkt->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
-                    /* remember the last one sent */
-                    vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail;
-                    break; }
-                    
-                default:;
+
+            if (vc_dcfa != MPID_nem_dcfa_debug_current_vc_dcfa) {
+                dprintf("send_progress,vc_dcfa != MPID_nem_dcfa_debug_current_vc_dcfa\n");
+            }
+            dprintf("send_progress,kind=%d,msg_type=%d\n", sreq->kind, msg_type);
+            if (msg_type == MPIDI_REQUEST_EAGER_MSG) {
+                dprintf("send_progress,type=%d\n", ch3_hdr->type);
+            }
+            dprintf("send_progress,%d->%d,rdiff=%d(%d-%d),ldiff=%d(%d-%d),slack=%d\n",
+                    MPID_nem_dcfa_myrank, sreq->ch.vc->pg_rank,
+                    MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail,
+                                         vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
+                    vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent,
+                    MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num,
+                                         vc_dcfa->ibcom->lsr_seq_num_tail),
+                    vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail, slack);
+            if (sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+                if (!sreq->ch.noncontig) {
+                    dprintf
+                        ("send_progress,contig,type=%d,sseq_num=%d,MPIDI_NEM_PKT_LMT_RTS=%d,MPIDI_NEM_DCFA_PKT_LMT_GET_DONE=%d\n",
+                         ch3_hdr->type, vc_dcfa->ibcom->sseq_num, MPIDI_NEM_PKT_LMT_RTS,
+                         MPIDI_NEM_DCFA_PKT_LMT_GET_DONE);
+                    if (sreq->dev.iov[1].MPID_IOV_LEN > 0) {
+                        dprintf
+                            ("send_progress,send,contig,sreq->dev.iov[1].MPID_IOV_BUF)=%p,*(sreq->dev.iov[1].MPID_IOV_BUF)=%08x\n",
+                             sreq->dev.iov[1].MPID_IOV_BUF,
+                             *((uint32_t *) sreq->dev.iov[1].MPID_IOV_BUF));
+                    }
+                    MPIU_Assert(sreq->dev.iov_count > 0);
+
+                    switch (ch3_hdr->type) {
+                        /* send current rsr_seq_num_tail because message from target to initiator
+                         * might have happened while being queued */
+                    case MPIDI_NEM_PKT_LMT_RTS:{
+                            MPID_nem_dcfa_lmt_cookie_t *s_cookie_buf =
+                                (MPID_nem_dcfa_lmt_cookie_t *) sreq->dev.iov[1].MPID_IOV_BUF;
+                            dprintf("send_progress,MPIDI_NEM_PKT_LMT_RTS,rsr_seq_num_tail=%d\n",
+                                    vc_dcfa->ibcom->rsr_seq_num_tail);
+                            /* embed RDMA-write-to buffer occupancy information */
+                            s_cookie_buf->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
+                            /* remember the last one sent */
+                            vc_dcfa->ibcom->rsr_seq_num_tail_last_sent =
+                                vc_dcfa->ibcom->rsr_seq_num_tail;
+                            break;
+                        }
+
+                    case MPIDI_NEM_PKT_LMT_CTS:{
+                            MPID_nem_dcfa_lmt_cookie_t *s_cookie_buf =
+                                (MPID_nem_dcfa_lmt_cookie_t *) sreq->dev.iov[1].MPID_IOV_BUF;
+                            dprintf("send_progress,MPIDI_NEM_PKT_LMT_CTS,rsr_seq_num_tail=%d\n",
+                                    vc_dcfa->ibcom->rsr_seq_num_tail);
+                            /* embed RDMA-write-to buffer occupancy information */
+                            s_cookie_buf->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
+                            /* remember the last one sent */
+                            vc_dcfa->ibcom->rsr_seq_num_tail_last_sent =
+                                vc_dcfa->ibcom->rsr_seq_num_tail;
+                            break;
+                        }
+
+                    default:;
+                    }
+
+                    if (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD) {
+                        switch (netmod_hdr->subtype) {
+                            /* send current rsr_seq_num_tail because message from target to initiator
+                             * might have happened while being queued */
+                        case MPIDI_NEM_DCFA_PKT_LMT_GET_DONE:{
+                                MPID_nem_dcfa_pkt_lmt_get_done_t *_done_pkt =
+                                    (MPID_nem_dcfa_pkt_lmt_get_done_t *) sreq->dev.iov[0].
+                                    MPID_IOV_BUF;
+                                dprintf
+                                    ("send_progress,MPIDI_NEM_DCFA_PKT_LMT_GET_DONE,rsr_seq_num_tail=%d\n",
+                                     vc_dcfa->ibcom->rsr_seq_num_tail);
+                                /* embed SR occupancy information */
+                                _done_pkt->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
+                                /* remember the last one sent */
+                                vc_dcfa->ibcom->rsr_seq_num_tail_last_sent =
+                                    vc_dcfa->ibcom->rsr_seq_num_tail;
+                                break;
+                            }
+                        case MPIDI_NEM_DCFA_REPLY_SEQ_NUM:{
+                                MPID_nem_dcfa_pkt_reply_seq_num_t *_pkt =
+                                    (MPID_nem_dcfa_pkt_reply_seq_num_t *) sreq->dev.iov[0].
+                                    MPID_IOV_BUF;
+                                dprintf
+                                    ("send_progress,MPIDI_NEM_DCFA_REPLY_SEQ_NUM,rsr_seq_num_tail=%d\n",
+                                     vc_dcfa->ibcom->rsr_seq_num_tail);
+                                /* embed SR occupancy information */
+                                _pkt->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
+                                /* remember the last one sent */
+                                vc_dcfa->ibcom->rsr_seq_num_tail_last_sent =
+                                    vc_dcfa->ibcom->rsr_seq_num_tail;
+                                break;
+                            }
+
+                        default:;
+                        }
+                    }
+
+
+                    mpi_errno =
+                        MPID_nem_dcfa_iSendContig_core(sreq->ch.vc, sreq,
+                                                       sreq->dev.iov[0].MPID_IOV_BUF,
+                                                       sreq->dev.iov[0].MPID_IOV_LEN,
+                                                       sreq->dev.iov[1].MPID_IOV_BUF,
+                                                       sreq->dev.iov[1].MPID_IOV_LEN);
+                    if (mpi_errno) {
+                        MPIU_ERR_POP(mpi_errno);
+                    }
+                }
+                else {
+                    dprintf("send_progress,send,noncontig\n");
+                    mpi_errno =
+                        MPID_nem_dcfa_SendNoncontig_core(sreq->ch.vc, sreq,
+                                                         sreq->dev.iov[0].MPID_IOV_BUF,
+                                                         sreq->dev.iov[0].MPID_IOV_LEN);
+                    if (mpi_errno) {
+                        MPIU_ERR_POP(mpi_errno);
+                    }
                 }
             }
+            else if (sreq->kind == MPID_REQUEST_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+
+                dprintf("send_progress,kick lmt_start_recv_core,prev=%p,next=%p\n", prev_sreq,
+                        MPID_nem_dcfa_sendq_next(sreq));
+                mpi_errno =
+                    MPID_nem_dcfa_lmt_start_recv_core(sreq, REQ_FIELD(sreq, lmt_raddr),
+                                                      REQ_FIELD(sreq, lmt_rkey), REQ_FIELD(sreq,
+                                                                                           lmt_write_to_buf));
+                if (mpi_errno) {
+                    MPIU_ERR_POP(mpi_errno);
+                }
+            }
+            else if (sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+            }
+            else {
+                dprintf("send_progress,unknown sreq->type=%d,msg_type=%d\n", sreq->kind, msg_type);
+                assert(0);
+                MPIU_ERR_INTERNALANDJUMP(mpi_errno, "send_progress,unknown type");
+            }
 
 
-            mpi_errno = MPID_nem_dcfa_iSendContig_core(sreq->ch.vc, sreq, sreq->dev.iov[0].MPID_IOV_BUF, sreq->dev.iov[0].MPID_IOV_LEN, sreq->dev.iov[1].MPID_IOV_BUF, sreq->dev.iov[1].MPID_IOV_LEN);
-            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-        } else {
-            dprintf("send_progress,send,noncontig\n");
-            mpi_errno = MPID_nem_dcfa_SendNoncontig_core(sreq->ch.vc, sreq, sreq->dev.iov[0].MPID_IOV_BUF, sreq->dev.iov[0].MPID_IOV_LEN);
-            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-        }
-    } else if(sreq->kind == MPID_REQUEST_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
-
-            dprintf("send_progress,kick lmt_start_recv_core,prev=%p,next=%p\n", prev_sreq, MPID_nem_dcfa_sendq_next(sreq));
-            mpi_errno = MPID_nem_dcfa_lmt_start_recv_core(sreq, REQ_FIELD(sreq, lmt_raddr), REQ_FIELD(sreq, lmt_rkey), REQ_FIELD(sreq, lmt_write_to_buf));
-            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-    } else if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_RNDV_MSG) {
-    } else {
-        dprintf("send_progress,unknown sreq->type=%d,msg_type=%d\n", sreq->kind, msg_type);
-        assert(0);
-        MPIU_ERR_INTERNALANDJUMP(mpi_errno, "send_progress,unknown type");
-    }
-
-
-           /* unlink sreq */
-           if(prev_sreq != NULL) {
-               MPID_nem_dcfa_sendq_next(prev_sreq) = MPID_nem_dcfa_sendq_next(sreq);
-           } else {
-               MPID_nem_dcfa_sendq_head(vc_dcfa->sendq) = MPID_nem_dcfa_sendq_next(sreq);
-           }
-           if(MPID_nem_dcfa_sendq_next(sreq) == NULL) { vc_dcfa->sendq.tail = prev_sreq; }
+            /* unlink sreq */
+            if (prev_sreq != NULL) {
+                MPID_nem_dcfa_sendq_next(prev_sreq) = MPID_nem_dcfa_sendq_next(sreq);
+            }
+            else {
+                MPID_nem_dcfa_sendq_head(vc_dcfa->sendq) = MPID_nem_dcfa_sendq_next(sreq);
+            }
+            if (MPID_nem_dcfa_sendq_next(sreq) == NULL) {
+                vc_dcfa->sendq.tail = prev_sreq;
+            }
 
-           /* save sreq->dev.next (and sreq) because decrementing reference-counter might free sreq */
-           MPID_Request *tmp_sreq = sreq;
-           sreq = MPID_nem_dcfa_sendq_next(sreq);
-           goto next_unlinked;
-       next:
-           prev_sreq = sreq;
-           sreq = MPID_nem_dcfa_sendq_next(sreq);
-       next_unlinked:;
-       } while(sreq);
+            /* save sreq->dev.next (and sreq) because decrementing reference-counter might free sreq */
+            MPID_Request *tmp_sreq = sreq;
+            sreq = MPID_nem_dcfa_sendq_next(sreq);
+            goto next_unlinked;
+          next:
+            prev_sreq = sreq;
+            sreq = MPID_nem_dcfa_sendq_next(sreq);
+          next_unlinked:;
+        } while (sreq);
     }
 
- out:
+  out:
 
     //dprintf("send_progress,exit,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
 
- fn_exit:
+  fn_exit:
     entered_send_progress = 0;
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_SEND_PROGRESS);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 
 #ifdef DCFA_ONDEMAND
-int MPID_nem_dcfa_cm_send_core(int rank, MPID_nem_dcfa_cm_cmd_t* cmd) {
-    IbCom* ibcom_scratch_pad;
-    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[rank], &ibcom_scratch_pad); 
+int MPID_nem_dcfa_cm_send_core(int rank, MPID_nem_dcfa_cm_cmd_t * cmd)
+{
+    IbCom *ibcom_scratch_pad;
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[rank], &ibcom_scratch_pad);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
 
-    if(MPID_nem_dcfa_ncqe_scratch_pad >= IBCOM_MAX_SQ_CAPACITY || ibcom_scratch_pad->ncom_scratch_pad >= IBCOM_MAX_CQ_CAPACITY) {
+    if (MPID_nem_dcfa_ncqe_scratch_pad >= IBCOM_MAX_SQ_CAPACITY ||
+        ibcom_scratch_pad->ncom_scratch_pad >= IBCOM_MAX_CQ_CAPACITY) {
         mpi_errno = MPID_nem_dcfa_drain_scq_scratch_pad();
-        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_scratch_pad");
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,
+                            "**MPID_nem_dcfa_drain_scq_scratch_pad");
     }
 
-    ibcom_errno = ibcom_put_scratch_pad(MPID_nem_dcfa_scratch_pad_fds[rank], (uint64_t)ibcom_scratch_pad, sizeof(uint32_t), sizeof(MPID_nem_dcfa_cm_cmd_t), (void*)cmd);
+    ibcom_errno =
+        ibcom_put_scratch_pad(MPID_nem_dcfa_scratch_pad_fds[rank], (uint64_t) ibcom_scratch_pad,
+                              sizeof(uint32_t), sizeof(MPID_nem_dcfa_cm_cmd_t), (void *) cmd);
     MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_put_scratch_pad");
     MPID_nem_dcfa_ncqe_scratch_pad += 1;
 
@@ -961,7 +1092,8 @@ int MPID_nem_dcfa_cm_send_core(int rank, MPID_nem_dcfa_cm_cmd_t* cmd) {
 #define FUNCNAME MPID_nem_dcfa_cm_connect
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-int MPID_nem_dcfa_cm_connect(MPIDI_VC_t * vc) {
+int MPID_nem_dcfa_cm_connect(MPIDI_VC_t * vc)
+{
     int mpi_errno = MPI_SUCCESS;
     int val;
     MPID_nem_dcfa_cm_cmd_t cmd;
@@ -973,12 +1105,12 @@ int MPID_nem_dcfa_cm_connect(MPIDI_VC_t * vc) {
 
     cmd.type = MPID_NEM_DCFA_CM_SYN;
     mpi_errno = MPID_nem_dcfa_cm_send_core(rank, &cmd);
-    MPIU_ERR_CHKANDJUMP(mp_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_put"); 
-    
- fn_exit:
+    MPIU_ERR_CHKANDJUMP(mp_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_put");
+
+  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CM_CONNECT);
     return mpi_errno;
- fn_fail:
+  fn_fail:
     goto fn_exit;
 }
 #endif
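
The long comment added near the top of the dcfa_send.c changes above explains why REPLY_SEQ_NUM control packets are enqueued at the head of the software command queue rather than at the tail: if the flow-control update has to wait behind dozens of queued eager sends, each rank ends up waiting on the other's acknowledgement. Below is a minimal, self-contained sketch of that queue discipline; the struct, the function names, and the capacities are invented for illustration and are not the MPICH API.

    /* Toy model of the send-queue discipline described in the comment above:
     * ordinary eager sends go to the tail, but control packets that report
     * receive-side progress (REPLY_SEQ_NUM in the real code) go to the head,
     * so they are drained first and the peer can free ring-buffer slots. */
    #include <stdio.h>
    #include <stdlib.h>

    struct cmd {
        int is_ctrl;              /* 1 = control packet carrying a sequence number */
        struct cmd *next;
    };

    struct sendq {
        struct cmd *head;
        struct cmd *tail;
    };

    static void enqueue_tail(struct sendq *q, struct cmd *c)
    {
        c->next = NULL;
        if (q->tail)
            q->tail->next = c;
        else
            q->head = c;
        q->tail = c;
    }

    static void enqueue_head(struct sendq *q, struct cmd *c)
    {
        c->next = q->head;
        q->head = c;
        if (!q->tail)
            q->tail = c;
    }

    /* Mirror of the "slack" test in the hunks above: send immediately only when
     * the queue is empty and the command/completion queues and the remote ring
     * buffer still leave room for a later control packet.  Capacities are
     * made-up numbers. */
    static int can_send_now(const struct sendq *q, int ncom, int ncqe, int ring_occupancy)
    {
        const int SQ_CAP = 16, CQ_CAP = 16, RING_NSEG = 16, SLACK = 1;
        return q->head == NULL &&
               ncom < SQ_CAP - SLACK &&
               ncqe < CQ_CAP - SLACK &&
               ring_occupancy < RING_NSEG - SLACK;
    }

    int main(void)
    {
        struct sendq q = { NULL, NULL };
        struct cmd *eager = calloc(1, sizeof(*eager));
        struct cmd *ctrl = calloc(1, sizeof(*ctrl));

        if (!eager || !ctrl)
            return 1;
        ctrl->is_ctrl = 1;
        enqueue_tail(&q, eager);   /* data packet waits behind earlier traffic */
        enqueue_head(&q, ctrl);    /* progress report overtakes it */

        printf("first to drain is a %s packet\n", q.head->is_ctrl ? "control" : "data");
        printf("can_send_now with a full ring: %d\n", can_send_now(&q, 0, 0, 16));

        free(eager);
        free(ctrl);
        return 0;
    }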
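
The same dcfa_send.c changes also show the sender switching the receiver's occupancy-notification policy from a low-water (lazy) mode to a high-water (eager) mode once the RDMA-write-to ring buffer passes a high water mark. The following stand-alone sketch shows that kind of hysteresis; the state names and thresholds are made up, and the switch back to the lazy mode is an assumption based on the "change remote policy back to LW" comment in the hunk.

    #include <stdio.h>

    /* Sketch of the occupancy-notification hysteresis: while the ring is
     * mostly empty the remote side reports its progress lazily (LW); once
     * occupancy crosses the high water mark we ask it to report eagerly (HW),
     * and relax again only after occupancy drops well below the mark. */
    enum notify_state { NOTIFY_LW, NOTIFY_HW };

    #define RING_NSEG        16
    #define HIGH_WATER_MARK  12
    #define LOW_WATER_MARK    4

    static enum notify_state update_notify_state(enum notify_state s, int occupancy)
    {
        if (s == NOTIFY_LW && occupancy > HIGH_WATER_MARK) {
            printf("occupancy %d: ask remote side for eager occupancy reports\n", occupancy);
            return NOTIFY_HW;
        }
        if (s == NOTIFY_HW && occupancy < LOW_WATER_MARK) {
            printf("occupancy %d: relax back to lazy occupancy reports\n", occupancy);
            return NOTIFY_LW;
        }
        return s;   /* hysteresis: no change between the two marks */
    }

    int main(void)
    {
        enum notify_state s = NOTIFY_LW;
        int occupancies[] = { 2, 8, 13, 9, 3 };
        for (int i = 0; i < 5; i++)
            s = update_notify_state(s, occupancies[i]);
        return 0;
    }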

http://git.mpich.org/mpich.git/commitdiff/d5afe938afe806c5b1d434659ac158b9423b7f58

commit d5afe938afe806c5b1d434659ac158b9423b7f58
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Thu Nov 28 21:55:57 2013 -0600

    Remove misplaced brace.
    
    Signed-off-by: Masamichi Takagi <masamichi.takagi at gmail.com>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
index ac37bd5..2f88ec8 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
@@ -1698,7 +1698,7 @@ int MPID_nem_dcfa_cm_poll()
         cmd.type = MPID_NEM_DCFA_CM_SYNACK;
         goto common_tail;
         break;
-    case MPID_NEM_DCFA_CM_BUSINESSCARD: {
+    case MPID_NEM_DCFA_CM_BUSINESSCARD:
         ibcom_errno = ibcom_rts(MPID_nem_dcfa_conns[*owner].fd, received->qpnum, received->lid, &(received->gid));
         MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
         ibcom_errno = ibcom_reg_mr_connect(MPID_nem_dcfa_conns[*owner].fd, received->rmem, received->rkey);
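
The case label fixed above jumps to a shared "common_tail" block that several connection commands reuse instead of duplicating the send path. A reduced, hypothetical sketch of that switch idiom follows; the command names and reply logic are invented.

    #include <stdio.h>

    /* Several cases set up slightly different state, then share one tail. */
    enum cmd_type { CMD_SYN, CMD_SYNACK, CMD_ACK };

    static void handle(enum cmd_type received)
    {
        enum cmd_type reply;

        switch (received) {
        case CMD_SYN:
            reply = CMD_SYNACK;
            goto common_tail;
        case CMD_SYNACK:
            reply = CMD_ACK;
            /* falls through to the shared tail */
          common_tail:
            printf("replying with command %d\n", (int) reply);
            break;
        default:
            printf("unknown command %d\n", (int) received);
            break;
        }
    }

    int main(void)
    {
        handle(CMD_SYN);
        handle(CMD_SYNACK);
        return 0;
    }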

http://git.mpich.org/mpich.git/commitdiff/f29c8a467691c36cc73237e11689190b7ea8480d

commit f29c8a467691c36cc73237e11689190b7ea8480d
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Thu Nov 28 21:53:59 2013 -0600

    Move the brace outside of the ifdef so that the block is always properly
    delimited, regardless of which conditional branch is compiled.
    
    Signed-off-by: Masamichi Takagi <masamichi.takagi at gmail.com>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
index d900995..ac37bd5 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
@@ -837,20 +837,20 @@ int MPID_nem_dcfa_poll(int in_blocking_poll) {
 
    // lazy feching of completion queue entry because it causes cache-miss
 #if !defined (LMT_PUT_DONE) && defined (LMT_GET_CQE)
-   if(MPID_nem_dcfa_ncqe_to_drain > 0 || MPID_nem_dcfa_ncqe_nces > 0 || MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full) {
+   if(MPID_nem_dcfa_ncqe_to_drain > 0 || MPID_nem_dcfa_ncqe_nces > 0 || MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full)
 #endif
 #if !defined (LMT_PUT_DONE) && !defined (LMT_GET_CQE)
-   if(/*(in_blocking_poll && result == 0) ||*/ MPID_nem_dcfa_ncqe_nces > 0 || MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full) {
-
+   if(/*(in_blocking_poll && result == 0) ||*/ MPID_nem_dcfa_ncqe_nces > 0 || MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full)
 #endif
+   {
 #if defined (TIMER_WAIT_DCFA_POLL)
-   if(in_blocking_poll) { tsc[0] = MPI_rdtsc(); }
+       if(in_blocking_poll) { tsc[0] = MPI_rdtsc(); }
 #endif
-   //dprintf("dcfa_poll,calling drain_scq\n");
+       //dprintf("dcfa_poll,calling drain_scq\n");
        ibcom_errno = MPID_nem_dcfa_drain_scq(0);
        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
 #if defined (TIMER_WAIT_DCFA_POLL)
-   if(in_blocking_poll) { stsc[0] += MPI_rdtsc() - tsc[0]; }
+       if(in_blocking_poll) { stsc[0] += MPI_rdtsc() - tsc[0]; }
 #endif
    }
 #if 1
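
The fix above keeps the body's opening brace outside the two mutually exclusive #if blocks, so exactly one if-condition plus one brace survives preprocessing no matter which configuration is built. A reduced, stand-alone illustration of the pattern; the LAZY_DRAIN switch and the condition are made up.

    #include <stdio.h>

    #define LAZY_DRAIN 1   /* made-up configuration switch */

    int main(void)
    {
        int pending = 3, almost_full = 0;

    #if defined(LAZY_DRAIN)
        if (pending > 0 || almost_full)
    #else
        if (pending > 2)
    #endif
        {   /* brace lives outside the #if, so the block is always well formed */
            printf("draining %d entries\n", pending);
        }
        return 0;
    }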

http://git.mpich.org/mpich.git/commitdiff/a43515ec77b043ce8378b947206915d9c831ba5a

commit a43515ec77b043ce8378b947206915d9c831ba5a
Author: Masamichi Takagi <masamichi.takagi at gmail.com>
Date:   Fri Nov 29 09:21:27 2013 +0900

    Correct error string
    
    Add the missing "**" prefix to the error strings in dcfa_poll.c and
    dcfa_send.c.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
index 4b22939..d900995 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
@@ -1725,11 +1725,11 @@ int MPID_nem_dcfa_cm_poll()
         *owner = (uint32_t)-1; /* release */
 
         mpi_errno = MPID_nem_dcfa_cm_send_core(rank, &cmd);
-        MPIU_ERR_CHKANDJUMP(mp_errno, mpi_errno, MPI_ERR_OTHER, "MPID_nem_dcfa_cm_send_core"); 
+        MPIU_ERR_CHKANDJUMP(mp_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_send_core"); 
         break;
     default:
         printf("unknown connection command\n");
-        MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "MPID_nem_dcfa_cm_poll");
+        MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_poll");
     }
 
  fn_exit:
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c
index 2017cfb..afd783d 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c
@@ -973,7 +973,7 @@ int MPID_nem_dcfa_cm_connect(MPIDI_VC_t * vc) {
 
     cmd.type = MPID_NEM_DCFA_CM_SYN;
     mpi_errno = MPID_nem_dcfa_cm_send_core(rank, &cmd);
-    MPIU_ERR_CHKANDJUMP(mp_errno, mpi_errno, MPI_ERR_OTHER, "MPID_nem_dcfa_cm_put"); 
+    MPIU_ERR_CHKANDJUMP(mp_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_put"); 
     
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CM_CONNECT);

http://git.mpich.org/mpich.git/commitdiff/28cb70ba8597f09917c694f9f4ee002b44bce29f

commit 28cb70ba8597f09917c694f9f4ee002b44bce29f
Author: Masamichi Takagi <masamichi.takagi at gmail.com>
Date:   Fri Nov 29 09:10:09 2013 +0900

    Comment out debug messages in dcfa_impl.h
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
index 8d77c47..d41a372 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
@@ -224,7 +224,7 @@ static inline void MPID_nem_dcfa_cbf_add(uint64_t addr, uint8_t* array) {
     MPID_nem_dcfa_cbf_inc(array, MPID_nem_dcfa_cbf_hash1(addr));
     MPID_nem_dcfa_cbf_inc(array, MPID_nem_dcfa_cbf_hash2(addr));
     MPID_nem_dcfa_cbf_inc(array, MPID_nem_dcfa_cbf_hash3(addr));
-    dprintf("cbf_add,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
+    //dprintf("cbf_add,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
 }
 static inline void MPID_nem_dcfa_cbf_delete(uint64_t addr, uint8_t* array) {
     //dprintf("cbf_delete,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));
@@ -232,7 +232,7 @@ static inline void MPID_nem_dcfa_cbf_delete(uint64_t addr, uint8_t* array) {
     MPID_nem_dcfa_cbf_dec(array, MPID_nem_dcfa_cbf_hash1(addr));
     MPID_nem_dcfa_cbf_dec(array, MPID_nem_dcfa_cbf_hash2(addr));
     MPID_nem_dcfa_cbf_dec(array, MPID_nem_dcfa_cbf_hash3(addr));
-    dprintf("cbf_delete,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
+    //dprintf("cbf_delete,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
 }
 static inline int MPID_nem_dcfa_cbf_query(uint64_t addr, uint8_t* array) {
     //dprintf("cbf_query,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));

http://git.mpich.org/mpich.git/commitdiff/3f133e8e7c33452aa6ea78b64f5e9838346e5f93

commit 3f133e8e7c33452aa6ea78b64f5e9838346e5f93
Author: Masamichi Takagi <masamichi.takagi at gmail.com>
Date:   Fri Nov 29 09:16:52 2013 +0900

    Use MPIR_STATUS_SET_COUNT for the request struct
    
    Replace direct manipulation of req->status.count with the
    MPIR_STATUS_SET_COUNT macro.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c
index 1a83117..58fff57 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c
@@ -449,7 +449,7 @@ int MPID_nem_dcfa_lmt_done_recv(struct MPIDI_VC *vc, struct MPID_Request *rreq)
             /* received data was not entirely consumed by unpack() 
                because too few bytes remained to fill the next basic
                datatype */
-            rreq->status.count = (int)last;
+            MPIR_STATUS_SET_COUNT(rreq->status, last);
             rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_dcfa_lmt_done_recv", 0);
             /* --END ERROR HANDLING-- */
         }
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
index 8db10f7..4b22939 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
@@ -271,7 +271,7 @@ int MPID_nem_dcfa_drain_scq(int dont_call_progress) {
                    /* received data was not entirely consumed by unpack() 
                       because too few bytes remained to fill the next basic
                       datatype */
-                   req->status.count = (int)last;
+                   MPIR_STATUS_SET_COUNT(req->status, last);
                    req->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_dcfa_poll", 0);
                    /* --END ERROR HANDLING-- */
                }
@@ -745,7 +745,7 @@ int MPID_nem_dcfa_poll(int in_blocking_poll) {
                    /* received data was not entirely consumed by unpack() 
                       because too few bytes remained to fill the next basic
                       datatype */
-                   rreq->status.count = (int)last;
+                   MPIR_STATUS_SET_COUNT(rreq->status, last);
                    rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_dcfa_poll", 0);
                    /* --END ERROR HANDLING-- */
                }
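
The diff above replaces direct writes of status.count with the MPIR_STATUS_SET_COUNT macro, which, as shown, takes the status object and the new count. Below is a simplified stand-alone sketch of why a setter macro is preferable to poking the field directly; the toy_status_t type and TOY_* macros are stand-ins, not MPICH's definitions.

    #include <stdio.h>

    /* All writes to the count go through one macro, so the storage for the
     * count (field name, width, packing) can change in a single place. */
    typedef struct {
        long count_internal;   /* hypothetical storage for the received byte count */
        int MPI_ERROR;
    } toy_status_t;

    #define TOY_STATUS_SET_COUNT(status_, count_) \
        ((status_).count_internal = (long) (count_))

    #define TOY_STATUS_GET_COUNT(status_) ((status_).count_internal)

    int main(void)
    {
        toy_status_t status = { 0, 0 };
        long last = 4096;   /* bytes actually unpacked, as in the diff above */

        /* old style: status.count = (int) last;  -- truncates and hard-codes the layout */
        TOY_STATUS_SET_COUNT(status, last);

        printf("count = %ld\n", TOY_STATUS_GET_COUNT(status));
        return 0;
    }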

http://git.mpich.org/mpich.git/commitdiff/76b0b6696dfeac8ceb1078131d1459f611e43d75

commit 76b0b6696dfeac8ceb1078131d1459f611e43d75
Author: Masamichi Takagi <masamichi.takagi at gmail.com>
Date:   Wed Nov 27 15:54:57 2013 +0900

    Add netmod files for Xeon Phi on DCFA/McKernel
    
    DCFA provides the InfiniBand (IB) drivers for Xeon Phi running on McKernel,
    an OS developed by the University of Tokyo.
    
    Signed-off-by: Pavan Balaji <balaji at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/Makefile.mk b/src/mpid/ch3/channels/nemesis/netmod/Makefile.mk
index 1b56dd0..06f653f 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/Makefile.mk
+++ b/src/mpid/ch3/channels/nemesis/netmod/Makefile.mk
@@ -11,3 +11,4 @@ include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/none/Makefile.mk
 include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/newmad/Makefile.mk
 include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/scif/Makefile.mk
 include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/portals4/Makefile.mk
+include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/dcfa/Makefile.mk
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/Makefile.mk b/src/mpid/ch3/channels/nemesis/netmod/dcfa/Makefile.mk
new file mode 100644
index 0000000..f2bea46
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/Makefile.mk
@@ -0,0 +1,22 @@
+## -*- Mode: Makefile; -*-
+## vim: set ft=automake :
+##
+## (C) 2011 by Argonne National Laboratory.
+##     See COPYRIGHT in top-level directory.
+##
+
+if BUILD_NEMESIS_NETMOD_DCFA
+
+lib_lib at MPILIBNAME@_la_SOURCES +=				\
+    src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_finalize.c	\
+    src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_init.c	\
+    src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c	\
+    src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c	\
+    src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c	\
+    src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c	\
+    src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c 
+
+noinst_HEADERS +=						\
+    src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
+
+endif BUILD_NEMESIS_NETMOD_DCFA
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_finalize.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_finalize.c
new file mode 100644
index 0000000..f283f86
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_finalize.c
@@ -0,0 +1,47 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2012 NEC Corporation
+ *      Author: Masamichi Takagi
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "dcfa_impl.h"
+
+//#define DEBUG_DCFA_FINALIZE
+#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#undef dprintf
+#endif
+#ifdef DEBUG_DCFA_FINALIZE
+#define dprintf printf
+#else
+#define dprintf(...)
+#endif
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_finalize
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_finalize(void)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    int i;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_FINALIZE);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_FINALIZE);
+
+#if 0
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
+        ibcom_errno = ibcom_close(MPID_nem_dcfa_conns[i].fd);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_close");        
+
+    }
+#endif
+
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_FINALIZE);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c
new file mode 100644
index 0000000..47f9e84
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c
@@ -0,0 +1,1986 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2012 NEC Corporation
+ *      Author: Masamichi Takagi
+ *  (C) 2012 Oct 14 Yutaka Ishikawa, ishikawa at is.s.u-tokyo.ac.jp
+ *      See COPYRIGHT in top-level directory.
+ */
+
+/*
+ * TODO:
+ *	- ibcomClean might not clean all allocated memory area. Need to FIX it.
+ *	- During error processing in each function, some memory area might not
+ *	  be deallocated. Look at all functions.
+ */
+#include "dcfa_ibcom.h"
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <assert.h>
+
+//#define DEBUG_IBCOM
+#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#undef dprintf
+#endif
+#ifdef DEBUG_IBCOM
+#define dprintf printf
+#else
+#define dprintf(...)
+#endif
+
+int	dflag;
+
+static int                  sendwr_id = 10;
+static IbCom                contab[IBCOM_SIZE];
+static int                  ib_initialized = 0;
+static int                  maxcon;
+static struct ibv_device	**ib_devlist;
+static struct ibv_context	*ib_ctx;
+struct ibv_context *ib_ctx_export; /* for SC13 demo connector */
+static struct ibv_pd		*ib_pd;
+struct ibv_pd		*ib_pd_export; /* for SC13 demo connector */
+struct ibv_cq               *rc_shared_scq;
+struct ibv_cq               *rc_shared_scq_lmt_put;
+struct ibv_cq               *rc_shared_scq_scratch_pad;
+static struct ibv_cq		*rc_shared_rcq;
+static struct ibv_cq		*rc_shared_rcq_lmt_put;
+static struct ibv_cq		*rc_shared_rcq_scratch_pad;
+static struct ibv_cq		*ud_shared_scq;
+struct ibv_cq		*ud_shared_rcq;
+static uint8_t *scratch_pad = 0;
+
+#define RANGE_CHECK(condesc, conp)			\
+{							\
+    if (condesc < 0 || condesc >= IBCOM_SIZE) return;	\
+    conp = &contab[condesc];				\
+    if (conp->icom_used != 1) return;			\
+}
+
+#define RANGE_CHECK_WITH_ERROR(condesc, conp)		\
+{							\
+    if (condesc < 0 || condesc >= IBCOM_SIZE) {		\
+        return -1;                                  \
+    }                                               \
+    conp = &contab[condesc];                        \
+    IBCOM_ERR_CHKANDJUMP(conp->icom_used != 1, -1, dprintf("RANGE_CHECK_WITH_ERROR,conp->icom_used=%d\n", conp->icom_used)); \
+}
+
+static int modify_qp_to_init(struct ibv_qp *qp, int ib_port) {
+    struct ibv_qp_attr attr;
+    int flags;
+    int rc;
+
+    memset(&attr, 0, sizeof(attr));
+    attr.qp_state = IBV_QPS_INIT;
+    attr.port_num = ib_port;
+    attr.pkey_index = 0;
+    attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
+    flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
+    rc = ibv_modify_qp(qp, &attr, flags);
+    if (rc) {
+        fprintf(stderr, "failed to modify QP state to INIT\n");
+    }
+    return rc;
+}
+
+static int modify_qp_to_rtr(struct ibv_qp *qp, uint32_t remote_qpn, uint16_t dlid, union ibv_gid *dgid, int ib_port, int gid_idx) {
+    struct ibv_qp_attr	attr;
+    int		flags;
+    int		rc;
+
+    memset(&attr, 0, sizeof(attr));
+    attr.qp_state = IBV_QPS_RTR;
+    attr.path_mtu = IBV_MTU_2048;
+    //attr.path_mtu = IBV_MTU_1024;
+    //attr.path_mtu = IBV_MTU_256; /* DCFA */
+    attr.dest_qp_num = remote_qpn;
+    attr.rq_psn = 0;
+    attr.max_dest_rd_atomic = IBCOM_MAX_RD_ATOMIC;
+    //attr.max_dest_rd_atomic = 1;
+    //attr.max_dest_rd_atomic = 0; /* DCFA */
+
+    /* Default is 0x12 (= 5.12ms) see IB Spec. Rel. 1.2, Vol. 1, 9.7.5.2.8 */
+    attr.min_rnr_timer = 0x12; 
+
+    attr.ah_attr.dlid = dlid;
+    attr.ah_attr.sl = 0;
+    attr.ah_attr.src_path_bits = 0;
+    attr.ah_attr.is_global = 0;
+    attr.ah_attr.port_num = ib_port;
+
+    /* In dcfa gid is not set and for testing here it is also not set */
+#if 1
+#ifdef DCFA /* DCFA doesn't use gid */
+#else
+    if (gid_idx >= 0) {
+        attr.ah_attr.is_global = 1;
+        attr.ah_attr.port_num = ib_port;
+        memcpy(&attr.ah_attr.grh.dgid, dgid, 16);
+        attr.ah_attr.grh.flow_label = 0;
+        attr.ah_attr.grh.hop_limit = 1;
+        attr.ah_attr.grh.sgid_index = gid_idx;
+        attr.ah_attr.grh.traffic_class = 0;
+    }
+#endif
+#endif
+
+    flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN
+	| IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
+    rc = ibv_modify_qp(qp, &attr, flags);
+    if(rc) { dprintf("failed to modify QP state to RTR\n"); }
+    return rc;
+}
+
+static int modify_qp_to_rts(struct ibv_qp *qp) {
+    struct ibv_qp_attr	attr;
+    int		flags;
+    int		rc;
+
+    memset(&attr, 0, sizeof(attr));
+    attr.qp_state = IBV_QPS_RTS;
+    attr.timeout = (0x14); /* timeout 4.096us * 2^x */
+    attr.retry_cnt = 7;
+    attr.rnr_retry = 7;
+    attr.sq_psn = 0;
+    attr.max_rd_atomic = IBCOM_MAX_RD_ATOMIC;
+    //attr.max_rd_atomic = 1;
+    //attr.max_rd_atomic = 0;	/* DCFA */
+
+    flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT
+	| IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC;
+    rc = ibv_modify_qp(qp, &attr, flags);
+    if (rc){
+        fprintf(stderr, "failed to modify QP state to RTS\n");
+    }
+    return rc;
+}
+
+/* called from ibcomOpen if needed */
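+/* Open the first usable HCA (or "mlx4_0" when not built for DCFA), allocate a
+ * protection domain, and cache both in ib_ctx_export / ib_pd_export so the SC13
+ * demo connector can share them; also initializes the ibv_reg_mr cache. */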
+static int ibcomDeviceinit() {
+    int ibcom_errno = 0;
+    int		dev_num;
+    char	*dev_name;
+    int i;
+    
+    if (ib_initialized == 1) { 
+        dprintf("ibcomDeviceinit,already initialized\n");
+        return 0;
+    }
+    if (ib_initialized == -1) return -1;
+
+    /* initialize ibv_reg_mr cache */
+    ibcom_RegisterCacheInit();
+    
+    /* Get the device list */
+    ib_devlist = ibv_get_device_list(&dev_num);
+    if(!ib_devlist || !dev_num) {
+        fprintf(stderr, "No IB device is found\n");
+        return -1;
+    }
+
+#ifdef DCFA
+    for(i = 0; i < dev_num; i++) {
+        if(ib_devlist[i]) { goto dev_found; }
+    }    
+#else
+    for(i = 0; i < dev_num; i++) {
+        if(!strcmp(ibv_get_device_name(ib_devlist[i]), "mlx4_0")) { goto dev_found; }
+    }
+#endif
+    IBCOM_ERR_SETANDJUMP(-1, printf("IB device not found"));
+ dev_found:
+
+    /* Open the requested device */
+    if(ib_ctx_export) {
+        ib_ctx = ib_ctx_export;
+    } else {
+        ib_ctx = ibv_open_device(ib_devlist[i]);
+    }
+    dprintf("ibcomDeviceinit,ib_ctx_export=%p,ib_ctx=%p\n", ib_ctx_export, ib_ctx);
+    if(!ib_ctx){	
+        fprintf(stderr, "failed to open IB device\n");
+        goto err_exit;
+    }
+    ib_ctx_export = ib_ctx;
+#ifdef DCFA
+#else
+    dev_name = strdup(ibv_get_device_name(ib_devlist[i]));
+	dprintf("ibcomDeviceinit,dev_name=%s\n", dev_name);
+#endif
+    /* Create a PD */
+    if(ib_pd_export) {
+        ib_pd = ib_pd_export;
+    } else {
+        ib_pd = ibv_alloc_pd(ib_ctx);
+    }
+    dprintf("ibcomDeviceinit,ib_pd_export=%p,ib_pd=%p\n", ib_pd_export, ib_pd);
+    if (!ib_pd){
+        fprintf(stderr, "ibv_alloc_pd failed\n");
+        goto err_exit;
+    }
+    ib_pd_export = ib_pd;
+
+    ib_initialized = 1;
+ fn_exit:
+    return ibcom_errno;
+
+ err_exit:
+    ib_initialized = -1;
+    if (ib_devlist) ibv_free_device_list(ib_devlist);
+    if (ib_ctx) ibv_close_device(ib_ctx);
+    return -1;
+ fn_fail:
+    goto fn_exit;
+}
+
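+/* Tear down one connection: destroy its QP, deregister and free its MR list, unmap
+ * the RDMA-write-from/-to buffers, destroy the CQs it references and free the
+ * remote-memory bookkeeping arrays, then zero the IbCom slot. */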
+static void ibcomClean(IbCom *conp) {
+    int	i;
+
+    if(conp->icom_qp) ibv_destroy_qp(conp->icom_qp);
+    if(conp->icom_mrlist && conp->icom_mrlen > 0){
+        switch(conp->open_flag) {
+        case IBCOM_OPEN_RC:
+            for(i = 0; i < IBCOM_NBUF_RDMA; i++) {
+                if (conp->icom_mrlist[i]) {
+                    ibv_dereg_mr(conp->icom_mrlist[i]);
+                }
+            }
+            break;
+        case IBCOM_OPEN_SCRATCH_PAD:
+            for(i = 0; i < IBCOM_NBUF_SCRATCH_PAD; i++) {
+                if (conp->icom_mrlist[i]) {
+                    ibv_dereg_mr(conp->icom_mrlist[i]);
+                }
+            }
+            break;
+        case IBCOM_OPEN_UD: 
+            for(i = 0; i < IBCOM_NBUF_UD; i++) {
+                if (conp->icom_mrlist[i]) {
+                    ibv_dereg_mr(conp->icom_mrlist[i]);
+                }
+            }
+            break;
+        }
+        free(conp->icom_mrlist);
+    }
+    if(conp->icom_mem[IBCOM_RDMAWR_FROM]) {
+        munmap(conp->icom_mem[IBCOM_RDMAWR_FROM], IBCOM_RDMABUF_SZ);
+    }
+    if(conp->icom_mem[IBCOM_RDMAWR_TO]) {
+        munmap(conp->icom_mem[IBCOM_RDMAWR_TO], IBCOM_RDMABUF_SZ);
+    }
+    if(conp->icom_scq) {
+        ibv_destroy_cq(conp->icom_scq);
+    }
+    if(conp->icom_rcq) {
+        ibv_destroy_cq(conp->icom_rcq);
+    }
+    if(conp->icom_rmem) {
+        free(conp->icom_rmem);
+    }
+    if(conp->icom_rsize) {
+        free(conp->icom_rsize);
+    }
+    if(conp->icom_rkey) {
+        free(conp->icom_rkey);
+    }
+    memset(conp, 0, sizeof(IbCom));
+    // TODO: free ah, sge, command template, ...
+}
+
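+/* Create a connection endpoint: grab a free slot in contab, create (or reuse) the
+ * shared send/recv CQs for the requested mode, create the QP, mmap and register the
+ * local RDMA/UD buffers, and pre-build the work-request templates used by the
+ * ibcom_isend, ibcom_lrecv and ibcom_put_* paths. */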
+int ibcomOpen(int ib_port, int ibcom_open_flag, int* condesc) {
+    int ibcom_errno = 0, ib_errno;
+    IbCom	*conp;
+    struct ibv_qp_init_attr	qp_init_attr;
+    struct ibv_sge		*sge;
+    struct ibv_send_wr		*sr;
+    struct ibv_recv_wr		*rr, *bad_wr;
+    int		mr_flags;
+    int		i;
+
+    dprintf("ibcomOpen,port=%d,flag=%08x\n", ib_port, ibcom_open_flag);
+
+    int ibcom_open_flag_conn = ibcom_open_flag;
+    if(ibcom_open_flag_conn != IBCOM_OPEN_RC &&
+       ibcom_open_flag_conn != IBCOM_OPEN_RC_LMT_PUT &&
+       ibcom_open_flag_conn != IBCOM_OPEN_UD &&
+       ibcom_open_flag_conn != IBCOM_OPEN_SCRATCH_PAD) {
+        dprintf("ibcomOpen,bad flag\n");
+        ibcom_errno = -1;
+        goto fn_fail;
+    }
+
+    /* device open error */
+    if(ibcomDeviceinit() < 0) { ibcom_errno = -1; goto fn_fail; }
+
+    /* no more connections can be established */
+    if(maxcon == IBCOM_SIZE) { ibcom_errno = -1; goto fn_fail; }
+
+    for(*condesc = 0; *condesc < IBCOM_SIZE; (*condesc)++) {
+        //dprintf("*condesc=%d,used=%d\n", *condesc, contab[*condesc].icom_used);
+        if(contab[*condesc].icom_used == 0) { goto ok_cont; }
+    }
+    /* count says not full, but we couldn't find a vacant slot */
+    dprintf("contab is inconsistent\n");
+    ibcom_errno = -1;
+    goto fn_fail;
+
+ok_cont:
+    dprintf("ibcomOpen,condesc=%d\n", *condesc);
+    conp = &contab[*condesc];
+    memset(conp, 0, sizeof(IbCom));
+    conp->icom_used = 1;
+    conp->icom_port = ib_port;
+    conp->open_flag = ibcom_open_flag;
+    conp->rsr_seq_num_poll = 0; /* it means slot 0 is polled */
+    conp->rsr_seq_num_tail = -1; /* it means slot 0 is not released */
+    conp->rsr_seq_num_tail_last_sent = -1;
+    conp->lsr_seq_num_tail = -1;
+    conp->lsr_seq_num_tail_last_requested = -2;
+    conp->rdmabuf_occupancy_notify_rstate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW;
+    conp->rdmabuf_occupancy_notify_lstate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW;
+    //dprintf("ibcomOpen,ptr=%p,rsr_seq_num_poll=%d\n", conp, conp->rsr_seq_num_poll);
+
+#ifdef DCFA
+#else
+    if(ibv_query_port(ib_ctx, ib_port, &conp->icom_pattr)) {
+        dprintf("ibv_query_port on port %u failed\n", ib_port);
+        goto err_exit;
+    }
+#endif
+
+    /* Create send/recv CQ */
+    switch(ibcom_open_flag) {
+    case IBCOM_OPEN_RC:
+        if(!rc_shared_scq) {
+#ifdef DCFA
+            rc_shared_scq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
+#else
+            rc_shared_scq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY, NULL, NULL, 0);
+#endif
+            IBCOM_ERR_CHKANDJUMP(!rc_shared_scq, -1, dprintf("rc_shared_scq"));
+        }
+        conp->icom_scq = rc_shared_scq;
+    
+        if(!rc_shared_rcq) {
+#ifdef DCFA
+            rc_shared_rcq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
+#else
+            rc_shared_rcq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY, NULL, NULL, 0);
+#endif
+            IBCOM_ERR_CHKANDJUMP(!rc_shared_rcq, -1, dprintf("rc_shared_rcq"));
+        }
+        conp->icom_rcq = rc_shared_rcq;
+        break;
+    case IBCOM_OPEN_SCRATCH_PAD:
+        if(!rc_shared_scq_scratch_pad) {
+#ifdef DCFA
+            rc_shared_scq_scratch_pad = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
+#else
+            rc_shared_scq_scratch_pad = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY, NULL, NULL, 0);
+#endif
+            IBCOM_ERR_CHKANDJUMP(!rc_shared_scq_scratch_pad, -1, dprintf("rc_shared_scq"));
+        }
+        conp->icom_scq = rc_shared_scq_scratch_pad;
+    
+        if(!rc_shared_rcq_scratch_pad) {
+#ifdef DCFA
+            rc_shared_rcq_scratch_pad = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
+#else
+            rc_shared_rcq_scratch_pad = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY, NULL, NULL, 0);
+#endif
+            IBCOM_ERR_CHKANDJUMP(!rc_shared_rcq_scratch_pad, -1, dprintf("rc_shared_rcq"));
+        }
+        conp->icom_rcq = rc_shared_rcq_scratch_pad;
+        break;
+    case IBCOM_OPEN_RC_LMT_PUT:
+        if(!rc_shared_scq_lmt_put) {
+#ifdef DCFA
+            rc_shared_scq_lmt_put = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
+#else
+            rc_shared_scq_lmt_put = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY, NULL, NULL, 0);
+#endif
+            IBCOM_ERR_CHKANDJUMP(!rc_shared_scq_lmt_put, -1, dprintf("rc_shared_scq"));
+        }
+        conp->icom_scq = rc_shared_scq_lmt_put;
+    
+        if(!rc_shared_rcq_lmt_put) {
+#ifdef DCFA
+            rc_shared_rcq_lmt_put = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
+#else
+            rc_shared_rcq_lmt_put = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY, NULL, NULL, 0);
+#endif
+            IBCOM_ERR_CHKANDJUMP(!rc_shared_rcq_lmt_put, -1, dprintf("rc_shared_rcq"));
+        }
+        conp->icom_rcq = rc_shared_rcq_lmt_put;
+        break;
+    case IBCOM_OPEN_UD:
+        if(!ud_shared_scq) {
+#ifdef DCFA
+            ud_shared_scq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
+#else
+            ud_shared_scq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY, NULL, NULL, 0);
+#endif
+            IBCOM_ERR_CHKANDJUMP(!ud_shared_scq, -1, dprintf("ud_shared_scq"));
+        }
+        conp->icom_scq = ud_shared_scq;
+
+        if(!ud_shared_rcq) {
+#ifdef DCFA
+            ud_shared_rcq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY);
+#else
+            ud_shared_rcq = ibv_create_cq(ib_ctx, IBCOM_MAX_CQ_CAPACITY, NULL, NULL, 0);
+#endif
+            IBCOM_ERR_CHKANDJUMP(!ud_shared_rcq, -1, dprintf("ud_shared_rcq")); 
+        }
+        conp->icom_rcq = ud_shared_rcq; 
+        break;
+    }        
+
+    /* Create QP */
+    memset(&qp_init_attr, 0, sizeof(qp_init_attr));
+    qp_init_attr.send_cq = conp->icom_scq;
+    qp_init_attr.recv_cq = conp->icom_rcq;
+    qp_init_attr.cap.max_send_wr = IBCOM_MAX_SQ_CAPACITY;
+    qp_init_attr.cap.max_recv_wr = IBCOM_MAX_RQ_CAPACITY;
+    qp_init_attr.cap.max_send_sge = IBCOM_MAX_SGE_CAPACITY;
+    qp_init_attr.cap.max_recv_sge = IBCOM_MAX_SGE_CAPACITY;
+    qp_init_attr.cap.max_inline_data = IBCOM_INLINE_DATA;
+    switch(ibcom_open_flag) {
+    case IBCOM_OPEN_RC:
+    case IBCOM_OPEN_RC_LMT_PUT:
+    case IBCOM_OPEN_SCRATCH_PAD:
+        qp_init_attr.qp_type = IBV_QPT_RC;
+        break;
+    case IBCOM_OPEN_UD:
+        qp_init_attr.qp_type = IBV_QPT_UD;
+        break;
+    default:
+        IBCOM_ERR_CHKANDJUMP(1, -1, dprintf("invalid ibcom_open_flag\n"));
+        break;
+    }
+    qp_init_attr.sq_sig_all = 1;
+
+    conp->icom_qp = ibv_create_qp(ib_pd, &qp_init_attr);
+    IBCOM_ERR_CHKANDJUMP(!conp->icom_qp, -1, printf("ibv_create_qp\n"));
+
+    conp->max_send_wr = qp_init_attr.cap.max_send_wr;
+    conp->max_recv_wr = qp_init_attr.cap.max_recv_wr;
+    conp->max_inline_data = qp_init_attr.cap.max_inline_data;
+
+    dprintf("ibcomOpen,max_send_wr=%d,max_recv_wr=%d,max_inline_data=%d\n", qp_init_attr.cap.max_send_wr, qp_init_attr.cap.max_recv_wr, qp_init_attr.cap.max_inline_data);
+    dprintf("ibcomOpen,fd=%d,qpn=%08x\n", *condesc, conp->icom_qp->qp_num);
+#ifdef DCFA
+    dprintf("ibcomOpen,fd=%d,lid=%04x\n", *condesc, ib_ctx->lid);
+#else
+    dprintf("ibcomOpen,fd=%d,lid=%04x\n", *condesc, conp->icom_pattr.lid);
+#endif
+
+#ifdef DCFA
+    /* DCFA doesn't use gid */
+    for(i = 0; i < 16; i++) { conp->icom_gid.raw[i] = 0; }
+#else
+    ib_errno = ibv_query_gid(ib_ctx, ib_port, 0, &conp->icom_gid);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibv_query_gid\n"));
+
+    dprintf("ibcomOpen,fd=%d,my_gid=", *condesc);
+    for(i = 0; i < 16; i++) { dprintf("%02x", (int)conp->icom_gid.raw[i]); }
+    dprintf("\n");
+#endif
+
+    /* buffers */
+    switch(ibcom_open_flag) {
+    case IBCOM_OPEN_RC:
+        /* RDMA-write-from and -to local memory area */
+        conp->icom_mrlist = malloc(sizeof(struct ibv_mr*) * IBCOM_NBUF_RDMA);
+        memset(conp->icom_mrlist, 0, sizeof(struct ibv_mr*) * IBCOM_NBUF_RDMA);
+        conp->icom_mrlen = IBCOM_NBUF_RDMA;
+        conp->icom_mem = (void **) malloc(sizeof(void**) * IBCOM_NBUF_RDMA);
+        //printf("open,icom_mem=%p\n", conp->icom_mem);
+        memset(conp->icom_mem, 0, sizeof(void**) * IBCOM_NBUF_RDMA);
+        conp->icom_msize = (int*) malloc(sizeof(int*)* IBCOM_NBUF_RDMA);
+        memset(conp->icom_msize, 0, sizeof(int*) * IBCOM_NBUF_RDMA);
+        mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE ;
+        
+        /* RDMA-write-from local memory area */
+        conp->icom_msize[IBCOM_RDMAWR_FROM] = IBCOM_RDMABUF_SZ;
+        conp->icom_mem[IBCOM_RDMAWR_FROM] = mmap(0, IBCOM_RDMABUF_SZ, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        dprintf("IbcomOpen,mmap=%p,len=%d\n", conp->icom_mem[IBCOM_RDMAWR_FROM], IBCOM_RDMABUF_SZ);
+        if (conp->icom_mem[IBCOM_RDMAWR_FROM] == (void*)-1){
+            fprintf(stderr, "failed to allocate buffer\n");
+            goto err_exit;
+        }
+        memset(conp->icom_mem[IBCOM_RDMAWR_FROM], 0 , conp->icom_msize[IBCOM_RDMAWR_FROM]);
+        
+        conp->icom_mrlist[IBCOM_RDMAWR_FROM] = ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_RDMAWR_FROM], conp->icom_msize[IBCOM_RDMAWR_FROM]);
+        if (!conp->icom_mrlist[IBCOM_RDMAWR_FROM]){
+            fprintf(stderr, "ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags);
+            goto err_exit;
+        }
+        
+        /* RDMA-write-to local memory area */
+        conp->icom_msize[IBCOM_RDMAWR_TO] = IBCOM_RDMABUF_SZ;
+#if 0
+	int shmid = shmget(2, IBCOM_RDMABUF_SZ, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+	IBCOM_ERR_CHKANDJUMP(shmid < 0, -1,  perror("shmget"));
+	conp->icom_mem[IBCOM_RDMAWR_TO] = shmat(shmid, 0, 0);
+	if(conp->icom_mem[IBCOM_RDMAWR_TO] == (char *)-1) {
+	  perror("Shared memory attach failure"); 
+	  shmctl(shmid, IPC_RMID, NULL);
+	  ibcom_errno = -1;
+	  goto fn_fail;
+	}
+#else
+        conp->icom_mem[IBCOM_RDMAWR_TO] = mmap(0, IBCOM_RDMABUF_SZ, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        dprintf("IbcomOpen,mmap=%p,len=%d\n", conp->icom_mem[IBCOM_RDMAWR_TO], IBCOM_RDMABUF_SZ);
+#endif
+        if (conp->icom_mem[IBCOM_RDMAWR_TO] == (void*)-1){
+            fprintf(stderr, "failed to allocate buffer\n");
+            goto err_exit;
+        }
+        memset(conp->icom_mem[IBCOM_RDMAWR_TO], 0 , conp->icom_msize[IBCOM_RDMAWR_TO]);
+        
+        conp->icom_mrlist[IBCOM_RDMAWR_TO] = ibv_reg_mr(ib_pd, conp->icom_mem[IBCOM_RDMAWR_TO], conp->icom_msize[IBCOM_RDMAWR_TO], mr_flags);
+        if (!conp->icom_mrlist[IBCOM_RDMAWR_TO]){
+            fprintf(stderr, "ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags);
+            goto err_exit;
+        }
+#ifdef DCFA
+        dprintf("ibcomOpen,fd=%d,rmem=%p\n", *condesc, conp->icom_mrlist[IBCOM_RDMAWR_TO]->buf);
+#else
+        dprintf("ibcomOpen,fd=%d,rmem=%p\n", *condesc, conp->icom_mrlist[IBCOM_RDMAWR_TO]->addr);
+#endif
+        dprintf("ibcomOpen,fd=%d,rkey=%08x\n", *condesc, conp->icom_mrlist[IBCOM_RDMAWR_TO]->rkey);
+        
+        /* RDMA-write-to remote memory area */
+        conp->icom_rmem = (void**) malloc(sizeof(void**) * IBCOM_NBUF_RDMA);
+        if (conp->icom_rmem == 0) goto err_exit;
+        memset(conp->icom_rmem, 0, sizeof(void**) * IBCOM_NBUF_RDMA);
+        
+        conp->icom_rsize = (size_t*) malloc(sizeof(void**) * IBCOM_NBUF_RDMA);
+        if (conp->icom_rsize == 0) goto err_exit;
+        memset(conp->icom_rsize, 0, sizeof(void**) * IBCOM_NBUF_RDMA);
+        
+        conp->icom_rkey = (int*) malloc(sizeof(int) * IBCOM_NBUF_RDMA);
+        if (conp->icom_rkey == 0) goto err_exit;
+        memset(conp->icom_rkey, 0, sizeof(int) *  IBCOM_NBUF_RDMA);
+        break;
+    case IBCOM_OPEN_SCRATCH_PAD:
+        /* RDMA-write-from and -to local memory area */
+        conp->icom_mrlist = malloc(sizeof(struct ibv_mr*) * IBCOM_NBUF_SCRATCH_PAD);
+        memset(conp->icom_mrlist, 0, sizeof(struct ibv_mr*) * IBCOM_NBUF_SCRATCH_PAD);
+        conp->icom_mrlen = IBCOM_NBUF_SCRATCH_PAD;
+        conp->icom_mem = (void **) malloc(sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
+        memset(conp->icom_mem, 0, sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
+        conp->icom_msize = (int*) malloc(sizeof(int*) * IBCOM_NBUF_SCRATCH_PAD);
+        memset(conp->icom_msize, 0, sizeof(int*) * IBCOM_NBUF_SCRATCH_PAD);
+        mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE ;
+        
+        /* RDMA-write-to remote memory area */
+        conp->icom_rmem = (void**)malloc(sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
+        IBCOM_ERR_CHKANDJUMP(conp->icom_rmem == 0, -1, dprintf("malloc failed\n"));
+        memset(conp->icom_rmem, 0, sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
+        
+        conp->icom_rsize = (size_t*)malloc(sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
+        IBCOM_ERR_CHKANDJUMP(conp->icom_rsize == 0, -1, dprintf("malloc failed\n"));
+        memset(conp->icom_rsize, 0, sizeof(void**) * IBCOM_NBUF_SCRATCH_PAD);
+
+        conp->icom_rkey = (int*)malloc(sizeof(int) * IBCOM_NBUF_SCRATCH_PAD);
+        IBCOM_ERR_CHKANDJUMP(conp->icom_rkey == 0, -1, dprintf("malloc failed\n"));
+        memset(conp->icom_rkey, 0, sizeof(int) *  IBCOM_NBUF_SCRATCH_PAD);
+        break;
+
+    case IBCOM_OPEN_UD:
+        /* UD-write-from and -to local memory area */
+        conp->icom_mrlist = malloc(sizeof(struct ibv_mr*) * IBCOM_NBUF_UD);
+        memset(conp->icom_mrlist, 0, sizeof(struct ibv_mr*) * IBCOM_NBUF_UD);
+        conp->icom_mrlen = IBCOM_NBUF_UD;
+        conp->icom_mem = (void **) malloc(sizeof(void**) * IBCOM_NBUF_UD);
+        memset(conp->icom_mem, 0, sizeof(void**) * IBCOM_NBUF_UD);
+        conp->icom_msize = (int*) malloc(sizeof(int*)* IBCOM_NBUF_UD);
+        memset(conp->icom_msize, 0, sizeof(int*) * IBCOM_NBUF_UD);
+        mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE ;
+        
+        /* UD-write-from local memory area */
+        IBCOM_ERR_CHKANDJUMP(IBCOM_UDBUF_SZ <= 40, -1, dprintf("buf_size too short\n")); 
+        conp->icom_msize[IBCOM_UDWR_FROM] = IBCOM_UDBUF_SZ;
+        conp->icom_mem[IBCOM_UDWR_FROM] = mmap(0, IBCOM_UDBUF_SZ, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        dprintf("IbcomOpen,mmap=%p,len=%d\n", conp->icom_mem[IBCOM_UDWR_FROM], IBCOM_UDBUF_SZ);
+        IBCOM_ERR_CHKANDJUMP(conp->icom_mem[IBCOM_UDWR_FROM] == (void*)-1, -1, dprintf("failed to allocate buffer\n"));
+        memset(conp->icom_mem[IBCOM_UDWR_FROM], 0 , conp->icom_msize[IBCOM_UDWR_FROM]);
+        
+        conp->icom_mrlist[IBCOM_UDWR_FROM] = ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_UDWR_FROM], conp->icom_msize[IBCOM_UDWR_FROM]);
+        IBCOM_ERR_CHKANDJUMP(!conp->icom_mrlist[IBCOM_UDWR_FROM], -1, dprintf("ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags));
+
+        /* UD-write-to local memory area */
+        /* addr to addr+39 are not filled, addr+40 to addr+length-1 are filled with payload */
+        IBCOM_ERR_CHKANDJUMP(IBCOM_UDBUF_SZ <= 40, -1, dprintf("buf_size too short\n")); 
+        conp->icom_msize[IBCOM_UDWR_TO] = IBCOM_UDBUF_SZ;
+        conp->icom_mem[IBCOM_UDWR_TO] = mmap(0, IBCOM_UDBUF_SZ, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        dprintf("IbcomOpen,mmap=%p,len=%d\n", conp->icom_mem[IBCOM_UDWR_TO], IBCOM_UDBUF_SZ);
+        IBCOM_ERR_CHKANDJUMP(conp->icom_mem[IBCOM_UDWR_TO] == (void*)-1, -1, dprintf("failed to allocate buffer\n"));
+        memset(conp->icom_mem[IBCOM_UDWR_TO], 0 , conp->icom_msize[IBCOM_UDWR_TO]);
+        
+        conp->icom_mrlist[IBCOM_UDWR_TO] = ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_UDWR_TO], conp->icom_msize[IBCOM_UDWR_TO]);
+        IBCOM_ERR_CHKANDJUMP(!conp->icom_mrlist[IBCOM_UDWR_TO], -1, dprintf("ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags));
+
+        /* initialize arena allocator for IBCOM_UDWR_TO */
+        //ibcom_udbuf_init(conp->icom_mem[IBCOM_UDWR_TO]);
+
+        dprintf("ibcomOpen,ud,fd=%d,lkey=%08x\n", *condesc, conp->icom_mrlist[IBCOM_UDWR_TO]->lkey);
+        break;
+    default:
+        IBCOM_ERR_CHKANDJUMP(1, -1, dprintf("invalid ibcom_open_flag\n"));
+        break;
+
+    }
+
+    /* command templates */
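+    /* These are pre-built ibv_send_wr / ibv_recv_wr skeletons: the send/receive paths
+     * below only patch addr/length/lkey/wr_id/remote_addr before posting, instead of
+     * rebuilding whole work requests on every operation. */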
+    switch(ibcom_open_flag) {
+    case IBCOM_OPEN_RC: 
+        
+        /* SR (send request) template */
+        conp->icom_sr = (struct ibv_send_wr*)malloc(sizeof(struct ibv_send_wr) * IBCOM_RC_SR_NTEMPLATE);
+        memset(conp->icom_sr, 0, sizeof(struct ibv_send_wr) * IBCOM_RC_SR_NTEMPLATE);
+        
+        int i;
+        for(i = 0; i < IBCOM_SMT_INLINE_NCHAIN; i++) {
+            /* SGE (RDMA-send-from memory) template */
+#ifdef DCFA
+            memset(&(conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+#else
+            struct ibv_sge *sge = (struct ibv_sge*)malloc(sizeof(struct ibv_sge) * IBCOM_SMT_INLINE_INITIATOR_NSGE);
+            memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_SMT_INLINE_INITIATOR_NSGE);
+#endif            
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].next = (i == IBCOM_SMT_INLINE_NCHAIN - 1) ? NULL : &conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i + 1];
+#ifdef DCFA
+#else
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list = sge;
+#endif
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].opcode = IBV_WR_RDMA_WRITE;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
+        }
+
+        {
+#ifdef DCFA
+            memset(&(conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+#else
+            struct ibv_sge *sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_SMT_NOINLINE_INITIATOR_NSGE);
+            memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_SMT_NOINLINE_INITIATOR_NSGE);
+#endif            
+            conp->icom_sr[IBCOM_SMT_NOINLINE].next = NULL;
+#ifdef DCFA
+#else
+            conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list = sge;
+#endif
+            conp->icom_sr[IBCOM_SMT_NOINLINE].opcode = IBV_WR_RDMA_WRITE;
+            conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags = IBV_SEND_SIGNALED;
+        }
+        {
+            /* SR (send request) template for IBCOM_LMT_INITIATOR */
+#ifdef DCFA
+            memset(&(conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+#else
+            struct ibv_sge *sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_LMT_INITIATOR_NSGE);
+            memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_LMT_INITIATOR_NSGE);
+#endif
+            conp->icom_sr[IBCOM_LMT_INITIATOR].next = NULL;
+#ifdef DCFA
+#else
+            conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list = sge;
+#endif
+            conp->icom_sr[IBCOM_LMT_INITIATOR].opcode = IBV_WR_RDMA_READ;
+            conp->icom_sr[IBCOM_LMT_INITIATOR].send_flags = IBV_SEND_SIGNALED;
+        }            
+
+        /* SR (send request) template for IBCOM_LMT_PUT */ /* for lmt-put-done */
+#ifdef DCFA
+        memset(&(conp->icom_sr[IBCOM_LMT_PUT].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+#else
+        sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
+        memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
+#endif
+        conp->icom_sr[IBCOM_LMT_PUT].next = NULL;
+#ifdef DCFA
+#else
+        conp->icom_sr[IBCOM_LMT_PUT].sg_list = sge;
+#endif
+        conp->icom_sr[IBCOM_LMT_PUT].opcode = IBV_WR_RDMA_WRITE;
+        conp->icom_sr[IBCOM_LMT_PUT].send_flags = IBV_SEND_SIGNALED;
+
+        /* SR (send request) template for IBCOM_RDMAWR_FRMFIXED */
+        /* not implemented */
+        
+        /* SGE (scatter gather element) template for recv */
+        /* nothing is required for RDMA-write */
+        
+        /* RR (receive request) template for IBCOM_RDMAWR_RESPONDER */
+        conp->icom_rr = (struct ibv_recv_wr*)malloc(sizeof(struct ibv_recv_wr) * IBCOM_RC_RR_NTEMPLATE);
+        memset(conp->icom_rr, 0, sizeof(struct ibv_recv_wr) * IBCOM_RC_RR_NTEMPLATE);
+        
+        /* create one dummy RR to ibv_post_recv */
+        conp->icom_rr[IBCOM_RDMAWR_RESPONDER].next = NULL;
+#ifdef DCFA
+#else
+        conp->icom_rr[IBCOM_RDMAWR_RESPONDER].sg_list = NULL;
+#endif
+        conp->icom_rr[IBCOM_RDMAWR_RESPONDER].num_sge = 0;
+        break;
+
+    case IBCOM_OPEN_SCRATCH_PAD: {
+        /* SR (send request) template */
+        conp->icom_sr = (struct ibv_send_wr*)malloc(sizeof(struct ibv_send_wr) * IBCOM_SCRATCH_PAD_SR_NTEMPLATE);
+        memset(conp->icom_sr, 0, sizeof(struct ibv_send_wr) * IBCOM_SCRATCH_PAD_SR_NTEMPLATE);
+
+        /* SR (send request) template for IBCOM_SCRATCH_PAD_INITIATOR */
+#ifdef DCFA
+        memset(&(conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+#else
+        struct ibv_sge *sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_SCRATCH_PAD_INITIATOR_NSGE);
+        memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_SCRATCH_PAD_INITIATOR_NSGE);
+#endif
+        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].next = NULL;
+#ifdef DCFA
+#else
+        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list = sge;
+#endif
+        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].num_sge = 1;
+        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].opcode = IBV_WR_RDMA_WRITE;
+        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].send_flags = IBV_SEND_SIGNALED;
+        break; }
+
+    case IBCOM_OPEN_RC_LMT_PUT:
+        /* SR (send request) template */
+        conp->icom_sr = (struct ibv_send_wr*)malloc(sizeof(struct ibv_send_wr) * IBCOM_RC_SR_LMT_PUT_NTEMPLATE);
+        memset(conp->icom_sr, 0, sizeof(struct ibv_send_wr) * IBCOM_RC_SR_LMT_PUT_NTEMPLATE);
+        /* SR (send request) template for IBCOM_LMT_PUT */
+#ifdef DCFA
+        memset(&(conp->icom_sr[IBCOM_LMT_PUT].sg_list[0]), 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+#else
+        sge = (struct ibv_sge*) malloc(sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
+        memset(sge, 0, sizeof(struct ibv_sge) * IBCOM_LMT_PUT_NSGE);
+#endif
+        conp->icom_sr[IBCOM_LMT_PUT].next = NULL;
+#ifdef DCFA
+#else
+        conp->icom_sr[IBCOM_LMT_PUT].sg_list = sge;
+#endif
+        conp->icom_sr[IBCOM_LMT_PUT].opcode = IBV_WR_RDMA_WRITE;
+        conp->icom_sr[IBCOM_LMT_PUT].send_flags = IBV_SEND_SIGNALED;
+        break;
+
+    case IBCOM_OPEN_UD:
+        /* SGE (RDMA-send-from memory) template for IBCOM_UD_INITIATOR */
+#ifdef DCFA
+        sge = &(conp->icom_sr[IBCOM_UD_INITIATOR].sg_list[0]);
+        memset(sge, 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+#else
+        sge = (struct ibv_sge*)calloc(1, sizeof(struct ibv_sge));
+#endif
+        /* addr to addr + length - 1 will be in the payload, but search backward for "<= 40" */
+        sge[0].addr = (uint64_t)conp->icom_mem[IBCOM_UDWR_FROM] + 40; 
+        sge[0].length = IBCOM_UDBUF_SZSEG - 40;
+        sge[0].lkey = conp->icom_mrlist[IBCOM_UDWR_FROM]->lkey;
+        
+
+        conp->icom_ah_attr = (struct ibv_ah_attr*)calloc(IBCOM_UD_SR_NTEMPLATE, sizeof(struct ibv_ah_attr));
+
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].sl = 0;
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].src_path_bits = 0;
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].static_rate = 0; /* no limit on static rate (100% of port speed) */
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].is_global = 0;
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].port_num = conp->icom_port;
+        
+#if 0
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].is_global = 1;
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.flow_label = 0;
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.sgid_index = 0; /* what is this? */
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.hop_limit = 1;
+        conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.traffic_class = 0;
+#endif
+
+        /* SR (send request) template for IBCOM_UD_INITIATOR */
+        conp->icom_sr = (struct ibv_send_wr*)calloc(IBCOM_UD_SR_NTEMPLATE, sizeof(struct ibv_send_wr));
+        
+        conp->icom_sr[IBCOM_UD_INITIATOR].next = NULL;
+#ifdef DCFA
+#else
+        conp->icom_sr[IBCOM_UD_INITIATOR].sg_list = sge;
+#endif
+        conp->icom_sr[IBCOM_UD_INITIATOR].num_sge = 1;
+        conp->icom_sr[IBCOM_UD_INITIATOR].opcode = IBV_WR_SEND;
+        conp->icom_sr[IBCOM_UD_INITIATOR].send_flags = IBV_SEND_SIGNALED;
+
+        conp->icom_sr[IBCOM_UD_INITIATOR].wr.ud.remote_qkey = IBCOM_QKEY;
+        
+        /* SGE (scatter gather element) template for recv */
+#ifdef DCFA
+        sge = &(conp->icom_rr[IBCOM_UD_RESPONDER].sg_list[0]);
+        memset(sge, 0, sizeof(struct ibv_sge) * WR_SG_NUM);
+#else
+        sge = (struct ibv_sge*)calloc(1, sizeof(struct ibv_sge));
+#endif
+        sge[0].addr = (uint64_t)conp->icom_mem[IBCOM_UDWR_TO];
+        sge[0].length = IBCOM_UDBUF_SZ;
+        sge[0].lkey =  conp->icom_mrlist[IBCOM_UDWR_TO]->lkey;
+        
+        /* RR (receive request) template for IBCOM_UD_RESPONDER */
+        conp->icom_rr = (struct ibv_recv_wr*)calloc(IBCOM_UD_RR_NTEMPLATE, sizeof(struct ibv_recv_wr));
+        
+        /* create one dummy RR to ibv_post_recv */
+        conp->icom_rr[IBCOM_UD_RESPONDER].next = NULL;
+#ifdef DCFA
+#else
+        conp->icom_rr[IBCOM_UD_RESPONDER].sg_list = sge;
+#endif
+        conp->icom_rr[IBCOM_UD_RESPONDER].num_sge = 1;
+        break;
+    }
+    
+    maxcon++;
+
+ fn_exit:
+    return ibcom_errno;
+ err_exit:
+    ibcomClean(conp);
+    return -1;
+ fn_fail:
+    ibcomClean(conp);
+    goto fn_exit;
+}
+
+/* 1. allocate the memory area if it is not yet allocated, or reuse it if it is
+   2. ibv_reg_mr it (via the MR cache) and store the resulting rkey in conp->icom_mrlist
+   the buffer is returned through conp->icom_mem */
+int ibcom_alloc(int condesc, int sz) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+    int mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+
+    switch(conp->open_flag) {
+        
+    case IBCOM_OPEN_SCRATCH_PAD:
+        /* RDMA-write-to local memory area */
+        if(!scratch_pad) {
+            scratch_pad = mmap(0, sz, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+            dprintf("ibcom_alloc,mmap=%p,len=%d\n", scratch_pad, sz);
+            IBCOM_ERR_CHKANDJUMP(scratch_pad == (void*)-1, -1, dprintf("failed to allocate buffer\n")); 
+            dprintf("ibcom_alloc,scratch_pad=%p\n", scratch_pad);
+            memset(scratch_pad, 0 , sz);
+        }
+        conp->icom_mem[IBCOM_SCRATCH_PAD_TO] = scratch_pad;
+        conp->icom_msize[IBCOM_SCRATCH_PAD_TO] = sz;
+        
+        conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO] = ibcom_reg_mr_fetch(conp->icom_mem[IBCOM_SCRATCH_PAD_TO], conp->icom_msize[IBCOM_SCRATCH_PAD_TO]);
+        IBCOM_ERR_CHKANDJUMP(!conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO], -1, dprintf("ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags));
+        
+#ifdef DCFA
+        dprintf("ibcom_alloc,fd=%d,rmem=%p\n", condesc, conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO]->buf);
+#else
+        dprintf("ibcom_alloc,fd=%d,rmem=%p\n", condesc, conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO]->addr);
+#endif
+        dprintf("ibcom_alloc,fd=%d,rkey=%08x\n", condesc, conp->icom_mrlist[IBCOM_SCRATCH_PAD_TO]->rkey);
+        break;
+    default:
+        IBCOM_ERR_CHKANDJUMP(1, -1, dprintf("ibcom_alloc, invalid open_flag=%d\n", conp->open_flag));
+        break;
+    }
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
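+/* Release a connection descriptor obtained from ibcomOpen and free its resources. */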
+int ibcom_close(int condesc) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+    
+    dprintf("ibcom_close,condesc=%d\n", condesc);
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    ibcomClean(conp);
+    --maxcon;
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
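+/* Bring the connection up: for RC-type QPs walk INIT -> RTR -> RTS via the helpers
+ * above; for UD QPs perform the equivalent transitions inline, installing IBCOM_QKEY. */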
+int ibcom_rts(int condesc, int remote_qpnum, uint16_t remote_lid, union ibv_gid *remote_gid) {
+    IbCom *conp;
+    int ibcom_errno = 0;
+    int ib_errno;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    if(conp->icom_connected == 1) {
+        ibcom_errno = -1;
+        goto fn_fail;
+    }
+
+    struct ibv_qp_attr attr;
+    int flags;
+
+    switch(conp->open_flag) {
+    case IBCOM_OPEN_RC:
+    case IBCOM_OPEN_RC_LMT_PUT:
+    case IBCOM_OPEN_SCRATCH_PAD:
+        /* Init QP  */
+        ib_errno = modify_qp_to_init(conp->icom_qp, conp->icom_port);
+        if(ib_errno) {
+            fprintf(stderr, "change QP state to INIT failed\n");
+            ibcom_errno = ib_errno;
+            goto fn_fail;
+        }
+        /* Modify QP TO RTR status */
+        ib_errno = modify_qp_to_rtr(conp->icom_qp, remote_qpnum, remote_lid, remote_gid, conp->icom_port, 0);
+        conp->remote_lid = remote_lid; /* for debug */
+        if(ib_errno){
+            fprintf(stderr, "failed to modify QP state to RTR\n");
+            ibcom_errno = ib_errno;
+            goto fn_fail;
+        }
+        /* Modify QP TO RTS status */
+        ib_errno = modify_qp_to_rts(conp->icom_qp);
+        if(ib_errno) {
+            fprintf(stderr, "failed to modify QP state to RTS\n");
+            ibcom_errno = ib_errno;
+            goto fn_fail;
+        }
+        break;
+    case IBCOM_OPEN_UD:
+        /* INIT */
+        memset(&attr, 0, sizeof(attr));
+        attr.qp_state = IBV_QPS_INIT;
+        attr.port_num = conp->icom_port;
+        attr.pkey_index = 0;
+        attr.qkey = IBCOM_QKEY;
+        flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY;
+        ib_errno = ibv_modify_qp(conp->icom_qp, &attr, flags);
+        IBCOM_ERR_CHKANDJUMP(ib_errno, -1, perror("ibv_modify_qp"));
+
+        /* RTR */
+        memset(&attr, 0, sizeof(attr));
+        attr.qp_state = IBV_QPS_RTR;
+        flags = IBV_QP_STATE;
+        ib_errno = ibv_modify_qp(conp->icom_qp, &attr, flags);
+        IBCOM_ERR_CHKANDJUMP(ib_errno, -1, perror("ibv_modify_qp"));
+      
+        /* RTS */
+        memset(&attr, 0, sizeof(attr));
+        attr.qp_state = IBV_QPS_RTS;
+        attr.sq_psn = 0;
+        flags = IBV_QP_STATE | IBV_QP_SQ_PSN;
+        ib_errno = ibv_modify_qp(conp->icom_qp, &attr, flags);
+        IBCOM_ERR_CHKANDJUMP(ib_errno, -1, perror("ibv_modify_qp"));
+        break;
+    }
+    conp->icom_connected = 1;
+
+fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
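+/* Eager send over the RC ring buffer.  A single RDMA write carries
+ *   [sz_hdrmagic_t][prefix][hdr][payload][padding][tailmagic_t]
+ * into the receiver's IBCOM_RDMAWR_TO slot selected by sseq_num; the size is rounded
+ * up with DCFA_NEM_SZ_DATA_POW2 so the tail magic lands at one of a fixed set of
+ * offsets.  With MPID_NEM_DCFA_ENABLE_INLINE, messages that fit in max_inline_data
+ * are posted with IBV_SEND_INLINE and *copied is set to 1. */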
+#define MPID_NEM_DCFA_ENABLE_INLINE
+int ibcom_isend(int condesc, uint64_t wr_id, void* prefix, int sz_prefix, void* hdr, int sz_hdr, void* data, int sz_data, int* copied) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+    struct ibv_send_wr	*bad_wr;
+    int	ib_errno;
+    int num_sge;
+
+    dprintf("ibcom_isend,prefix=%p,sz_prefix=%d,hdr=%p,sz_hdr=%d,data=%p,sz_data=%d\n", prefix, sz_prefix, hdr, sz_hdr, data, sz_data);
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    if (conp->icom_connected == 0) { return -1; }
+    int sz_data_pow2;
+    DCFA_NEM_SZ_DATA_POW2(sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr + sz_data);
+    uint32_t sumsz = sz_data_pow2 + sizeof(tailmagic_t);
+    if(sz_data>16000) {
+        //dprintf("ibcom_isend,sz_data=%d,sz_data_pow2=%d,sz_max=%ld\n", sz_data, sz_data_pow2, DCFA_NEM_MAX_DATA_POW2);
+    }
+    
+    num_sge = 0;
+
+    void* buf_from = conp->icom_mem[IBCOM_RDMAWR_FROM] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
+
+    sz_hdrmagic_t* sz_hdrmagic = (sz_hdrmagic_t*)buf_from;
+    sz_hdrmagic->sz = sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr + sz_data + sizeof(tailmagic_t);
+    sz_hdrmagic->magic = IBCOM_MAGIC;
+
+    /* memcpy of hdr is needed because hdr resides on the stack when sending the close-VC command */
+    /* the memcpy is performed into the IBCOM_RDMAWR_FROM buffer */
+    void* hdr_copy = buf_from + sizeof(sz_hdrmagic_t);
+    memcpy(hdr_copy, prefix, sz_prefix);
+    memcpy(hdr_copy + sz_prefix, hdr, sz_hdr);
+#ifdef DCFA
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].mic_addr = (uint64_t)sz_hdrmagic;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)sz_hdrmagic - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
+#else
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = (uint64_t)sz_hdrmagic;
+#endif
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].length = sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+    num_sge += 1;
+
+    if(sz_data) {
+        //dprintf("ibcom_isend,data=%p,sz_data=%d\n", data, sz_data);
+        struct ibv_mr *mr_data = ibcom_reg_mr_fetch(data, sz_data);
+        IBCOM_ERR_CHKANDJUMP(!mr_data, -1, printf("ibcom_isend,ibv_reg_mr_fetch failed\n"));
+#ifdef DCFA
+        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].mic_addr = (uint64_t)data;
+        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = mr_data->host_addr + ((uint64_t)data - (uint64_t)data);
+#else
+        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = (uint64_t)data;
+#endif
+        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].length = sz_data;
+        conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].lkey = mr_data->lkey;
+        num_sge += 1;
+    }
+
+    int sz_pad = sz_data_pow2 - (sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr + sz_data);
+    tailmagic_t* tailmagic = (tailmagic_t*)(buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr + sz_pad);
+    tailmagic->magic = IBCOM_MAGIC; 
+#ifdef DCFA
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].mic_addr = (uint64_t)buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
+#else
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].addr = (uint64_t)buf_from + sizeof(sz_hdrmagic_t) + sz_prefix + sz_hdr;
+#endif
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].length = sz_pad + sizeof(tailmagic_t);
+    conp->icom_sr[IBCOM_SMT_NOINLINE].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+    num_sge += 1;
+    dprintf("ibcom_isend,sz_data=%d,pow2=%d,sz_pad=%d,num_sge=%d\n", sz_data, sz_data_pow2, sz_pad, num_sge);
+
+    conp->icom_sr[IBCOM_SMT_NOINLINE].num_sge = num_sge;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].wr_id = wr_id;
+    conp->icom_sr[IBCOM_SMT_NOINLINE].wr.rdma.remote_addr = (uint64_t) conp->icom_rmem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
+    /* rkey is defined in ibcom_reg_mr_connect */
+
+    //dprintf("ibcom_isend,condesc=%d,num_sge=%d,opcode=%08x,imm_data=%08x,wr_id=%016lx, raddr=%p, rkey=%08x\n", condesc, conp->icom_sr[IBCOM_SMT_NOINLINE].num_sge, conp->icom_sr[IBCOM_SMT_NOINLINE].opcode, conp->icom_sr[IBCOM_SMT_NOINLINE].imm_data, conp->icom_sr[IBCOM_SMT_NOINLINE].wr_id, conp->icom_sr[IBCOM_SMT_NOINLINE].wr.rdma.remote_addr, conp->icom_sr[IBCOM_SMT_NOINLINE].wr.rdma.rkey);
+    
+    /* other commands can be executed before the RDMA-rd command */
+    /* see the "Ordering and the Fence Indicator" section in "InfiniBand Architecture" by William T. Futral */
+#if 0
+    if(conp->after_rdma_rd) {
+        conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags |= IBV_SEND_FENCE;
+    }
+#endif
+#ifdef MPID_NEM_DCFA_ENABLE_INLINE
+    if(sumsz <= conp->max_inline_data) {
+        conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags |= IBV_SEND_INLINE;
+        *copied = 1;
+    } else {
+        *copied = 0;
+    }
+#endif
+#ifdef DCFA
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SMT_NOINLINE]);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_isend, ibv_post_send, rc=%d\n", ib_errno));
+#else
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SMT_NOINLINE], &bad_wr);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_isend, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+#endif
+#ifdef MPID_NEM_DCFA_ENABLE_INLINE
+    if(sumsz <= conp->max_inline_data) {
+        conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags &= ~IBV_SEND_INLINE;
+    }
+#endif
+#if 0
+    if(conp->after_rdma_rd) {
+        conp->icom_sr[IBCOM_SMT_NOINLINE].send_flags &= ~IBV_SEND_FENCE;
+        conp->after_rdma_rd = 0;
+    }
+#endif
+
+    conp->sseq_num += 1;
+    assert(conp->sseq_num > 0);
+    conp->ncom += 1;
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
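+/* Variant of ibcom_isend that sends the message as a chain of inline RDMA writes,
+ * IBCOM_INLINE_DATA bytes each, posted as one linked list of work requests
+ * (icom_sr[IBCOM_SMT_INLINE_CHAINED0 + s] through the last chained template). */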
+int ibcom_isend_chain(int condesc, uint64_t wr_id, void* hdr, int sz_hdr, void* data, int sz_data) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+    struct ibv_send_wr	*bad_wr;
+    int	ib_errno;
+    int sz_data_rem = sz_data;
+    int i;
+    struct ibv_mr *mr_data;
+    uint32_t sumsz = sizeof(sz_hdrmagic_t) + sz_hdr + sz_data + sizeof(tailmagic_t);
+    unsigned long tscs, tsce;
+
+    dprintf("ibcom_isend_chain,enter\n");
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    IBCOM_ERR_CHKANDJUMP(conp->icom_connected == 0, -1, printf("ibcom_isend_chain,icom_connected==0\n"));
+
+    void* buf_from = conp->icom_mem[IBCOM_RDMAWR_FROM] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
+
+    /* round the size up so that the tail-magic position falls within a fixed set of offsets */
+    int sz_data_pow2;
+    DCFA_NEM_SZ_DATA_POW2(sizeof(sz_hdrmagic_t) + sz_hdr + sz_data);
+
+    /* make the last command in the chain (icom_sr[IBCOM_SMT_INLINE_CHAINED0 + IBCOM_SMT_INLINE_NCHAIN - 1]) the one that carries IBV_WR_RDMA_WRITE_WITH_IMM */
+    int s = IBCOM_SMT_INLINE_NCHAIN - (sizeof(sz_hdrmagic_t) + sz_hdr + sz_data_pow2 + sizeof(tailmagic_t) + IBCOM_INLINE_DATA - 1) / IBCOM_INLINE_DATA;
+    IBCOM_ERR_CHKANDJUMP((sizeof(sz_hdrmagic_t) + sz_hdr + sz_data_pow2) % 4 != 0, -1, printf("ibcom_isend_chain,tail-magic gets over packet-boundary\n"));
+    IBCOM_ERR_CHKANDJUMP(s < 0 || s >= IBCOM_SMT_INLINE_NCHAIN, -1, printf("ibcom_isend_chain,s\n"));
+    dprintf("ibcom_isend_chain,sz_hdr=%d,sz_data=%d,s=%d\n", sz_hdr, sz_data, s);
+
+    for(i = s; i < IBCOM_SMT_INLINE_NCHAIN; i++) {
+
+        //tscs = MPID_nem_dcfa_rdtsc();
+        int sz_used = 0; /* how much of the payload of an IB packet is used? */
+        int num_sge = 0;
+        if(i == s) { 
+            sz_hdrmagic_t* sz_hdrmagic = (sz_hdrmagic_t*)buf_from;
+            sz_hdrmagic->sz = sumsz;
+            sz_hdrmagic->magic = IBCOM_MAGIC;
+            memcpy(buf_from + sizeof(sz_hdrmagic_t), hdr, sz_hdr);
+#ifdef DCFA
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr = (uint64_t)buf_from;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)buf_from - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
+#else
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = (uint64_t)buf_from;
+#endif
+            buf_from += sizeof(sz_hdrmagic_t) + sz_hdr;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length = sizeof(sz_hdrmagic_t) + sz_hdr;
+            sz_used += sizeof(sz_hdrmagic_t) + sz_hdr;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+            num_sge += 1;
+            dprintf("ibcom_isend_chain,i=%d,sz_used=%d\n", i, sz_used);
+        }
+        //tsce = MPID_nem_dcfa_rdtsc(); printf("0,%ld\n", tsce-tscs);
+
+        //tscs = MPID_nem_dcfa_rdtsc();
+        if(sz_data_rem > 0) { 
+#ifdef DCFA
+#else
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = (uint64_t)data + sz_data - sz_data_rem;
+#endif
+            int sz_data_red = sz_used + sz_data_rem + sizeof(tailmagic_t) <= IBCOM_INLINE_DATA ? sz_data_rem : sz_data_rem <= IBCOM_INLINE_DATA - sz_used ? sz_data_rem : IBCOM_INLINE_DATA - sz_used;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length = sz_data_red;
+            sz_used += sz_data_red;
+            sz_data_rem -= sz_data_red;
+            IBCOM_ERR_CHKANDJUMP(sz_data_rem < 0, -1, printf("ibcom_isend_chain,sz_data_rem\n"));
+    
+            if(i == s) {
+                IBCOM_ERR_CHKANDJUMP(!sz_data, -1, printf("ibcom_isend_chain,sz_data==0\n"));
+                mr_data = ibcom_reg_mr_fetch(data, sz_data);
+                IBCOM_ERR_CHKANDJUMP(!mr_data, -1, printf("ibcom_isend,ibv_reg_mr_fetch failed\n"));
+            }
+#ifdef DCFA
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr = (uint64_t)data + sz_data - sz_data_rem;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = mr_data->host_addr + ((uint64_t)data + sz_data - sz_data_rem - (uint64_t)data);
+#endif
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey = mr_data->lkey;
+            num_sge += 1;
+            dprintf("ibcom_isend_chain,i=%d,sz_used=%d,sz_data_rem=%d\n", i, sz_used, sz_data_rem);
+        } else { /* tailmagic only packet is being generated */
+
+        }
+        //tsce = MPID_nem_dcfa_rdtsc(); printf("1,%ld\n", tsce-tscs);
+
+        //tscs = MPID_nem_dcfa_rdtsc();
+        if(i == IBCOM_SMT_INLINE_NCHAIN - 1) { /* append tailmagic */
+            int sz_pad = sz_data_pow2 - sz_data;
+            tailmagic_t* tailmagic = (tailmagic_t*)(buf_from + sz_pad);
+            tailmagic->magic = IBCOM_MAGIC; 
+#ifdef DCFA
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr = (uint64_t)buf_from;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)buf_from - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
+#else
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = (uint64_t)buf_from;
+#endif
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length = sz_pad + sizeof(tailmagic_t);
+            sz_used += sz_pad + sizeof(tailmagic_t);
+            IBCOM_ERR_CHKANDJUMP(sz_data_rem != 0, -1, printf("ibcom_isend_chain, sz_data_rem\n"));
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+            num_sge += 1;
+
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].imm_data = conp->sseq_num;
+            dprintf("ibcom_isend_chain,i=%d,sz_pad=%d,sz_used=%d,num_sge=%d\n", i, sz_pad, sz_used, num_sge);
+        } else if(IBCOM_INLINE_DATA - sz_used > 0) { /* data fell short of the packet, so pad */
+            IBCOM_ERR_CHKANDJUMP(1, -1, printf("ibcom_isend_chain,tail-magic gets over packet-boundary\n"));
+            int sz_pad = IBCOM_INLINE_DATA - sz_used;
+#ifdef DCFA
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].mic_addr = (uint64_t)buf_from;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->host_addr + ((uint64_t)buf_from - (uint64_t)conp->icom_mem[IBCOM_RDMAWR_FROM]);
+#else
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].addr = (uint64_t)buf_from;
+#endif
+            buf_from += sz_pad;
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].length = sz_pad;
+            sz_used += sz_pad;
+            IBCOM_ERR_CHKANDJUMP(sz_used != IBCOM_INLINE_DATA, -1, printf("ibcom_isend_chain, sz_used\n"));
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].sg_list[num_sge].lkey = conp->icom_mrlist[IBCOM_RDMAWR_FROM]->lkey;
+            num_sge += 1;
+            dprintf("ibcom_isend_chain,i=%d,sz_pad=%d,sz_used=%d\n", i, sz_pad, sz_used);
+        } else { /* packet is full with data */
+            IBCOM_ERR_CHKANDJUMP(sz_used != IBCOM_INLINE_DATA, -1, printf("ibcom_isend_chain, sz_used\n"));
+        }
+        //tsce = MPID_nem_dcfa_rdtsc(); printf("2,%ld\n", tsce-tscs);
+
+        conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].num_sge = num_sge;
+        conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].wr_id = wr_id;
+        conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].wr.rdma.remote_addr = (uint64_t) conp->icom_rmem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG) + IBCOM_INLINE_DATA * (i - s);
+    }
+#if 0
+    if(conp->after_rdma_rd) {
+        conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + s].send_flags |= IBV_SEND_FENCE;
+    }
+#endif
+#ifdef DCFA
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + s]);
+#else
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + s], &bad_wr);
+#endif
+#if 0
+    if(i == 0 && conp->after_rdma_rd) {
+        conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + s].send_flags &= ~IBV_SEND_FENCE;
+        conp->after_rdma_rd = 0;
+    }
+#endif
+#ifdef DCFA
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_isend, ibv_post_send, rc=%d\n", ib_errno));
+#else
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_isend, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+#endif
+    conp->ncom += (IBCOM_SMT_INLINE_NCHAIN - s);
+    conp->sseq_num += 1;
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
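+/* Post the dummy receive request built in ibcomOpen (zero SGEs); a receive must be
+ * outstanding for an incoming RDMA-write-with-immediate to generate a completion. */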
+int ibcom_irecv(int condesc, uint64_t wr_id) {
+
+    IbCom	*conp;
+    int ib_errno;
+    int ibcom_errno = 0;
+    struct ibv_recv_wr	*bad_wr;
+    
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    //    if (conp->icom_connected == 0) { return -1; }
+
+    //dprintf("ibcom_irecv,condesc=%d,wr_id=%016lx\n", condesc, wr_id);
+
+    conp->icom_rr[IBCOM_RDMAWR_RESPONDER].wr_id = wr_id;
+#ifdef DCFA
+    ib_errno = ibv_post_recv(conp->icom_qp, &conp->icom_rr[IBCOM_RDMAWR_RESPONDER]);
+#else
+    ib_errno = ibv_post_recv(conp->icom_qp, &conp->icom_rr[IBCOM_RDMAWR_RESPONDER], &bad_wr);
+#endif
+    if (ib_errno) {
+#ifdef DCFA
+        fprintf(stderr, "ibcom_irecv: failed to post receive, ib_errno=%d\n", ib_errno);
+#else
+        fprintf(stderr, "ibcom_irecv: failed to post receive, ib_errno=%d,bad_wr=%p\n", ib_errno, bad_wr);
+#endif
+        ibcom_errno = ib_errno;
+        goto fn_fail;
+    }
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+ 
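+/* Send one UD datagram: create an address handle for the remote LID, point the
+ * pre-built IBCOM_UD_INITIATOR send request at it, and post it.  Not supported
+ * under DCFA, which lacks ibv_create_ah. */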
+int ibcom_udsend(int condesc, union ibv_gid* remote_gid, uint16_t remote_lid, uint32_t remote_qpn, uint32_t imm_data, uint64_t wr_id) {
+    IbCom	*conp;
+	struct ibv_send_wr *bad_wr;
+    int ibcom_errno = 0, ib_errno;
+    
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+
+#ifdef DCFA
+    IBCOM_ERR_CHKANDJUMP(1, -1, dprintf("ibcom_udsend not supported by DCFA because DCFA doesn't have ibv_create_ah\n"));
+#else
+    /* prepare ibv_ah_attr */
+    conp->icom_ah_attr[IBCOM_UD_INITIATOR].dlid = remote_lid;
+#if 0
+    conp->icom_ah_attr[IBCOM_UD_INITIATOR].grh.dgid = *remote_gid;
+#endif
+    
+    /* prepare ibv_ah */
+    struct ibv_ah *ah;
+    ah = ibv_create_ah(ib_pd, &conp->icom_ah_attr[IBCOM_UD_INITIATOR]);
+    IBCOM_ERR_CHKANDJUMP(!ah, -1, dprintf("ibv_create_ah\n"));
+    
+    conp->icom_sr[IBCOM_UD_INITIATOR].wr.ud.ah = ah;
+    conp->icom_sr[IBCOM_UD_INITIATOR].wr.ud.remote_qpn = remote_qpn;
+    /* qkey is defined in open */
+
+    //dprintf("lid=%04x\n", conp->icom_ah_attr[IBCOM_UD_INITIATOR].dlid);
+    //dprintf("qpn=%08x\n", conp->icom_sr[IBCOM_UD_INITIATOR].wr.ud.remote_qpn);
+    
+    /* recv doesn't know the length, so we can't optimize it */
+    //    conp->icom_sr[IBCOM_UD_INITIATOR].sg_list[0].length = length;
+
+	conp->icom_sr[IBCOM_UD_INITIATOR].wr_id = wr_id;
+    conp->icom_sr[IBCOM_UD_INITIATOR].imm_data = imm_data;
+
+#if 0
+	if(length <= qpinfo->max_inline_data){
+		conp->icom_sr[IBCOM_UD_INITIATOR].send_flags |= IBV_SEND_INLINE;
+	}
+#endif    
+
+#ifdef DCFA
+	ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_UD_INITIATOR]);
+#else
+	ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_UD_INITIATOR], &bad_wr);
+#endif
+	IBCOM_ERR_CHKANDJUMP(ib_errno, -1, perror("ibv_post_send"));
+#endif /* DCFA */
+
+    conp->ncom += 1;
+
+ fn_exit:
+	return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
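+/* Post the pre-built IBCOM_UD_RESPONDER receive request so the next incoming UD
+ * datagram can land in the IBCOM_UDWR_TO buffer. */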
+int ibcom_udrecv(int condesc){
+    IbCom	*conp;
+	struct ibv_recv_wr *bad_wr;
+	int ibcom_errno = 0, ib_errno;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+
+	/* Create RR */
+	conp->icom_rr[IBCOM_UD_RESPONDER].wr_id = 0;
+
+	/* Post RR to RQ */
+#ifdef DCFA
+    ib_errno = ibv_post_recv(conp->icom_qp, &conp->icom_rr[IBCOM_UD_RESPONDER]);
+#else
+    ib_errno = ibv_post_recv(conp->icom_qp, &conp->icom_rr[IBCOM_UD_RESPONDER], &bad_wr);
+#endif
+	IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibv_post_recv ib_errno=%d\n", ib_errno));
+	
+ fn_exit:
+	return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
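+/* LMT get path: issue an RDMA read from the sender's (raddr, rkey) into the local
+ * buffer laddr, registering laddr through the MR cache first. */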
+int ibcom_lrecv(int condesc, uint64_t wr_id, void* raddr, int sz_data, uint32_t rkey, void* laddr) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+    struct ibv_send_wr	*bad_wr;
+    int	ib_errno;
+    int num_sge;
+    
+    dprintf("ibcom_lrecv,enter,raddr=%p,sz_data=%d,laddr=%p\n", raddr, sz_data, laddr);
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1, dprintf("ibcom_lrecv,not connected\n"));
+    IBCOM_ERR_CHKANDJUMP(!sz_data, -1, dprintf("ibcom_lrecv,sz_data==0\n"));
+    
+    num_sge = 0;
+    
+    /* register memory area containing data */
+    struct ibv_mr *mr_data = ibcom_reg_mr_fetch(laddr, sz_data);
+    IBCOM_ERR_CHKANDJUMP(!mr_data, -1, dprintf("ibcom_lrecv,ibv_reg_mr_fetch failed\n"));
+    
+    /* Erase magic, super bug!! */
+    //((tailmagic_t*)(laddr + sz_data - sizeof(tailmagic_t)))->magic = 0;
+#ifdef DCFA
+    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].mic_addr = (uint64_t)laddr;
+    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].addr = mr_data->host_addr + ((uint64_t)laddr - (uint64_t)laddr);
+#else
+    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].addr = (uint64_t)laddr;
+#endif
+    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].length = sz_data;
+    conp->icom_sr[IBCOM_LMT_INITIATOR].sg_list[num_sge].lkey = mr_data->lkey;
+    num_sge += 1;
+    
+    conp->icom_sr[IBCOM_LMT_INITIATOR].num_sge = num_sge;
+    conp->icom_sr[IBCOM_LMT_INITIATOR].wr_id = wr_id;
+    conp->icom_sr[IBCOM_LMT_INITIATOR].wr.rdma.remote_addr = (uint64_t)raddr;
+    conp->icom_sr[IBCOM_LMT_INITIATOR].wr.rdma.rkey = rkey;
+    
+#ifdef DCFA
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_LMT_INITIATOR]);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_lrecv, ibv_post_send, rc=%d\n", ib_errno));
+#else
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_LMT_INITIATOR], &bad_wr);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_lrecv, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+#endif
+
+    /* other commands can be executed before RDMA-rd command */
+    /* see the "Ordering and the Fence Indicator" section in "InfiniBand Architecture" by William T. Futral */
+#if 0
+    conp->after_rdma_rd = 1;
+#endif
+    conp->ncom += 1;
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
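+/* LMT put path: RDMA-write sz_data bytes from laddr into the receiver's (raddr, rkey). */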
+/* use the same QP as isend */
+int ibcom_put_lmt(int condesc, uint64_t wr_id, void* raddr, int sz_data, uint32_t rkey, void* laddr) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+    struct ibv_send_wr	*bad_wr;
+    int	ib_errno;
+    int num_sge;
+    
+    dprintf("ibcom_put_lmt,enter,sz_data=%d,laddr=%p\n", sz_data, laddr);
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1, dprintf("ibcom_put_lmt,not connected\n"));
+    IBCOM_ERR_CHKANDJUMP(!sz_data, -1, dprintf("ibcom_put_lmt,sz_data==0\n"));
+    
+    num_sge = 0;
+    
+    /* register memory area containing data */
+    struct ibv_mr *mr_data = ibcom_reg_mr_fetch(laddr, sz_data);
+    IBCOM_ERR_CHKANDJUMP(!mr_data, -1, dprintf("ibcom_put_lmt,ibv_reg_mr_fetch failed\n"));
+    
+#ifdef DCFA
+    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].mic_addr = (uint64_t)laddr;
+    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].addr = mr_data->host_addr + ((uint64_t)laddr - (uint64_t)laddr);
+#else
+    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].addr = (uint64_t)laddr;
+#endif
+    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].length = sz_data;
+    conp->icom_sr[IBCOM_LMT_PUT].sg_list[num_sge].lkey = mr_data->lkey;
+    num_sge += 1;
+    
+    conp->icom_sr[IBCOM_LMT_PUT].num_sge = num_sge;
+    conp->icom_sr[IBCOM_LMT_PUT].wr_id = wr_id;
+    conp->icom_sr[IBCOM_LMT_PUT].wr.rdma.remote_addr = (uint64_t)raddr;
+    conp->icom_sr[IBCOM_LMT_PUT].wr.rdma.rkey = rkey;
+    
+#ifdef DCFA
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_LMT_PUT]);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_lmt, ibv_post_send, rc=%d\n", ib_errno));
+#else
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_LMT_PUT], &bad_wr);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_lmt, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+#endif
+
+    conp->ncom += 1;
+    dprintf("ibcom_put_lmt,exit\n");
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_put_scratch_pad(int condesc, uint64_t wr_id, uint64_t offset, int sz, void* laddr) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+    struct ibv_send_wr	*bad_wr;
+    int	ib_errno;
+    
+    dprintf("ibcom_put_scratch_pad,enter,wr_id=%llx,offset=%llx,sz=%d,laddr=%p\n", (unsigned long long)wr_id, (unsigned long long)offset, sz, laddr);
+    dprintf("ibcom_put_scratch_pad,data=%08x\n", *((uint32_t*)laddr));
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    IBCOM_ERR_CHKANDJUMP(conp->open_flag != IBCOM_OPEN_SCRATCH_PAD, -1, dprintf("ibcom_put_scratch_pad,invalid open_flag=%d\n", conp->open_flag));
+    IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1, dprintf("ibcom_put_scratch_pad,not connected\n"));
+    IBCOM_ERR_CHKANDJUMP(!sz, -1, dprintf("ibcom_put_scratch_pad,sz==0\n"));
+    
+    /* register memory area containing data */
+    struct ibv_mr *mr_data = ibcom_reg_mr_fetch(laddr, sz);
+    IBCOM_ERR_CHKANDJUMP(!mr_data, -1, dprintf("ibcom_put_scratch_pad,ibv_reg_mr_fetch failed\n"));
+    dprintf("ibcom_put_scratch_pad,");
+    
+#ifdef DCFA
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].mic_addr = (uint64_t)laddr;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = mr_data->host_addr + ((uint64_t)laddr - (uint64_t)laddr);
+#else
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = (uint64_t)laddr;
+#endif
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].length = sz;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].lkey = mr_data->lkey;
+    
+    /* num_sge is defined in ibcomOpen */
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr_id = wr_id;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.remote_addr = (uint64_t)conp->icom_rmem[IBCOM_SCRATCH_PAD_TO] + offset;
+    /* rkey is defined in ibcom_reg_mr_connect */
+
+    dprintf("ibcom_put_scratch_pad,wr.rdma.remote_addr=%llx\n", (unsigned long long)conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.remote_addr);
+    
+#ifdef DCFA
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR]);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d\n", ib_errno));
+#else
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR], &bad_wr);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_put_scratch_pad, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+#endif
+
+    conp->ncom_scratch_pad += 1;
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#ifdef DCFA_ONDEMAND
+int ibcom_cas_scratch_pad(int condesc, uint64_t wr_id, uint64_t offset, uint64_t compare, uint64_t swap) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+    struct ibv_send_wr	*bad_wr;
+    int	ib_errno;
+    
+    dprintf("ibcom_put_scratch_pad,enter,wr_id=%llx,offset=%llx,sz=%d,laddr=%p\n", (unsigned long long)wr_id, (unsigned long long)offset, sz, laddr);
+    dprintf("ibcom_put_scratch_pad,data=%08x\n", *((uint32_t*)laddr));
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    IBCOM_ERR_CHKANDJUMP(conp->open_flag != IBCOM_OPEN_SCRATCH_PAD, -1, dprintf("ibcom_cas_scratch_pad,invalid open_flag=%d\n", conp->open_flag));
+    IBCOM_ERR_CHKANDJUMP(!conp->icom_connected, -1, dprintf("ibcom_cas_scratch_pad,not connected\n"));
+
+    /* FIXME (assumption): an IB atomic compare-and-swap writes the original
+       8-byte value into the local SGE, but this function takes no local-buffer
+       argument, so a shared static buffer is used here and the returned value
+       is discarded (not reentrant) */
+    static uint64_t cas_retval;
+
+    /* register memory area receiving the original value */
+    struct ibv_mr *mr_data = ibcom_reg_mr_fetch((void *)&cas_retval, sizeof(cas_retval));
+    IBCOM_ERR_CHKANDJUMP(!mr_data, -1, dprintf("ibcom_cas_scratch_pad,ibv_reg_mr_fetch failed\n"));
+    dprintf("ibcom_cas_scratch_pad,");
+    
+#ifdef DCFA
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].mic_addr = (uint64_t)&cas_retval;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = mr_data->host_addr;
+#else
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].addr = (uint64_t)&cas_retval;
+#endif
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].length = sizeof(cas_retval);
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].sg_list[0].lkey = mr_data->lkey;
+    
+    /* num_sge is defined in ibcomOpen */
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr_id = wr_id;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.atomic.remote_addr = (uint64_t)conp->icom_rmem[IBCOM_SCRATCH_PAD_TO] + offset;
+    /* rkey is defined in ibcom_reg_mr_connect */
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.atomic.compare_add = compare;
+    conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.atomic.swap = swap;
+
+    dprintf("ibcom_put_scratch_pad,wr.rdma.remote_addr=%llx\n", (unsigned long long)conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.remote_addr);
+    
+#ifdef DCFA
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR]);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_cas_scratch_pad, ibv_post_send, rc=%d\n", ib_errno));
+#else
+    ib_errno = ibv_post_send(conp->icom_qp, &conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR], &bad_wr);
+    IBCOM_ERR_CHKANDJUMP(ib_errno, -1, dprintf("ibcom_cas_scratch_pad, ibv_post_send, rc=%d, bad_wr=%p\n", ib_errno, bad_wr));
+#endif
+
+    conp->ncom_scratch_pad += 1;
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+#endif
+
+/* poll completion queue */
+int ibcom_poll_cq(int which_cq, struct ibv_wc* wc, int* result) {
+    int ibcom_errno = 0;
+
+    switch(which_cq) {
+    case IBCOM_RC_SHARED_RCQ:
+        *result = ibv_poll_cq(rc_shared_rcq, 1, wc);
+        break;
+    case IBCOM_RC_SHARED_SCQ:
+        *result = ibv_poll_cq(rc_shared_scq, 1, wc);
+        break;
+    case IBCOM_UD_SHARED_RCQ:
+        *result = ibv_poll_cq(ud_shared_rcq, 1, wc);
+        break;
+    case IBCOM_UD_SHARED_SCQ:
+        *result = ibv_poll_cq(ud_shared_scq, 1, wc);
+        break;
+    default:
+        /* without this default an unknown which_cq would leave *result uninitialized */
+        dprintf("ibcom_poll_cq,unknown which_cq=%d\n", which_cq);
+        ibcom_errno = -1;
+        goto fn_fail;
+    }
+
+    if (*result < 0) {
+        dprintf("ibcom_poll_cq,status=%08x,vendor_err=%08x,len=%d,opcode=%08x,wr_id=%016lx\n", wc->status, wc->vendor_err, wc->byte_len, wc->opcode, wc->wr_id);
+        ibcom_errno = *result;
+        goto fn_fail;
+    }
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_reg_mr_connect(int condesc, void *rmem, int rkey) {
+    int ibcom_errno = 0;
+    IbCom	*conp;
+    int i;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    switch(conp->open_flag) {
+    case IBCOM_OPEN_RC:
+    case IBCOM_OPEN_RC_LMT_PUT:
+        conp->icom_rmem[IBCOM_RDMAWR_TO] = rmem;
+        conp->icom_rkey[IBCOM_RDMAWR_TO] = rkey;
+        conp->icom_sr[IBCOM_SMT_NOINLINE].wr.rdma.rkey = conp->icom_rkey[IBCOM_RDMAWR_TO];
+        for(i = 0; i < IBCOM_SMT_INLINE_NCHAIN; i++) {
+            conp->icom_sr[IBCOM_SMT_INLINE_CHAINED0 + i].wr.rdma.rkey = conp->icom_rkey[IBCOM_RDMAWR_TO];
+        }
+        break;
+
+    case IBCOM_OPEN_SCRATCH_PAD:
+        conp->icom_rmem[IBCOM_SCRATCH_PAD_TO] = rmem;
+        conp->icom_rkey[IBCOM_SCRATCH_PAD_TO] = rkey;
+        conp->icom_sr[IBCOM_SCRATCH_PAD_INITIATOR].wr.rdma.rkey = conp->icom_rkey[IBCOM_SCRATCH_PAD_TO];
+        break;
+
+    default:
+        dprintf("invalid open_flag=%d\n", conp->open_flag);
+        break;
+    }
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_get_info_conn(int condesc, int key, void *out, uint32_t out_len) {
+    int ibcom_errno = 0;
+    IbCom	*conp;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+
+    switch(key) {
+    case IBCOM_INFOKEY_QP_QPN:
+        memcpy(out, &conp->icom_qp->qp_num, out_len);
+        break;
+    case IBCOM_INFOKEY_PORT_LID:
+#ifdef DCFA
+        dprintf("ibcom_get_info_conn,lid=%04x\n", ib_ctx->lid);
+        memcpy(out, &ib_ctx->lid, out_len);
+#else
+        dprintf("ibcom_get_info_conn,lid=%04x\n", conp->icom_pattr.lid);
+        memcpy(out, &conp->icom_pattr.lid, out_len);
+#endif
+        break;
+    case IBCOM_INFOKEY_PORT_GID:
+        memcpy(out, &conp->icom_gid, out_len);
+        break;
+    case IBCOM_INFOKEY_PATTR_MAX_MSG_SZ: {
+#ifdef DCFA
+        uint32_t max_msg_sz = 1073741824; /* ConnectX-3 */
+        memcpy(out, &max_msg_sz, out_len);
+#else
+        memcpy(out, &conp->icom_pattr.max_msg_sz, out_len);
+#endif
+        break; }
+    default:
+        ibcom_errno = -1;
+        break;
+    }
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_get_info_mr(int condesc, int memid, int key, void *out, int out_len) {
+    int ibcom_errno = 0;
+    IbCom	*conp;
+    struct ibv_mr	*mr;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    IBCOM_ERR_CHKANDJUMP(memid >= conp->icom_mrlen, -1, dprintf("ibcom_get_info_mr,wrong mem_id=%d\n", memid));
+    mr = conp->icom_mrlist[memid];
+
+    switch(key) {
+    case IBCOM_INFOKEY_MR_ADDR:
+#ifdef DCFA
+        /* host_addr is created by ibv_reg_mr in ibcomOpen; */
+        /* dcfa_init reads this host_addr, puts it into the KVS, and the peer reads it through the KVS */
+        memcpy(out, &mr->host_addr, out_len);
+#else
+        memcpy(out, &mr->addr, out_len);
+#endif
+        break;
+    case IBCOM_INFOKEY_MR_LENGTH: {
+#ifdef DCFA
+        assert(out_len == sizeof(size_t));
+        size_t length = mr->size; /* type of mr->size is int */
+        memcpy(out, &length, out_len);
+#else
+        memcpy(out, &mr->length, out_len);
+#endif
+        break; }
+    case IBCOM_INFOKEY_MR_RKEY:
+        memcpy(out, &mr->rkey, out_len);
+        break;
+    default:
+        dprintf("ibcom_get_info_mr,unknown key=%d\n", key);
+        ibcom_errno = -1;
+        break;
+    }
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
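+
+/* A minimal sketch (illustrative only, not used by the code) of how a caller
+   might pull the RDMA-write-to buffer address and rkey out of
+   ibcom_get_info_mr before publishing them, e.g. through the KVS/business
+   card as described in the comment above.  The helper name and the memid
+   value 0 are assumptions made for this example. */
+static int ibcom_get_info_mr_example(int condesc, void **addr, uint32_t *rkey) {
+    int ibcom_errno;
+
+    ibcom_errno = ibcom_get_info_mr(condesc, 0, IBCOM_INFOKEY_MR_ADDR, addr, sizeof(void *));
+    if (ibcom_errno) { return ibcom_errno; }
+    ibcom_errno = ibcom_get_info_mr(condesc, 0, IBCOM_INFOKEY_MR_RKEY, rkey, sizeof(uint32_t));
+    return ibcom_errno;
+}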
+
+int ibcom_mem_rdmawr_from(int condesc, void** out) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *out = conp->icom_mem[IBCOM_RDMAWR_FROM] + IBCOM_RDMABUF_SZSEG * (conp->sseq_num % IBCOM_RDMABUF_NSEG);
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_mem_rdmawr_to(int condesc, int seq_num, void** out) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *out = conp->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * (seq_num % IBCOM_RDMABUF_NSEG);
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_mem_udwr_from(int condesc, void** out) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *out = conp->icom_mem[IBCOM_UDWR_FROM];
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_mem_udwr_to(int condesc, void** out) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *out = conp->icom_mem[IBCOM_UDWR_TO];
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_sseq_num_get(int condesc, int* seq_num) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *seq_num = conp->sseq_num;
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_lsr_seq_num_tail_get(int condesc, int** seq_num) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *seq_num = &(conp->lsr_seq_num_tail);
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_rsr_seq_num_tail_get(int condesc, int** seq_num) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *seq_num = &(conp->rsr_seq_num_tail);
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_rsr_seq_num_tail_last_sent_get(int condesc, int** seq_num) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *seq_num = &(conp->rsr_seq_num_tail_last_sent);
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_rdmabuf_occupancy_notify_rate_get(int condesc, int* notify_rate) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+
+    switch(conp->rdmabuf_occupancy_notify_lstate) {
+    case IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW:
+        *notify_rate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_HW;
+        break;
+    case IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW:
+        *notify_rate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_LW;
+        break;
+    default:
+        ibcom_errno = -1; goto fn_fail;
+        break;
+    }
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_rdmabuf_occupancy_notify_rstate_get(int condesc, int** rstate) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *rstate = &(conp->rdmabuf_occupancy_notify_rstate);
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_rdmabuf_occupancy_notify_lstate_get(int condesc, int** lstate) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *lstate = &(conp->rdmabuf_occupancy_notify_lstate);
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_obtain_pointer(int condesc, IbCom** ibcom) {
+    IbCom	*conp;
+    int ibcom_errno = 0;
+
+    RANGE_CHECK_WITH_ERROR(condesc, conp);
+    *ibcom = conp;
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+void ibcomShow(int condesc) {
+    IbCom	*conp;
+    uint8_t	*p;
+    int		i;
+
+    RANGE_CHECK(condesc, conp);
+    fprintf(stdout, "qp_num = %d\n", conp->icom_qp->qp_num);
+#ifdef DCFA
+    fprintf(stdout, "lid    = %d\n", ib_ctx->lid);
+#else
+    fprintf(stdout, "lid    = %d\n", conp->icom_pattr.lid);
+#endif
+    p = (uint8_t*) &conp->icom_gid;
+    fprintf(stdout, "gid    = %02x", p[0]);
+    for (i = 1; i < 16; i++) {
+        fprintf(stdout, ":%02x", p[i]);
+    }
+    fprintf(stdout, "\n");
+}
+
+static char *strerror_tbl[] = {
+	[0] = "zero",
+	[1] = "one",
+	[2] = "two",
+	[3] = "three",
+};
+
+char* ibcom_strerror(int err) {
+    /* the parameter is named "err" to avoid clashing with the errno macro from <errno.h> */
+    char* r;
+    if(-err > 3) {
+        r = malloc(256);
+        sprintf(r, "%d", -err);
+        goto fn_exit;
+    } else {
+        r = strerror_tbl[-err];
+    }
+ fn_exit:
+    return r;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_reg_mr(void *addr, int len, struct ibv_mr **mr) {
+    int ibcom_errno = 0;
+	dprintf("ibcom_reg_mr,addr=%p,len=%d,mr=%p\n", addr, len, mr);
+
+    *mr = ibv_reg_mr(ib_pd, addr, len, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
+
+    IBCOM_ERR_CHKANDJUMP(*mr == 0, -1, dprintf("ibcom_reg_mr,cannot register memory\n"));
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+int ibcom_dereg_mr(struct ibv_mr *mr) {
+	int i;
+	int ib_errno;
+    int ibcom_errno = 0;
+
+    if (!mr) { goto fn_exit; }
+
+    ib_errno = ibv_dereg_mr(mr);
+    if (ib_errno < 0) {
+        fprintf(stderr, "cannot deregister memory\n");
+        goto fn_fail;
+    }
+#ifdef DCFA
+    dprintf("ibcom_dereg_mr, addr=%p\n", mr->buf);
+#else
+    dprintf("ibcom_dereg_mr, addr=%p\n", mr->addr);
+#endif
+    
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.h b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.h
new file mode 100644
index 0000000..3da5c2d
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.h
@@ -0,0 +1,412 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2012 NEC Corporation
+ *      Author: Masamichi Takagi
+ *  (C) 2012 Oct 14 Yutaka Ishikawa, ishikawa at is.s.u-tokyo.ac.jp
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+//#define DEBUG_ON	1
+#ifdef DEBUG_ON
+#define DEBUG	if(dflag)
+#else
+#define DEBUG	if(0)
+#endif
+
+#ifdef DCFA
+#include "dcfa.h"
+
+/* 
+*** diff -p verbs.h dcfa.h (structures)
+same name, same fields
+   struct ibv_device { };
+   struct ibv_context { };
+   struct ibv_pd { };
+   struct ibv_ah_attr { };
+
+same name, different fields
+ struct ibv_qp_init_attr {
+-  void *qp_context;
+-  struct ibv_xrc_domain  *xrc_domain;
+};
+
+ struct ibv_mr {
+-  void *addr;
++  void *buf;
++  uint64_t host_addr;
+-  size_t length;
++  int size;
+-  uint32_t handle;
++  uint64_t handle;
++  int flag;  1: offload 
+-  uint32_t lkey;
++  int lkey;
+-  uint32_t rkey;
++  int rkey;
+};
+
+ struct ibv_qp {
++  struct mlx4_buf buf;
++  int max_inline_data;
++  int buf_size;
+
++  uint32_t doorbell_qpn;
++  uint32_t sq_signal_bits;
++  int sq_spare_wqes;
++  struct mlx4_wq sq;
+
++  uint32_t *db; // doorbell addr for post recv
++  struct mlx4_wq rq;
++  ibmic_qp_conn_info_t   remote_qp_info;
+
+-  uint32_t               handle;
++  uint64_t               handle;
+
+-  struct ibv_context     *context;
+-  void                   *qp_context;
+-  uint32_t                events_completed;
+-  struct ibv_xrc_domain  *xrc_domain;
+-  pthread_mutex_t         mutex;
+-  pthread_cond_t          cond;
+};
+
+ struct ibv_cq {
+-  struct ibv_comp_channel *channel;
+-  void                   *cq_context;
+-  uint32_t                handle;
+-  uint32_t                comp_events_completed;
+-  uint32_t                async_events_completed;
+
+-  pthread_mutex_t         mutex;
+-  pthread_cond_t          cond;
+
++  struct mlx4_buf         buf;
++  uint32_t                cons_index;
++  uint32_t                wait_index;
++  uint32_t               *set_ci_db;
++  uint32_t               *arm_db;
++  int                     arm_sn;
++  int                     cqe_size;
++  uint64_t                handle;
+};
+
+ struct ibv_wc {
+-  uint32_t                src_qp;
+-  uint16_t                pkey_index;
+-  uint16_t                slid;
+-  uint8_t                 sl;
+-  uint8_t                 dlid_path_bits;
+};
+
+ struct ibv_send_wr {
+-  struct ibv_sge         *sg_list;
++  struct ibv_sge          sg_list[WR_SG_NUM];
++  uint64_t                addr;
++  uint32_t                length;
++  uint32_t                lkey;
+ };
+
+ struct ibv_recv_wr {
+-  struct ibv_sge         *sg_list;
++  struct ibv_sge          sg_list[WR_SG_NUM];
+ };
+
+ struct ibv_sge {
++  uint64_t mic_addr; // buffer address on mic
+ };
+
+non-existent
+-  struct ibv_port_attr { };
+
+
+*** diff -p verbs.h dcfa.h (functions)
+
+same name, same arguments
+   ibv_get_device_list
+   ibv_open_device	
+   ibv_close_device
+   ibv_free_device_list
+   ibv_alloc_pd
+   ibv_dealloc_pd
+   ibv_create_qp
+   ibv_destroy_qp
+   ibv_reg_mr
+   ibv_dereg_mr
+   ibv_destroy_cq
+   ibv_poll_cq
+   ibv_modify_qp
+
+same name, different arguments
+-  int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr)
++  int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr);
+   
+-  int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr)
++  int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr);
+   
+-  struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int comp_vector);
++  struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe_max);
+
+non-existent
+-  ibv_get_device_name
+-  ibv_query_port
+-  ibv_query_gid
+-  ibv_create_ah
+struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+*/
+
+#else
+/* Original Infiniband */
+#include <infiniband/verbs.h>
+#endif
+
+static inline unsigned long long
+getCPUCounter(void)
+{
+    unsigned int lo, hi;
+    __asm__ __volatile__ (      // serialize
+	"xorl %%eax,%%eax \n        cpuid"
+	::: "%rax", "%rbx", "%rcx", "%rdx");
+    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+    return (unsigned long long)hi << 32 | lo;
+}
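+
+/* A minimal usage sketch (illustrative only): the cycle counter above is meant
+   for cheap throttling, e.g. skip a poll unless enough cycles have elapsed
+   since the last one.  The 2000-cycle threshold is an arbitrary example value. */
+static inline int getCPUCounter_throttle_example(unsigned long long *last_tsc)
+{
+    unsigned long long now = getCPUCounter();
+    if (now - *last_tsc < 2000ULL) {
+        return 0;               /* too soon, skip this poll */
+    }
+    *last_tsc = now;
+    return 1;                   /* enough cycles have passed, do the poll */
+}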
+
+extern struct ibv_cq		*rc_shared_scq;
+extern struct ibv_cq		*rc_shared_scq_lmt_put;
+extern struct ibv_cq		*rc_shared_scq_scratch_pad;
+
+#define IBCOM_SIZE		2048 /* one process uses 2-4 fds */
+#define IBCOM_INLINE_DATA (512-64) /* experimented max is 884 */ /* this is a lower bound; the HCA may set a larger value (the larger this request, the larger the actual value), so check the max_inline_data obtained after ibv_create_qp */
+
+#define IBCOM_MAX_SQ_CAPACITY (256/1)
+#define IBCOM_MAX_RQ_CAPACITY ((IBCOM_MAX_SQ_CAPACITY)+16) /* we pre-post IBCOM_MAX_SQ_CAPACITY receive commands */
+#define IBCOM_MAX_SGE_CAPACITY (32/2) /* maximum for ConnectX-3 looks like 32 */
+#define IBCOM_MAX_CQ_CAPACITY IBCOM_MAX_RQ_CAPACITY
+#define IBCOM_MAX_CQ_HEIGHT_DRAIN (((IBCOM_MAX_CQ_CAPACITY)>>2)+((IBCOM_MAX_CQ_CAPACITY)>>1)) /* drain when reaching this amount */
+#define IBCOM_MAX_SQ_HEIGHT_DRAIN (((IBCOM_MAX_SQ_CAPACITY)>>2)+((IBCOM_MAX_SQ_CAPACITY)>>1)) /* drain when reaching this amount */
+#define IBCOM_AMT_CQ_DRAIN ((IBCOM_MAX_CQ_CAPACITY)>>2) /* drain this amount */
+#define IBCOM_MAX_RD_ATOMIC 4
+
+#define IBCOM_MAX_TRIES		 1
+#define IBCOM_SCQ_FLG		 1
+#define IBCOM_RCQ_FLG		 2
+    
+#define IBCOM_INFOKEY_PATTR_MAX_MSG_SZ 100
+#define IBCOM_INFOKEY_MR_ADDR 200
+#define IBCOM_INFOKEY_MR_LENGTH 201
+#define IBCOM_INFOKEY_MR_RKEY 202
+#define IBCOM_INFOKEY_QP_QPN 300
+#define IBCOM_INFOKEY_PORT_LID 400
+#define IBCOM_INFOKEY_PORT_GID 401
+
+
+/* buffers */
+#define IBCOM_NBUF_RDMA 2 /* number of <addr, sz, lkey, rkey> */
+#define IBCOM_RDMAWR_FROM 0 /* index to RDMA-write-from buffer */
+#define IBCOM_RDMAWR_TO 1 /* index to RDMA-write-to buffer */
+/* assuming that the unit (32768) equals the eager-RDMA-write threshold
+   and that the multiplier (256) equals
+   the maximum number of outstanding eager-RDMA-write transactions */
+#define IBCOM_RDMABUF_SZSEG (16384/4)//(16384+8+40+1) /* this size minus magics and headers must be 2^n because data might grow to the next 2^m boundary, see dcfa_impl.h, dcfa_ibcom.c, src/mpid/ch3/src/mpid_isend.c */
+#define IBCOM_RDMABUF_SZ ((IBCOM_RDMABUF_SZSEG) * 16) /* (32768 * 256) */  
+#define IBCOM_RDMABUF_NSEG ((IBCOM_RDMABUF_SZ) / (IBCOM_RDMABUF_SZSEG)) 
+#define IBCOM_SMT_INLINE_NCHAIN 8 /* maximum number of chained inline-send commands */
+#define IBCOM_RDMABUF_HIGH_WATER_MARK (((IBCOM_RDMABUF_NSEG)>>1)+((IBCOM_RDMABUF_NSEG)>>2))
+#define IBCOM_RDMABUF_LOW_WATER_MARK (((IBCOM_RDMABUF_NSEG)>>2))
+#define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW 1
+#define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW 2
+#define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_HW /*1*/(((IBCOM_RDMABUF_NSEG)>>4) == 0 ? 1 : ((IBCOM_RDMABUF_NSEG)>>4))
+#define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_LW (((IBCOM_RDMABUF_NSEG)>>2)) /*12*/ /* the receiver notifies the sender of the number of released slots when it finds more unreported releases than this number */
+#define IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_DELAY_MULTIPLIER(notify_rate) /*(notify_rate + (notify_rate>>1))*/(notify_rate) /* send seq_num explicitly if, for this many release events, there was no chance to piggy-back seq_num on a packet bound for the sender */
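+
+/* A minimal sketch (illustrative only, ignoring 32-bit sequence-number
+   wrap-around): the RDMA-write buffer is a ring of IBCOM_RDMABUF_NSEG
+   fixed-size segments, so a sequence number maps to a slot by modulo, and the
+   occupancy (head - tail) is compared against the water marks above to decide
+   when to switch the occupancy-notify state. */
+static inline void *ibcom_rdmabuf_slot_example(void *buf, int seq_num)
+{
+    return (uint8_t *)buf + IBCOM_RDMABUF_SZSEG * (seq_num % IBCOM_RDMABUF_NSEG);
+}
+static inline int ibcom_rdmabuf_above_high_water_mark_example(int head_seq, int tail_seq)
+{
+    return (head_seq - tail_seq) > IBCOM_RDMABUF_HIGH_WATER_MARK;
+}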
+
+#define IBCOM_NBUF_UD 2 /* number of <addr, sz, lkey, rkey> */
+#define IBCOM_UDWR_FROM 0 /* index to UD-write-from buffer */
+#define IBCOM_UDWR_TO 1 /* index to UD-write-to buffer */
+#define IBCOM_UDBUF_SZ (128 * 8192) /* supporting 100K ranks with 10 rounds */ 
+#define IBCOM_UDBUF_SZSEG (128)
+#define IBCOM_UDBUF_NSEG (IBCOM_UDBUF_SZ / IBCOM_UDBUF_SZSEG) 
+
+#define IBCOM_NBUF_SCRATCH_PAD 1 /* number of <addr, sz, lkey, rkey> */
+#define IBCOM_SCRATCH_PAD_TO 0 /* index to RDMA-write-to buffer */
+
+/* send command templates */
+#define IBCOM_RC_SR_NTEMPLATE (8+1+2) /* number of request templates: 8 for inline-chained-smt, 1 for smt, 2 for lmt (initiator and put) */
+#define IBCOM_SMT_INLINE_CHAINED0 0 /* index to it */
+#define IBCOM_SMT_INLINE_CHAINED7 7 
+#define IBCOM_SMT_NOINLINE 8 
+#define IBCOM_LMT_INITIATOR 9 /* FIXME: bad naming */
+
+#define IBCOM_RC_SR_LMT_PUT_NTEMPLATE IBCOM_RC_SR_NTEMPLATE /* FIXME: the template array named IBCOM_RC_SR also holds the IBCOM_LMT_PUT entry */
+#define IBCOM_LMT_PUT 10
+
+/* recv command templates */
+#define IBCOM_RC_RR_NTEMPLATE 1  /* 1 for smt, */
+#define IBCOM_RDMAWR_RESPONDER  0 /* index to recv request template */
+
+/* sge template */
+#define IBCOM_SMT_INLINE_INITIATOR_NSGE 4 /* MPI header, (sz;magic), data x1, magic */
+#define IBCOM_SMT_NOINLINE_INITIATOR_NSGE 4 /* MPI header, (sz;magic), data x1, magic */
+#define IBCOM_LMT_INITIATOR_NSGE 1 /* data x1 */
+#define IBCOM_LMT_PUT_NSGE 1 /* data x1 */
+#define IBCOM_SCRATCH_PAD_INITIATOR_NSGE 1 /* QP state */
+
+#define IBCOM_UD_SR_NTEMPLATE 1 
+#define IBCOM_UD_RR_NTEMPLATE 1 
+#define IBCOM_UD_INITIATOR 0 /* index to send request template */
+#define IBCOM_UD_RESPONDER 0 /* index to recv request template */
+
+#define IBCOM_SCRATCH_PAD_SR_NTEMPLATE 2 
+#define IBCOM_SCRATCH_PAD_RR_NTEMPLATE 1 
+#define IBCOM_SCRATCH_PAD_INITIATOR 0 /* index to send request template */
+#define IBCOM_SCRATCH_PAD_CAS       1 
+#define IBCOM_SCRATCH_PAD_RESPONDER 0 /* index to recv request template */
+
+
+typedef struct IbCom {
+    short			icom_used;
+    short			icom_connected;
+    int				icom_port;
+#ifdef DCFA
+#else
+    struct ibv_port_attr	icom_pattr;	/* IB port attributes */
+#endif
+    struct ibv_qp		*icom_qp;
+    struct ibv_cq		*icom_scq;
+    struct ibv_cq		*icom_rcq;
+    struct ibv_mr		**icom_mrlist;
+    int				icom_mrlen;
+    union  ibv_gid		icom_gid;
+    void			**icom_mem;	/* 0: send 1: recv 2..: rdma */
+    int				*icom_msize;	/* 0: send 1: recv 2..: rdma */
+    struct ibv_send_wr *icom_sr;
+    struct ibv_ah_attr *icom_ah_attr;
+    struct ibv_recv_wr *icom_rr;
+    void			**icom_rmem;
+    int				*icom_rkey;
+    size_t			*icom_rsize;
+    int sseq_num;
+    int rsr_seq_num_poll;
+    int rsr_seq_num_tail; /* occupation status of remote Send Request (SR) queue (it covers occupation status of local RDMA-wr-to buffer) */
+    int rsr_seq_num_tail_last_sent; /* latest one sent to remote rank */
+    int lsr_seq_num_tail; /* occupation status of local Send Request (SR) queue */
+    int lsr_seq_num_tail_last_requested;  /* value when lmt_start_send issued req_seq_num */
+    int rdmabuf_occupancy_notify_rstate, rdmabuf_occupancy_notify_lstate;
+    int ncom, ncom_lmt_put, ncom_scratch_pad; /* number of entries in the command queue */
+
+    uint32_t max_inline_data; /* actual value obtained after ibv_create_qp */
+    uint32_t max_send_wr;
+    uint32_t max_recv_wr;
+
+    uint32_t open_flag; /* IBCOM_OPEN_UD, ... */
+    uint16_t remote_lid; /* for debug */
+    
+    /* other commands can be executed before RDMA-rd command */
+    /* see the "Ordering and the Fence Indicator" section in "InfiniBand Architecture" by William T. Futral */
+    uint16_t after_rdma_rd;
+    
+    uint64_t rsr_seq_num_released[(IBCOM_RDMABUF_NSEG+63)/64];
+
+} IbCom;
+
+extern int ibcomOpen(int ib_port, int ibcom_open_flag, int* condesc);
+extern int ibcom_alloc(int condesc, int sz);
+extern int ibcom_close(int);
+extern int ibcom_rts(int condesc, int remote_qpnum, uint16_t remote_lid, union ibv_gid *remote_gid);
+
+extern int ibcom_reg_mr_connect(int condesc, void *rmem, int rkey);
+extern int ibcom_isend(int condesc, uint64_t wr_id, void* prefix, int sz_prefix, void* hdr, int sz_hdr, void* data, int sz_data, int* copied);
+//extern int ibcom_isend(int condesc, uint64_t wr_id, void* hdr, int sz_hdr, void* data, int sz_data);
+extern int ibcom_irecv(int condesc, uint64_t wr_id);
+extern int ibcom_udsend(int condesc, union ibv_gid* remote_gid, uint16_t remote_lid, uint32_t remote_qpn, uint32_t imm_data, uint64_t wr_id);
+extern int ibcom_udrecv(int condesc);
+extern int ibcom_lrecv(int condesc, uint64_t wr_id, void* raddr, int sz_data, uint32_t rkey, void* laddr);
+extern int ibcom_poll_cq(int which_cq, struct ibv_wc* wc, int* result);
+
+/* for dcfa_reg_mr.c */
+extern int ibcom_reg_mr(void *addr, int len, struct ibv_mr **mr);
+
+extern int ibcom_get_info_conn(int condesc, int key, void *out, uint32_t out_len);
+extern int ibcom_get_info_mr(int condesc, int memid, int key, void *out, int out_len);
+
+extern int ibcom_lsr_seq_num_tail_get(int condesc, int** seq_num);
+extern int ibcom_rsr_seq_num_tail_get(int condesc, int** seq_num);
+extern int ibcom_rsr_seq_num_tail_last_sent_get(int condesc, int** seq_num);
+extern int ibcom_rdmabuf_occupancy_notify_rate_get(int condesc, int* notify_rate);
+extern int ibcom_rdmabuf_occupancy_notify_rstate_get(int condesc, int** rstate);
+extern int ibcom_rdmabuf_occupancy_notify_lstate_get(int condesc, int** lstate);
+
+extern int ibcomMemInfo(int, int, void**, size_t*, int*);
+extern char* ibcom_strerror(int);
+extern int dflag;
+
+extern int ibcom_mem_rdmawr_from(int condesc, void** out);
+extern int ibcom_mem_rdmawr_to(int condesc, int seq_num, void** out);
+extern int ibcom_mem_udwr_from(int condesc, void** out);
+extern int ibcom_mem_udwr_to(int condesc, void** out);
+
+/* dcfa_reg_mr.c */
+extern void ibcom_RegisterCacheInit();
+extern struct ibv_mr *ibcom_reg_mr_fetch(void *addr, int len); 
+
+/* dcfa_ctlmsg.c */
+extern int ibcom_udbuf_init(void* q);
+
+#define IBCOM_RC_SHARED_RCQ 0
+#define IBCOM_RC_SHARED_SCQ 1
+#define IBCOM_UD_SHARED_RCQ 2
+#define IBCOM_UD_SHARED_SCQ 3
+#define IBCOM_RC_SHARED_SCQ_LMT_PUT 4
+
+/* flag for open */
+#define IBCOM_OPEN_RC            0x01 
+/* for MPI control messages, eager send and the rendezvous protocol,
+   i.e. via RC send/recv or RDMA-write/RDMA-read */
+
+#define IBCOM_OPEN_UD       0x02
+/* obsolete; used to wait for the you-to-me QP to reach the RTR state,
+   via UD send/recv */
+
+#define IBCOM_OPEN_RC_LMT_PUT       0x03
+/* obsolete; tried to use a separate CQ for the LMT-PUT protocol for speed */
+
+#define IBCOM_OPEN_SCRATCH_PAD   0x04
+/* obsolete; used to wait for the you-to-me QP to reach the RTR state,
+   via RDMA-write */
+
+#define IBCOM_ERR_SETANDJUMP(errno, stmt) do { stmt; ibcom_errno = errno; goto fn_fail; } while (0)
+#define IBCOM_ERR_CHKANDJUMP(cond, errno, stmt) do { if (cond) { stmt; ibcom_errno = errno; goto fn_fail; } } while (0)
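+
+/* A minimal sketch (hypothetical helper, illustration only) of the
+   fn_exit/fn_fail convention the macros above assume: every caller declares
+   ibcom_errno, provides both labels, and lets CHKANDJUMP jump to fn_fail. */
+static inline int ibcom_err_macro_usage_example(int condesc)
+{
+    int ibcom_errno = 0;
+
+    IBCOM_ERR_CHKANDJUMP(condesc < 0, -1, fprintf(stderr, "example,negative condesc=%d\n", condesc));
+
+ fn_exit:
+    return ibcom_errno;
+ fn_fail:
+    goto fn_exit;
+}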
+
+#define IBCOM_QKEY 0x1234
+#define IBCOM_MAGIC 0x55
+
+typedef struct sz_hdrmagic_t {
+    uint32_t sz;
+    uint32_t magic;
+} sz_hdrmagic_t;
+
+
+typedef struct tailmagic_t {
+    uint8_t magic;
+    //uint32_t traits; /* for debug */
+} tailmagic_t;
+
+#define DCFA_NEM_SZ_DATA_POW2(sz) \
+    for(sz_data_pow2 = 15; sz_data_pow2 < (sz); sz_data_pow2 = ( (((sz_data_pow2 + 1) << 1) - 1) > IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t) ) ? IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t) : (((sz_data_pow2 + 1) << 1) - 1) ) { } \
+        if(sz_data_pow2 > IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t)) { printf("assertion failed\n"); }; \
+
+#define DCFA_NEM_MAX_DATA_POW2 (IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t))
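+
+/* A worked example (illustrative only) of what DCFA_NEM_SZ_DATA_POW2 computes:
+   starting from 15, the size is grown to the next (2^m - 1) bytes until it
+   covers sz, capped at IBCOM_RDMABUF_SZSEG - sizeof(tailmagic_t); for
+   example, an input of 100 bytes yields sz_data_pow2 == 127. */
+static inline int dcfa_nem_sz_data_pow2_example(int sz)
+{
+    int sz_data_pow2;
+    DCFA_NEM_SZ_DATA_POW2(sz);
+    return sz_data_pow2;
+}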
+
+typedef struct ibcom_qp_state_t {
+    uint32_t state;
+} ibcom_qp_state_t;
+
+#define IBCOM_QP_STATE_RTR 0x12345678
+#define IBCOM_SZ_MPI_HEADER 48
+#define IBCOM_AMT_SLACK (IBCOM_RDMABUF_NSEG > 128 ? 1 : 1)
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
new file mode 100644
index 0000000..8d77c47
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
@@ -0,0 +1,575 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2012 NEC Corporation
+ *      Author: Masamichi Takagi
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#ifndef DCFA_IMPL_H
+#define DCFA_IMPL_H
+
+#include "mpid_nem_impl.h"
+#include "dcfa_ibcom.h"
+#include <sys/types.h>
+#include <errno.h>
+
+#define LMT_GET_CQE /* detect RDMA completion by CQE */
+//#define LMT_PUT_DONE
+#define DISABLE_VAR_OCC_NOTIFY_RATE
+/* lmt-put:
+   (1) the receiver sends a CTS to the sender (2) the sender RDMA-writes to the receiver
+   (3) the sender fetches the CQE (4) the receiver polls on the end flag
+*/
+//#define DCFA_ONDEMAND
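+
+/* A minimal sketch (illustrative only, error handling elided) of the sender
+   side of the lmt-put flow described above: once the CTS has delivered the
+   receiver's raddr/rkey, RDMA-write the payload and then reap the CQE.
+   Assumptions: ibcom_put_lmt() is declared here because dcfa_ibcom.h does not
+   export it, and the completion is taken from the shared RC send CQ. */
+extern int ibcom_put_lmt(int condesc, uint64_t wr_id, void* raddr, int sz_data, uint32_t rkey, void* laddr);
+static inline int MPID_nem_dcfa_lmt_put_sender_example(int condesc, uint64_t wr_id,
+                                                       void *raddr, int sz_data, uint32_t rkey, void *laddr)
+{
+    struct ibv_wc wc;
+    int result = 0;
+    int ibcom_errno = ibcom_put_lmt(condesc, wr_id, raddr, sz_data, rkey, laddr); /* (2) RDMA-write */
+    if (ibcom_errno) { return ibcom_errno; }
+    while (result == 0) {                                          /* (3) fetch the CQE */
+        ibcom_errno = ibcom_poll_cq(IBCOM_RC_SHARED_SCQ, &wc, &result);
+        if (ibcom_errno) { return ibcom_errno; }
+    }
+    return (wc.status == 0) ? 0 : -1;                              /* 0 is IBV_WC_SUCCESS */
+}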
+
+typedef struct {
+    union ibv_gid gid;
+    uint16_t lid;
+    uint32_t qpn;
+} dcfa_conn_ud_t;
+
+typedef struct {
+    int fd, fd_lmt_put;
+    MPIDI_VC_t *vc;
+} dcfaconn_t;
+
+/* see src/mpid/ch3/channels/nemesis/include/mpid_nem_generic_queue.h */
+typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_dcfa_sendq_t;
+
+/* The vc provides a generic buffer in which network modules can store
+   private fields. This removes all dependencies of the VC structure
+   on the network module, facilitating dynamic module loading. */
+typedef struct {
+    dcfaconn_t *sc;
+    int pending_sends; /* number of sends in flight */
+    IbCom *ibcom, *ibcom_lmt_put;
+    MPID_nem_dcfa_sendq_t sendq; /* overflow queue for IB commands */
+    MPID_nem_dcfa_sendq_t sendq_lmt_put;
+    int is_connected; /* dynamic connection: checked in iSendContig; the protocol is handled there and in the progress engine */
+} MPID_nem_dcfa_vc_area;
+
+/* macro for secret area in vc */
+#define VC_CH(vc) ((MPIDI_CH3I_VC *)&(vc)->ch)
+#define VC_DCFA(vc) ((MPID_nem_dcfa_vc_area *)VC_CH((vc))->netmod_area.padding)
+#define VC_FIELD(vcp, field) (((MPID_nem_dcfa_vc_area *)VC_CH(((vcp)))->netmod_area.padding)->field)
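+
+/* A minimal usage sketch (illustrative only): the macros above hide the cast
+   from the generic netmod padding to MPID_nem_dcfa_vc_area, so per-VC private
+   state is accessed like this. */
+static inline void MPID_nem_dcfa_vc_field_example(MPIDI_VC_t *vc)
+{
+    VC_FIELD(vc, pending_sends) += 1;              /* same as VC_DCFA(vc)->pending_sends += 1 */
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);  /* or fetch the whole private area once */
+    vc_dcfa->pending_sends -= 1;
+}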
+
+/* The req provides a generic buffer in which network modules can store
+   private fields. This removes all dependencies of the req structure
+   on the network module, facilitating dynamic module loading. */
+typedef struct {
+    int seq_num; /* NOT USED, DELETE IT: sequence number of SR which RDMA-RD for lmt releases in dcfa_poll */
+    struct MPID_Request *lmt_next; /* for lmtq */
+    struct MPID_Request *sendq_next; /* for sendq */
+    void* lmt_raddr; /* remember this for sendq, it might be better to use sreq->dev.iov[0].MPID_IOV_BUF instead */
+    uint32_t lmt_rkey; /* remember this for sendq, survive over lrecv and referenced when dequeueing from sendq */
+    uint32_t lmt_szsend; /* remember this for sendq */
+    uint8_t lmt_tail, lmt_sender_tail, lmt_receiver_tail; /* survive over lrecv and referenced when polling */
+    MPI_Aint lmt_dt_true_lb; /* to locate the last byte of receive buffer */
+    void* lmt_write_to_buf; /* user buffer or temporary buffer for pack and remember it for lmt_orderq */
+    void* lmt_pack_buf; /* to pack non-contiguous data */
+} MPID_nem_dcfa_req_area;
+
+/* macro for secret area in req */
+#define REQ_DCFA(req) ((MPID_nem_dcfa_req_area *)(&(req)->ch.netmod_area.padding))
+#define REQ_FIELD(reqp, field) (((MPID_nem_dcfa_req_area *)((reqp)->ch.netmod_area.padding))->field)
+
+/* see src/mpid/ch3/channels/nemesis/include/mpidi_ch3_impl.h */
+/* sreq is never enqueued into posted-queue nor unexpected-queue, so we can reuse sreq->dev.next */
+#define MPID_nem_dcfa_sendq_empty(q) GENERICM_Q_EMPTY (q)
+#define MPID_nem_dcfa_sendq_head(q) GENERICM_Q_HEAD (q)
+#define MPID_nem_dcfa_sendq_next_field(ep, next_field) REQ_FIELD(ep, next_field) 
+#define MPID_nem_dcfa_sendq_next(ep) REQ_FIELD(ep, sendq_next) 
+//#define MPID_nem_dcfa_sendq_next(ep) (ep->dev.next) /*takagi*/
+#define MPID_nem_dcfa_sendq_enqueue(qp, ep) GENERICM_Q_ENQUEUE (qp, ep, MPID_nem_dcfa_sendq_next_field, sendq_next);
+#define MPID_nem_dcfa_sendq_enqueue_at_head(qp, ep) GENERICM_Q_ENQUEUE_AT_HEAD(qp, ep, MPID_nem_dcfa_sendq_next_field, sendq_next);
+#define MPID_nem_dcfa_sendq_dequeue(qp, ep) GENERICM_Q_DEQUEUE (qp, ep, MPID_nem_dcfa_sendq_next_field, sendq_next);
+
+/* see src/mpid/ch3/channels/nemesis/include/mpid_nem_generic_queue.h */
+typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_dcfa_lmtq_t;
+
+/* connection manager */
+typedef struct {
+    int remote_rank; 
+    uint32_t type; /* SYN */
+    uint32_t qpn; /* QPN for eager-send channel */
+    uint32_t rkey; /* key for RDMA-write-to buffer of eager-send channel */
+    void* rmem; /* address of RDMA-write-to buffer of eager-send channel */
+} MPID_nem_dcfa_cm_pkt_syn_t;
+
+typedef struct {
+    int remote_rank; 
+    uint32_t type; /* SYNACK */
+    uint32_t qpn; /* QPN for eager-send channel */
+    uint32_t rkey; /* key for RDMA-write-to buffer of eager-send channel */
+    void* rmem; /* address of RDMA-write-to buffer of eager-send channel */
+} MPID_nem_dcfa_cm_pkt_synack_t;
+
+typedef union {
+    MPID_nem_dcfa_cm_pkt_syn_t syn;
+    MPID_nem_dcfa_cm_pkt_synack_t synack;
+} MPID_nem_dcfa_cm_pkt_t;
+
+typedef struct MPID_nem_dcfa_cm_sendq_entry {
+    MPID_nem_dcfa_cm_pkt_t pending_pkt;
+    struct MPID_nem_dcfa_cm_sendq_entry *sendq_next; /* for software command queue */
+} MPID_nem_dcfa_cm_sendq_entry_t;
+
+#ifdef DCFA_ONDEMAND
+typedef struct {
+    char *data;
+    int length;
+    int max_length;
+} MPID_nem_dcfa_cm_map_t;
+
+typedef struct {
+    uint32_t type;
+    uint32_t qpnum;
+    uint16_t lid;
+    union ibv_gid gid;
+    void* rmem;
+    uint32_t rkey;
+} MPID_nem_dcfa_cm_cmd_t;
+#endif
+
+typedef GENERIC_Q_DECL(struct MPID_Request) MPID_nem_dcfa_cm_sendq_t;
+
+#define MPID_nem_dcfa_cm_sendq_empty(q) GENERICM_Q_EMPTY (q)
+#define MPID_nem_dcfa_cm_sendq_head(q) GENERICM_Q_HEAD (q)
+#define MPID_nem_dcfa_cm_sendq_next_field(ep, next_field) ((ep)->next_field) 
+#define MPID_nem_dcfa_cm_sendq_next(ep) ((ep)->sendq_next) 
+#define MPID_nem_dcfa_cm_sendq_enqueue(qp, ep) GENERICM_Q_ENQUEUE (qp, ep, MPID_nem_dcfa_cm_sendq_next_field, sendq_next);
+#define MPID_nem_dcfa_cm_sendq_enqueue_at_head(qp, ep) GENERICM_Q_ENQUEUE_AT_HEAD(qp, ep, MPID_nem_dcfa_cm_sendq_next_field, sendq_next);
+#define MPID_nem_dcfa_cm_sendq_dequeue(qp, ep) GENERICM_Q_DEQUEUE (qp, ep, MPID_nem_dcfa_cm_sendq_next_field, sendq_next);
+
+/* see src/mpid/ch3/channels/nemesis/include/mpidi_ch3_impl.h */
+/* TODO: the rreq for rendezvous is dequeued from the posted queue or the unexpected queue when do_cts is called,
+   so we can reuse rreq->dev.next */
+#define MPID_nem_dcfa_lmtq_empty(q) GENERICM_Q_EMPTY(q)
+#define MPID_nem_dcfa_lmtq_head(q) GENERICM_Q_HEAD(q)
+#define MPID_nem_dcfa_lmtq_next_field(ep, next_field) REQ_FIELD(ep, next_field) 
+#define MPID_nem_dcfa_lmtq_next(ep) REQ_FIELD(ep, lmt_next) 
+#define MPID_nem_dcfa_lmtq_enqueue(qp, ep) GENERICM_Q_ENQUEUE(qp, ep, MPID_nem_dcfa_lmtq_next_field, lmt_next);
+
+#define MPID_nem_dcfa_diff32(a, b) ((uint32_t)((a + (1ULL<<32) - b) & ((1ULL<<32)-1)))
+#define MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa) (vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY && MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY && MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG)
+#define MPID_nem_dcfa_sendq_ready_to_send_head_lmt_put(vc_dcfa) (vc_dcfa->ibcom->ncom_lmt_put < IBCOM_MAX_SQ_CAPACITY && MPID_nem_dcfa_ncqe_lmt_put < IBCOM_MAX_CQ_CAPACITY)
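+
+/* A worked example (illustrative only) of the wrap-around distance used in the
+   credit check above: with a 32-bit sequence space, a head that has wrapped
+   past the tail still yields a small positive distance, e.g.
+   MPID_nem_dcfa_diff32(2, 0xfffffffeU) == 4, and that distance is compared
+   against IBCOM_RDMABUF_NSEG to decide whether another eager send fits. */
+static inline int MPID_nem_dcfa_rdmabuf_credit_example(uint32_t sseq_num, uint32_t lsr_seq_num_tail)
+{
+    return MPID_nem_dcfa_diff32(sseq_num, lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG;
+}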
+
+/* counting bloom filter to detect multiple lmt-sends in one send-wait period to
+   avoid overwriting the last byte in the receive buffer */
+#define MPID_nem_dcfa_cbf_nslot 16 /* slots */
+#define MPID_nem_dcfa_cbf_bitsperslot 4  /* one slot can accommodate multiple bits */
+#define MPID_nem_dcfa_cbf_lognslot 4
+#define MPID_nem_dcfa_cbf_nhash 3 /* number of hash functions */
+#define MPID_nem_dcfa_getpos \
+    int pos_8b = pos / (8 / MPID_nem_dcfa_cbf_bitsperslot);\
+    assert(0 <= pos_8b && pos_8b < MPID_nem_dcfa_cbf_nslot * MPID_nem_dcfa_cbf_bitsperslot / 8);\
+    int pos_bps = pos & (8 / MPID_nem_dcfa_cbf_bitsperslot - 1);
+#define MPID_nem_dcfa_shift \
+    ((array[pos_8b] >> (pos_bps * MPID_nem_dcfa_cbf_bitsperslot)) & ((1ULL<<MPID_nem_dcfa_cbf_bitsperslot) - 1))
+#define MPID_nem_dcfa_maskset \
+    array[pos_8b] &= ~(((1ULL<<MPID_nem_dcfa_cbf_bitsperslot) - 1) << (pos_bps * MPID_nem_dcfa_cbf_bitsperslot)); \
+    array[pos_8b] |= (bits & ((1ULL<<MPID_nem_dcfa_cbf_bitsperslot)-1)) << (pos_bps * MPID_nem_dcfa_cbf_bitsperslot)
+static inline int MPID_nem_dcfa_cbf_get(uint8_t* array, int pos) {
+    MPID_nem_dcfa_getpos;
+    return MPID_nem_dcfa_shift;
+}
+static inline void MPID_nem_dcfa_cbf_set(uint8_t* array, int pos, uint16_t bits) {
+    MPID_nem_dcfa_getpos;
+    MPID_nem_dcfa_maskset;
+}
+static inline void MPID_nem_dcfa_cbf_inc(uint8_t* array, int pos) {
+    MPID_nem_dcfa_getpos;
+    int16_t bits = MPID_nem_dcfa_shift;
+    assert(bits != (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1);
+    bits++;
+    MPID_nem_dcfa_maskset;
+}
+static inline void MPID_nem_dcfa_cbf_dec(uint8_t* array, int pos) {
+    MPID_nem_dcfa_getpos;
+    int16_t bits = MPID_nem_dcfa_shift;
+    assert(bits != 0);
+    bits--;
+    MPID_nem_dcfa_maskset;
+}
+static inline int MPID_nem_dcfa_cbf_hash1(uint64_t addr) {
+    return
+        (
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 0)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^ 
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 3)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 6)) & (MPID_nem_dcfa_cbf_nslot - 1))
+         + 1) & (MPID_nem_dcfa_cbf_nslot - 1);
+}
+static inline int MPID_nem_dcfa_cbf_hash2(uint64_t addr) {
+    /* adding one because addr tends to have a postfix of "fff" */
+    return 
+        (
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 1)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 4)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 7)) & (MPID_nem_dcfa_cbf_nslot - 1)) 
+         + 1) & (MPID_nem_dcfa_cbf_nslot - 1);
+}
+static inline int MPID_nem_dcfa_cbf_hash3(uint64_t addr) {
+    /* adding two because addr tends to have a postfix of "fff" */
+    return 
+        (
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 2)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 5)) & (MPID_nem_dcfa_cbf_nslot - 1)) ^
+         ((addr >> (MPID_nem_dcfa_cbf_lognslot * 8)) & (MPID_nem_dcfa_cbf_nslot - 1))
+         + 2) & (MPID_nem_dcfa_cbf_nslot - 1);
+
+}
+static inline void MPID_nem_dcfa_cbf_add(uint64_t addr, uint8_t* array) {
+    //dprintf("cbf_add,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));
+    //dprintf("cbf_add,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
+    MPID_nem_dcfa_cbf_inc(array, MPID_nem_dcfa_cbf_hash1(addr));
+    MPID_nem_dcfa_cbf_inc(array, MPID_nem_dcfa_cbf_hash2(addr));
+    MPID_nem_dcfa_cbf_inc(array, MPID_nem_dcfa_cbf_hash3(addr));
+    dprintf("cbf_add,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
+}
+static inline void MPID_nem_dcfa_cbf_delete(uint64_t addr, uint8_t* array) {
+    //dprintf("cbf_delete,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));
+    //dprintf("cbf_delete,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
+    MPID_nem_dcfa_cbf_dec(array, MPID_nem_dcfa_cbf_hash1(addr));
+    MPID_nem_dcfa_cbf_dec(array, MPID_nem_dcfa_cbf_hash2(addr));
+    MPID_nem_dcfa_cbf_dec(array, MPID_nem_dcfa_cbf_hash3(addr));
+    dprintf("cbf_delete,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
+}
+static inline int MPID_nem_dcfa_cbf_query(uint64_t addr, uint8_t* array) {
+    //dprintf("cbf_query,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));
+    //dprintf("cbf_query,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
+    return 
+        MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)) > 0 &&
+        MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)) > 0 &&
+        MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)) > 0;
+}
+static inline int MPID_nem_dcfa_cbf_would_overflow(uint64_t addr, uint8_t* array) {
+    //dprintf("cbf_would_overflow,addr=%08lx,%08x,%08x,%08x\n", addr, MPID_nem_dcfa_cbf_hash1(addr), MPID_nem_dcfa_cbf_hash2(addr), MPID_nem_dcfa_cbf_hash3(addr));
+    //dprintf("cbf_would_overflow,%d,%d,%d\n", MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)), MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)));
+    return 
+        MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash1(addr)) == (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1 ||
+        MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash2(addr)) == (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1 ||
+        MPID_nem_dcfa_cbf_get(array, MPID_nem_dcfa_cbf_hash3(addr)) == (1ULL << MPID_nem_dcfa_cbf_bitsperslot) - 1;
+}
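+
+/* A minimal usage sketch (illustrative only) of the counting Bloom filter
+   above: record the tail-byte address of a receive buffer when an lmt-send is
+   issued, ask whether an in-flight lmt-send may already target it, and
+   release the entry when the transfer completes. */
+static inline int MPID_nem_dcfa_cbf_usage_example(uint64_t tail_addr)
+{
+    uint8_t cbf[MPID_nem_dcfa_cbf_nslot * MPID_nem_dcfa_cbf_bitsperslot / 8] = { 0 };
+    int possibly_in_flight;
+
+    if (MPID_nem_dcfa_cbf_would_overflow(tail_addr, cbf)) {
+        return -1;                                                /* a slot is saturated, cannot record */
+    }
+    MPID_nem_dcfa_cbf_add(tail_addr, cbf);                        /* lmt-send issued for this tail byte */
+    possibly_in_flight = MPID_nem_dcfa_cbf_query(tail_addr, cbf); /* 1 here: may be present */
+    MPID_nem_dcfa_cbf_delete(tail_addr, cbf);                     /* transfer done, release the entry */
+    return possibly_in_flight;
+}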
+
+/* functions */
+uint64_t MPID_nem_dcfa_rdtsc();
+int MPID_nem_dcfa_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p);
+int MPID_nem_dcfa_finalize(void);
+int MPID_nem_dcfa_drain_scq();
+int MPID_nem_dcfa_drain_scq_lmt_put();
+int MPID_nem_dcfa_drain_scq_scratch_pad();
+int MPID_nem_dcfa_poll(int in_blocking_poll);
+int MPID_nem_dcfa_poll_eager(MPIDI_VC_t *vc);
+
+int MPID_nem_dcfa_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p);
+int MPID_nem_dcfa_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc);
+int MPID_nem_dcfa_vc_init(MPIDI_VC_t * vc);
+int MPID_nem_dcfa_vc_destroy(MPIDI_VC_t * vc);
+int MPID_nem_dcfa_vc_terminate(MPIDI_VC_t * vc);
+int MPID_nem_dcfa_pkthandler_init(MPIDI_CH3_PktHandler_Fcn *pktArray[], int arraySize);
+
+int MPID_nem_dcfa_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *header, MPIDI_msg_sz_t hdr_sz);
+
+/* CH3 send/recv functions */
+int MPID_nem_dcfa_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz);
+int MPID_nem_dcfa_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr);
+
+/* used by dcfa_poll.c */
+int MPID_nem_dcfa_send_progress(MPID_nem_dcfa_vc_area *vc_dcfa);
+
+/* CH3--lmt send/recv functions */
+int MPID_nem_dcfa_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts_pkt, struct MPID_Request *req);
+int MPID_nem_dcfa_lmt_start_recv(struct MPIDI_VC *vc, struct MPID_Request *req, MPID_IOV s_cookie);
+int MPID_nem_dcfa_lmt_handle_cookie(struct MPIDI_VC *vc, struct MPID_Request *req, MPID_IOV cookie);
+int MPID_nem_dcfa_lmt_done_send(struct MPIDI_VC *vc, struct MPID_Request *req);
+int MPID_nem_dcfa_lmt_done_recv(struct MPIDI_VC *vc, struct MPID_Request *req);
+int MPID_nem_dcfa_lmt_vc_terminated(struct MPIDI_VC *vc);
+/* overriding functions:
+   dcfa_init.c initializes the member named "recv_posted"
+   of the BSS variable named "comm_ops" (of type MPIDI_Comm_ops_t)
+   to MPID_nem_dcfa_recv_posted.
+   MPIDI_Comm_ops_t is defined in src/mpid/ch3/include/mpidimpl.h */
+int MPID_nem_dcfa_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req);
+int MPID_nem_dcfa_recv_buf_released(struct MPIDI_VC *vc, void* user_data);
+
+void pht_update(uint64_t vpc, uint32_t hist, int32_t taken);
+int pht_pred(uint64_t vpc, uint32_t hist);
+
+/* Keys for business cards */
+#define MPIDI_CH3I_GID_KEY "gid"
+#define MPIDI_CH3I_LID_KEY "lid"
+#define MPIDI_CH3I_QPN_KEY "qpn"
+#define MPIDI_CH3I_RKEY_KEY "rkey"
+#define MPIDI_CH3I_RMEM_KEY "rmem"
+
+#define MPID_NEM_DCFA_RECV_MAX_PKT_LEN 1024
+
+extern int MPID_nem_dcfa_conn_ud_fd;
+extern IbCom *MPID_nem_dcfa_conn_ud_ibcom;
+extern dcfa_conn_ud_t *MPID_nem_dcfa_conn_ud;
+extern dcfaconn_t *MPID_nem_dcfa_conns;
+extern MPIDI_VC_t **MPID_nem_dcfa_pollingset;
+extern int *MPID_nem_dcfa_scratch_pad_fds;
+extern int MPID_nem_dcfa_npollingset;
+extern void* MPID_nem_dcfa_fl[18];
+extern int MPID_nem_dcfa_nranks;
+//extern char *MPID_nem_dcfa_recv_buf;
+extern int MPID_nem_dcfa_myrank;
+extern uint64_t MPID_nem_dcfa_tsc_poll; /* to throttle dcfa_poll in recv_posted (in dcfa_poll.c) */
+extern int MPID_nem_dcfa_ncqe; /* for lazy poll scq */
+extern int MPID_nem_dcfa_ncqe_lmt_put; /* lmt-put uses another QP, SQ, CQ to speed-up fetching CQE */
+#ifdef DCFA_ONDEMAND
+extern MPID_nem_dcfa_cm_map_t MPID_nem_dcfa_cm_state;
+extern int MPID_nem_dcfa_ncqe_connect; /* counting outstanding connection requests */
+#endif
+extern int MPID_nem_dcfa_ncqe_scratch_pad;
+extern int MPID_nem_dcfa_ncqe_to_drain; /* count put in lmt-put-done protocol */
+extern int MPID_nem_dcfa_ncqe_nces; /* counting non-copied eager-send */
+extern MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmtq; /* poll queue for lmt */
+extern MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmt_orderq; /* force ordering when two or more rts_to_sender operations would race on the last byte of the receive buffer */
+extern MPID_nem_dcfa_vc_area* MPID_nem_dcfa_debug_current_vc_dcfa;
+
+/* to detect multiple lmt-sends in one send-wait period to
+   avoid overwriting the last byte in the receive buffer */
+extern uint8_t MPID_nem_dcfa_lmt_tail_addr_cbf[MPID_nem_dcfa_cbf_nslot * MPID_nem_dcfa_cbf_bitsperslot / 8];
+
+#define MPID_NEM_DCFA_MAX_POLLINGSET 64
+
+/* xfer.c manages memory region using memid */
+#define MPID_NEM_DCFA_MEMID_RDMA 0
+
+/* command using IB UD */
+#define MPID_NEM_DCFA_SYNC_SYN 0 
+#define MPID_NEM_DCFA_SYNC_SYNACK 1 
+#define MPID_NEM_DCFA_SYNC_NACK 2 
+
+#define MPID_NEM_DCFA_EAGER_MAX_MSG_SZ (IBCOM_RDMABUF_SZSEG/*1024*/-sizeof(MPIDI_CH3_Pkt_t)+sizeof(MPIDI_CH3_Pkt_eager_send_t)-sizeof(sz_hdrmagic_t)-sizeof(MPID_nem_dcfa_pkt_prefix_t)-sizeof(tailmagic_t)) /* when > this size, lmt is used. see src/mpid/ch3/src/mpid_isend.c */
+#define MPID_NEM_DCFA_POLL_PERIOD_RECV_POSTED 2000 /* minimum period from previous dcfa_poll to dcfa_poll in recv_posted */
+#define MPID_NEM_DCFA_POLL_PERIOD_SEND_POSTED 2000
+
+typedef struct {
+    void* addr;
+    uint32_t rkey;
+    int seq_num_tail; /* notify RDMA-write-to buffer occupation */
+    uint8_t tail; /* last word of payload */
+} MPID_nem_dcfa_lmt_cookie_t;
+
+typedef enum MPID_nem_dcfa_pkt_subtype {
+    MPIDI_NEM_DCFA_PKT_EAGER_SEND,
+    MPIDI_NEM_DCFA_PKT_PUT,
+    MPIDI_NEM_DCFA_PKT_ACCUMULATE,
+    MPIDI_NEM_DCFA_PKT_GET,
+    MPIDI_NEM_DCFA_PKT_GET_RESP,
+    MPIDI_NEM_DCFA_PKT_LMT_GET_DONE,
+    MPIDI_NEM_DCFA_REQ_SEQ_NUM,
+    MPIDI_NEM_DCFA_REPLY_SEQ_NUM,
+    MPIDI_NEM_DCFA_CHG_RDMABUF_OCC_NOTIFY_STATE,
+    MPIDI_NEM_DCFA_NUM_PKT_HANDLERS
+} MPID_nem_dcfa_pkt_subtype_t;
+
+/* derived from MPID_nem_pkt_netmod_t */
+typedef struct MPID_nem_dcfa_pkt_prefix {
+    MPID_nem_pkt_type_t type;
+    unsigned subtype;
+    /* additional field */
+    int seq_num_tail;
+} MPID_nem_dcfa_pkt_prefix_t;
+
+/* derived from MPID_nem_pkt_netmod_t and MPID_nem_pkt_lmt_done_t */
+typedef struct MPID_nem_dcfa_pkt_lmt_get_done {
+    MPID_nem_pkt_type_t type;
+    unsigned subtype;
+    /* additional field */
+    MPI_Request req_id;
+    int seq_num_tail;
+} MPID_nem_dcfa_pkt_lmt_get_done_t;
+
+/* derived from MPID_nem_pkt_netmod_t */
+typedef struct MPID_nem_dcfa_pkt_req_seq_num_t {
+    MPID_nem_pkt_type_t type;
+    unsigned subtype;
+    /* additional field */
+    int seq_num_tail;
+} MPID_nem_dcfa_pkt_req_seq_num_t;
+
+/* derived from MPID_nem_pkt_netmod_t */
+typedef struct MPID_nem_dcfa_pkt_reply_seq_num_t {
+    MPID_nem_pkt_type_t type;
+    unsigned subtype;
+    /* additional field */
+    int seq_num_tail;
+} MPID_nem_dcfa_pkt_reply_seq_num_t;
+
+/* derived from MPID_nem_pkt_netmod_t */
+typedef struct MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t {
+    MPID_nem_pkt_type_t type;
+    unsigned subtype;
+    /* additional field */
+    int state;
+} MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t;
+
+int MPID_nem_dcfa_PktHandler_EagerSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_Accumulate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_Get( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_GetResp( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */);
+int MPID_nem_dcfa_PktHandler_lmt_done(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
+int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
+int MPID_nem_dcfa_PktHandler_req_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
+int MPID_nem_dcfa_PktHandler_reply_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
+int MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
+
+/* MPID_nem_dcfa_PktHandler_lmt_done is a wrapper of pkt_DONE_handler and calls it */
+/* pkt_DONE_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) is not exported */
+int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
+
+
+#define MPID_nem_dcfa_send_req_seq_num(vc) do { \
+        MPID_PKT_DECL_CAST(_upkt, MPID_nem_dcfa_pkt_req_seq_num_t, _pkt); \
+        MPID_Request *_req; \
+        \
+        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending req_seq_num packet"); \
+        MPIDI_Pkt_init(_pkt, MPIDI_NEM_PKT_NETMOD); \
+        _pkt->subtype = MPIDI_NEM_DCFA_REQ_SEQ_NUM; \
+        \
+        MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc); \
+        _pkt->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail; \
+        vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail; \
+        \
+        mpi_errno = MPIDI_CH3_iStartMsg((vc), _pkt, sizeof(*_pkt), &_req); \
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_send_req_seq_num"); \
+        if (_req != NULL) { \
+            MPIU_ERR_CHKANDJUMP(_req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_send_req_seq_num"); \
+            MPID_Request_release(_req); \
+        } \
+    } while (0)   
+
+#define MPID_nem_dcfa_send_reply_seq_num(vc) do {                                                             \
+        MPID_PKT_DECL_CAST(_upkt, MPID_nem_dcfa_pkt_reply_seq_num_t, _pkt);                                   \
+        MPID_Request *_req;                                                                                   \
+                                                                                                              \
+        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending reply_seq_num packet"); \
+        MPIDI_Pkt_init(_pkt, MPIDI_NEM_PKT_NETMOD); \
+        _pkt->subtype = MPIDI_NEM_DCFA_REPLY_SEQ_NUM; \
+                                                                                                              \
+        int *rsr_seq_num_tail;                                                                                \
+        MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);                                                         \
+        ibcom_errno = ibcom_rsr_seq_num_tail_get(vc_dcfa->sc->fd, &rsr_seq_num_tail);                         \
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rsr_seq_num_tail_get");           \
+        _pkt->seq_num_tail = *rsr_seq_num_tail;                                                               \
+                                                                                                              \
+        int *rsr_seq_num_tail_last_sent;                                                                      \
+        ibcom_errno = ibcom_rsr_seq_num_tail_last_sent_get(vc_dcfa->sc->fd, &rsr_seq_num_tail_last_sent);     \
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rsr_seq_num_tail_last_sent_get");           \
+        *rsr_seq_num_tail_last_sent = *rsr_seq_num_tail;                                                      \
+\
+        mpi_errno = MPIDI_CH3_iStartMsg((vc), _pkt, sizeof(*_pkt), &_req);                                    \
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_send_reply_seq_num");                          \
+        if (_req != NULL) {                                                                                   \
+            MPIU_ERR_CHKANDJUMP(_req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_send_reply_seq_num");         \
+            MPID_Request_release(_req);                                                                       \
+        }                                                                                                     \
+    } while (0)   
+
+#define MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc, _state) do {                             \
+        MPID_PKT_DECL_CAST(_upkt, MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t, _pkt);           \
+        MPID_Request *_req;                                                                                   \
+                                                                                                              \
+        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending change_rdmabuf_occupancy_notify_state packet");               \
+        MPIDI_Pkt_init(_pkt, MPIDI_NEM_PKT_NETMOD); \
+        _pkt->subtype = MPIDI_NEM_DCFA_CHG_RDMABUF_OCC_NOTIFY_STATE; \
+        _pkt->state = _state;                                                                                 \
+                                                                                                              \
+        mpi_errno = MPIDI_CH3_iStartMsg((vc), _pkt, sizeof(*_pkt), &_req);                                    \
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state");                          \
+        if (_req != NULL) {                                                                                   \
+            MPIU_ERR_CHKANDJUMP(_req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state");         \
+            MPID_Request_release(_req);                                                                       \
+        }                                                                                                     \
+    } while (0)   
+
+#define MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, lsr_seq_num_tail) \
+    do { \
+        int lsr_seq_num_head; \
+        /* sequence number of (largest) in-flight send command */ \
+        ibcom_errno = ibcom_sseq_num_get(vc_dcfa->sc->fd, &lsr_seq_num_head); \
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_sseq_num_get"); \
+        \
+        int *rdmabuf_occupancy_notify_rstate; \
+        ibcom_errno = ibcom_rdmabuf_occupancy_notify_rstate_get(vc_dcfa->sc->fd, &rdmabuf_occupancy_notify_rstate); \
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rdmabuf_occupancy_notify_rstate_get"); \
+        \
+        /*dprintf("notify_policy_lw,head=%d,tail=%d,lw=%d\n", lsr_seq_num_head, *lsr_seq_num_tail, IBCOM_RDMABUF_LOW_WATER_MARK);*/ \
+        /* if the number of occupied slots in the RDMA-write-to buffer has dropped below the low water mark */ \
+        if(*rdmabuf_occupancy_notify_rstate == IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW && \
+           MPID_nem_dcfa_diff32(lsr_seq_num_head, *lsr_seq_num_tail) < IBCOM_RDMABUF_LOW_WATER_MARK) { \
+            dprintf("changing notify_rstate\n"); \
+            /* remember the remote notification policy so that the local side knows when to change it back to HW */ \
+            *rdmabuf_occupancy_notify_rstate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW; \
+            /* ask the remote side to change its policy for notifying RDMA-write-to buffer occupancy */ \
+            MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc, IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW); \
+        } \
+    } while(0)
+   
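+/* The packets and macros above implement flow control for the RDMA-write-to
+   ring buffer: MPID_nem_dcfa_send_req_seq_num and
+   MPID_nem_dcfa_send_reply_seq_num both report the local rsr_seq_num_tail to
+   the peer (and remember the last value sent), and the lmt GET_DONE packet
+   below piggybacks the same information.  CHG_RDMABUF_OCC_NOTIFY_STATE
+   switches the peer between the HW and LW notification policies, and the
+   policy_lw macro requests the switch to LW once the number of in-flight
+   slots (head minus tail) drops below IBCOM_RDMABUF_LOW_WATER_MARK. */
+#if 0
+/* Minimal sketch of the wrap-safe difference the macro relies on; the actual
+   MPID_nem_dcfa_diff32 is defined elsewhere in this netmod, so these exact
+   semantics are an assumption here. */
+static inline uint32_t assumed_diff32(uint32_t a, uint32_t b)
+{
+    return a - b;   /* unsigned subtraction gives the distance modulo 2^32 */
+}
+#endif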
+#define MPID_nem_dcfa_lmt_send_GET_DONE(vc, rreq) do {                                                                   \
+        MPID_PKT_DECL_CAST(_upkt, MPID_nem_dcfa_pkt_lmt_get_done_t, _done_pkt);                                          \
+        MPID_Request *_done_req;                                                                                \
+        MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc); \
+                                                                                                                \
+        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending rndv DONE packet"); \
+        MPIDI_Pkt_init(_done_pkt, MPIDI_NEM_PKT_NETMOD); \
+        _done_pkt->subtype = MPIDI_NEM_DCFA_PKT_LMT_GET_DONE;\
+        _done_pkt->req_id = (rreq)->ch.lmt_req_id; \
+            /* embed SR occupancy information */ \
+        _done_pkt->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail; \
+ \
+            /* remember the last one sent */ \
+            vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail; \
+                                                                                                                \
+        mpi_errno = MPIDI_CH3_iStartMsg((vc), _done_pkt, sizeof(*_done_pkt), &_done_req);                       \
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**donepkt");                                  \
+        if (_done_req != NULL)                                                                                  \
+        {                                                                                                       \
+            MPIU_ERR_CHKANDJUMP(_done_req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**donepkt");            \
+            MPID_Request_release(_done_req);                                                                    \
+        }                                                                                                       \
+    } while (0)   
+
+#define DCFA_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+static inline void* MPID_nem_dcfa_stmalloc(size_t _sz) {
+    size_t sz = _sz;
+    int i = 0;
+    /* i = bit length of _sz, i.e. floor(log2(_sz)) + 1; sz itself ends up as 0 */
+    do { i++; sz >>= 1; } while(sz > 0);
+    if(i < 12) { return malloc(_sz); } /* small requests go straight to malloc (use _sz; sz was shifted to 0) */
+    if(i > 30) { return mmap(0, _sz, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); } /* huge requests bypass the free list */
+    int ndx = i - 12;
+    void* slot;
+    if(MPID_nem_dcfa_fl[ndx]) {
+        /* reuse a slot cached in the per-size-class free list */
+        slot = MPID_nem_dcfa_fl[ndx];
+        if(MPID_nem_dcfa_myrank == 1) {
+            //printf("stmalloc,reuse %p,%08x\n", slot, (int)_sz);
+        }
+        MPID_nem_dcfa_fl[ndx] = *((void**)MPID_nem_dcfa_fl[ndx]);
+    } else {
+        /* no cached slot; map a new region rounded up to 1<<i bytes */
+        slot = mmap(0, 1<<i, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+        if(MPID_nem_dcfa_myrank == 1) {
+            //printf("stmalloc,new %p,%08x\n", slot, (int)_sz);
+        }
+    }
+    return slot;
+}
+static inline void MPID_nem_dcfa_stfree(void* ptr, size_t sz) {
+    size_t _sz = sz; /* preserve the requested size; sz is shifted down to 0 below */
+    if(MPID_nem_dcfa_myrank == 1) {
+        //printf("stfree,%p,%08x\n", ptr, (int)sz);
+    }
+    int i = 0;
+    do { i++; sz >>= 1; } while(sz > 0);
+    if(i < 12) { free(ptr); goto fn_exit; }
+    if(i > 30) { munmap(ptr, _sz); goto fn_exit; }
+    int ndx = i - 12;
+    /* push the slot onto the per-size-class free list so stmalloc can reuse it */
+    *((void**)ptr) = MPID_nem_dcfa_fl[ndx];
+    MPID_nem_dcfa_fl[ndx] = ptr;
+ fn_exit:;
+}
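+/* MPID_nem_dcfa_stmalloc/stfree form a simple size-class allocator: requests
+   whose bit length i is below 12 go to malloc, those above 30 are mmap'ed
+   directly, and everything in between is served from MPID_nem_dcfa_fl[i-12],
+   a free list of mmap'ed slots of 1<<i bytes each. */
+#if 0
+/* Hypothetical usage sketch (not part of the netmod): the same size must be
+   passed to stfree as was passed to stmalloc, because the size selects the
+   free-list bin. */
+static void stmalloc_usage_example(void)
+{
+    size_t sz = 128 * 1024;                  /* bit length 18 -> bin 18 - 12 = 6 */
+    void *buf = MPID_nem_dcfa_stmalloc(sz);  /* mmap 1<<18 bytes or reuse a cached slot */
+    /* ... use buf, e.g. as an LMT pack buffer ... */
+    MPID_nem_dcfa_stfree(buf, sz);           /* pushes the slot back onto bin 6 */
+}
+#endif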
+#endif /* DCFA_IMPL_H */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_init.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_init.c
new file mode 100644
index 0000000..efdc22e
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_init.c
@@ -0,0 +1,936 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ *  (C) 2012 NEC Corporation
+ *      Author: Masamichi Takagi
+ */
+
+#include "dcfa_impl.h"
+#ifdef USE_PMI2_API
+#include "pmi2.h"
+#else
+#include "pmi.h"
+#endif
+
+//#define DEBUG_DCFA_INIT
+#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#undef dprintf
+#endif
+#ifdef DEBUG_DCFA_INIT
+#define dprintf printf
+#else
+#define dprintf(...)
+#endif
+
+MPID_nem_netmod_funcs_t MPIDI_nem_dcfa_funcs = {
+    MPID_nem_dcfa_init,
+    MPID_nem_dcfa_finalize,
+    MPID_nem_dcfa_poll,
+    MPID_nem_dcfa_get_business_card,
+    MPID_nem_dcfa_connect_to_root,
+    MPID_nem_dcfa_vc_init,
+    MPID_nem_dcfa_vc_destroy,
+    MPID_nem_dcfa_vc_terminate,
+    NULL, /*MPID_nem_dcfa_anysource_iprobe*/
+    NULL, /*MPID_nem_dcfa_anysource_improbe*/
+};
+
+MPIDI_CH3_PktHandler_Fcn *MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_NUM_PKT_HANDLERS];
+
+static MPIDI_Comm_ops_t comm_ops = {
+    /*NULL,*/MPID_nem_dcfa_recv_posted, /* recv_posted */
+    
+    NULL, /* send */
+    NULL, /* rsend */
+    NULL, /* ssend */
+    NULL, /* isend */
+    NULL, /* irsend */
+    NULL, /* issend */
+    
+    NULL, /* send_init */
+    NULL, /* bsend_init */
+    NULL, /* rsend_init */
+    NULL, /* ssend_init */
+    NULL, /* startall */
+    
+    NULL,/* cancel_send */
+    NULL, /* cancel_recv */
+    
+    NULL, /* probe */
+    NULL, /* iprobe */
+    NULL, /* improbe */
+};
+
+void* MPID_nem_dcfa_fl[18];
+int MPID_nem_dcfa_nranks;
+dcfa_conn_ud_t *MPID_nem_dcfa_conn_ud;
+dcfaconn_t *MPID_nem_dcfa_conns;
+MPIDI_VC_t **MPID_nem_dcfa_pollingset;
+int MPID_nem_dcfa_conn_ud_fd;
+IbCom *MPID_nem_dcfa_conn_ud_ibcom;
+int MPID_nem_dcfa_npollingset;
+int *MPID_nem_dcfa_scratch_pad_fds;
+//char *MPID_nem_dcfa_recv_buf;
+int MPID_nem_dcfa_myrank;
+uint64_t MPID_nem_dcfa_tsc_poll;
+int MPID_nem_dcfa_ncqe;
+int MPID_nem_dcfa_ncqe_lmt_put;
+#ifdef DCFA_ONDEMAND
+MPID_nem_dcfa_cm_map_t MPID_nem_dcfa_cm_state;
+int MPID_nem_dcfa_ncqe_connect;
+#endif
+int MPID_nem_dcfa_ncqe_scratch_pad;
+int MPID_nem_dcfa_ncqe_to_drain;
+int MPID_nem_dcfa_ncqe_nces;
+MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmtq = {NULL, NULL}; 
+MPID_nem_dcfa_lmtq_t MPID_nem_dcfa_lmt_orderq = {NULL, NULL}; 
+uint8_t MPID_nem_dcfa_lmt_tail_addr_cbf[MPID_nem_dcfa_cbf_nslot * MPID_nem_dcfa_cbf_bitsperslot / 8] = {0};
+static uint32_t MPID_nem_dcfa_rand_next = 1;
+MPID_nem_dcfa_vc_area* MPID_nem_dcfa_debug_current_vc_dcfa;
+static int listen_fd;
+static int listen_port;
+
+uint8_t MPID_nem_dcfa_rand() {
+    //return 0xaa;
+    MPID_nem_dcfa_rand_next = MPID_nem_dcfa_rand_next * 1103515245 + 12345;
+    return (MPID_nem_dcfa_rand_next/65536) % 256;
+}
+
+uint64_t MPID_nem_dcfa_rdtsc() {
+    uint64_t x;
+    __asm__ __volatile__("rdtsc; shl $32, %%rdx; or %%rdx, %%rax" : "=a"(x) : : "%rdx", "memory"); /* the memory clobber keeps the compiler from moving rdtsc earlier */
+    return x;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_kvs_put_binary
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_kvs_put_binary(int from, const char *postfix, const uint8_t *buf, int length) {
+    int mpi_errno = MPI_SUCCESS;
+    int pmi_errno;
+    char* kvs_name;
+    char key[256], val[256], str[256];
+    int j;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_KVS_PUT_BINARY);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_KVS_PUT_BINARY);
+
+    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);      
+    MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPIDI_PG_GetConnKVSname");
+    dprintf("kvs_put_binary,kvs_name=%s\n", kvs_name);
+
+    sprintf(key, "bc/%d/%s", from, postfix);
+    val[0] = 0;
+    for(j = 0; j < length; j++) {
+        sprintf(str, "%02x", buf[j]);
+        strcat(val, str);
+    }
+    dprintf("kvs_put_binary,rank=%d,from=%d,PMI_KVS_Put(%s, %s, %s)\n", MPID_nem_dcfa_myrank, from, kvs_name, key, val);
+    pmi_errno = PMI_KVS_Put(kvs_name, key, val);
+    MPIU_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMI_KVS_Put");
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_KVS_PUT_BINARY);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
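+/* Example of the encoding above: on a little-endian host, rank 3 calling
+   MPID_nem_dcfa_kvs_put_binary(3, "sp/lid", (uint8_t*)&my_lid, 2) with
+   my_lid == 0x1234 stores key "bc/3/sp/lid" with value "3412" (one "%02x"
+   per byte, in memory order); MPID_nem_dcfa_kvs_get_binary below reverses
+   the per-byte hex encoding. */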
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_kvs_get_binary
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_kvs_get_binary(int from, const char *postfix, char *buf, int length) {
+    int mpi_errno = MPI_SUCCESS;
+    int pmi_errno;
+    char* kvs_name;
+    char key[256], val[256], str[256];
+    int j;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_KVS_GET_BINARY);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_KVS_GET_BINARY);
+
+    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);      
+    dprintf("kvs_get_binary,kvs_name=%s\n", kvs_name);
+    MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPIDI_PG_GetConnKVSname");
+
+    sprintf(key, "bc/%d/%s", from, postfix);
+    pmi_errno = PMI_KVS_Get(kvs_name, key, val, 256);
+    dprintf("kvs_get_binary,rank=%d,from=%d,PMI_KVS_Get(%s, %s, %s)\n", MPID_nem_dcfa_myrank, from, kvs_name, key, val);
+    MPIU_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMI_KVS_Get");
+
+    dprintf("rank=%d,obtained val=%s\n", MPID_nem_dcfa_myrank, val);
+    char* strp = val;
+    for(j = 0; j < length; j++) {
+        memcpy(str, strp, 2);
+        str[2] = 0;
+        buf[j] = strtol(str, NULL, 16);
+        strp += 2;
+    }
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_KVS_GET_BINARY);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_init
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno = 0, pmi_errno;
+    int ret;
+    int i, j;
+    int ib_port = 1;
+
+    MPIU_CHKPMEM_DECL(7);
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_INIT);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_INIT);
+
+    /* first make sure that our private fields in the vc fit into the area provided  */
+    MPIU_Assert(sizeof(MPID_nem_dcfa_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);
+
+    MPID_nem_dcfa_nranks = pg_p->size;
+    MPID_nem_dcfa_myrank = pg_rank;
+    MPID_nem_dcfa_tsc_poll = MPID_nem_dcfa_rdtsc();
+    MPID_nem_dcfa_ncqe = 0;
+    MPID_nem_dcfa_ncqe_lmt_put = 0;
+#ifdef DCFA_ONDEMAND
+    MPID_nem_dcfa_ncqe_connect = 0;
+#endif
+    MPID_nem_dcfa_ncqe_scratch_pad = 0;
+    MPID_nem_dcfa_ncqe_to_drain = 0;
+    MPID_nem_dcfa_ncqe_nces = 0;
+    MPID_nem_dcfa_npollingset = 0;
+
+#ifdef DCFA_ONDEMAND    
+    /* prepare UD QPN for dynamic connection */
+    ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_UD, &MPID_nem_dcfa_conn_ud_fd);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_conn_ud_fd, &MPID_nem_dcfa_conn_ud_ibcom); 
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
+    ibcom_errno = ibcom_rts(MPID_nem_dcfa_conn_ud_fd, 0, 0, 0);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
+    
+    for(i = 0; i < IBCOM_MAX_RQ_CAPACITY; i++) {
+        ibcom_errno = ibcom_udrecv(MPID_nem_dcfa_conn_ud_fd);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_udrecv");
+    }
+    
+    /* obtain gid, lid, qpn using KVS */
+    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_conn_ud, dcfa_conn_ud_t *, MPID_nem_dcfa_nranks * sizeof(dcfa_conn_ud_t), mpi_errno, "ud connection table");
+    memset(MPID_nem_dcfa_conn_ud, 0, MPID_nem_dcfa_nranks * sizeof(dcfa_conn_ud_t));
+
+    /* put bc/<my rank>/dcs/gid:lid:qpn */
+    uint32_t  my_qpnum;
+    uint16_t my_lid;
+    union ibv_gid my_gid;
+    ibcom_get_info_conn(MPID_nem_dcfa_conn_ud_fd, IBCOM_INFOKEY_QP_QPN, &my_qpnum, sizeof(uint32_t));
+    ibcom_get_info_conn(MPID_nem_dcfa_conn_ud_fd, IBCOM_INFOKEY_PORT_LID, &my_lid, sizeof(uint16_t));
+    ibcom_get_info_conn(MPID_nem_dcfa_conn_ud_fd, IBCOM_INFOKEY_PORT_GID, &my_gid, sizeof(union ibv_gid));
+
+    char* kvs_name;
+    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);      
+    char* key_dcs, val[2*sizeof(union ibv_gid)+1+4+1+8+1], str[9];
+
+    /* count the maximum length of the string representation of remote_rank */
+    int nranks;
+    for(i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) { }
+    MPIU_CHKPMEM_MALLOC(key_dcs, char*, strlen("bc/") + i + strlen("/dcs/gid_lid_qpn") + 1, mpi_errno, "connection table");
+
+    sprintf(key_dcs, "bc/%d/dcs/gid_lid_qpn", MPID_nem_dcfa_myrank);
+    val[0] = 0;
+    for(j = 0; j < sizeof(union ibv_gid); j++) {
+        sprintf(str, "%02x", my_gid.raw[j]);
+        strcat(val, str);
+    }
+    sprintf(str, ":");
+    strcat(val, str);
+    sprintf(str, "%04x:", my_lid);
+    strcat(val, str);
+    sprintf(str, "%08x", my_qpnum);
+    strcat(val, str);
+    dprintf("rank=%d,PMI_KVS_Put(%s, %s, %s)\n", MPID_nem_dcfa_myrank, kvs_name, key_dcs, val);
+    pmi_errno = PMI_KVS_Put(kvs_name, key_dcs, val);
+    MPIU_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMI_KVS_Put");
+
+    /* wait for key-value to propagate among all ranks */
+    pmi_errno = PMI_Barrier();
+    MPIU_ERR_CHKANDJUMP(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**PMI_Barrier");
+    
+    /* obtain GID, LID, QP number for remote UD QP for dynamic connection */
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
+        if(i != MPID_nem_dcfa_myrank) { 
+            sprintf(key_dcs, "bc/%d/dcs/gid_lid_qpn", i);
+            pmi_errno = PMI_KVS_Get(kvs_name, key_dcs, val, sizeof(val));
+            dprintf("pmi_errno=%d\n", pmi_errno);
+            MPIU_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**PMI_KVS_Get");
+            dprintf("rank=%d,obtained val=%s\n", MPID_nem_dcfa_myrank, val);
+            char* strp = val;
+            for(j = 0; j < sizeof(union ibv_gid); j++) {
+                memcpy(str, strp, 2);
+                str[2] = 0;
+                MPID_nem_dcfa_conn_ud[i].gid.raw[j] = strtol(str, NULL, 16);
+                strp += 2;
+            }
+            sscanf(strp, ":%04x:%08x", &MPID_nem_dcfa_conn_ud[i].lid, &MPID_nem_dcfa_conn_ud[i].qpn);
+
+            dprintf("remote rank=%d,gid=", i);
+            for(j = 0; j < sizeof(union ibv_gid); j++) {
+                dprintf("%02x", MPID_nem_dcfa_conn_ud[i].gid.raw[j]);
+            }
+            dprintf(",lid=%04x,qpn=%08x\n", MPID_nem_dcfa_conn_ud[i].lid, MPID_nem_dcfa_conn_ud[i].qpn);
+       }
+    }
+#endif   
+
+    /* malloc scratch-pad fd */
+    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_scratch_pad_fds, int*, MPID_nem_dcfa_nranks * sizeof(int), mpi_errno, "connection table");
+    memset(MPID_nem_dcfa_scratch_pad_fds, 0, MPID_nem_dcfa_nranks * sizeof(int));
+
+    /* prepare the scratch-pad QP and malloc the scratch-pad memory */
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
+        ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_SCRATCH_PAD, &MPID_nem_dcfa_scratch_pad_fds[i]);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
+
+        ibcom_errno = ibcom_alloc(MPID_nem_dcfa_scratch_pad_fds[i], MPID_nem_dcfa_nranks * sizeof(ibcom_qp_state_t));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_alloc");
+    }
+
+    /* put bc/me/sp/{gid,lid}, put bc/me/sp/{qpn,rmem,rkey}/you */
+
+#ifndef DCFA_ONDEMAND
+    int nranks;
+    uint32_t my_qpnum;
+    uint16_t my_lid;
+    union ibv_gid	my_gid;
+#endif
+    void* my_rmem;
+    int my_rkey;
+
+    int remote_qpnum;
+    uint16_t remote_lid;
+    union ibv_gid remote_gid;
+    void* remote_rmem;
+    int remote_rkey;
+
+    char *remote_rank_str;
+    char *key_str; 
+
+    /* count maximum length of the string representation of remote_rank */
+    for(i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) { }
+    MPIU_CHKPMEM_MALLOC(remote_rank_str, char*, 1 + i + 1, mpi_errno, "connection table");
+    MPIU_CHKPMEM_MALLOC(key_str, char*, strlen("sp/qpn") + 1 + i + 1, mpi_errno, "connection table");
+
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
+
+        if(i == 0) {
+            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_INFOKEY_PORT_LID, &my_lid, sizeof(uint16_t));
+            dprintf("dcfa_init,scratch pad,lid=%04x\n", my_lid);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+            mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, "sp/lid", (uint8_t*)&my_lid, sizeof(uint16_t));
+            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+            
+            { dprintf("dcfa_init,scratch pad,put <%d/sp/lid/,%04x>\n", MPID_nem_dcfa_myrank, (int)my_lid); }
+
+            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_INFOKEY_PORT_GID, &my_gid, sizeof(union ibv_gid));
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+            mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, "sp/gid", (uint8_t*)&my_gid, sizeof(union ibv_gid));
+            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+
+            { dprintf("dcfa_init,scratch pad,gid "); int i; for(i = 0; i < 16; i++) { dprintf("%02x", (int)my_gid.raw[i]); } dprintf("\n"); }
+        }
+
+        /* put bc/me/sp/qpn/you  */
+        strcpy(key_str, "sp/qpn");
+        sprintf(remote_rank_str, "/%x", i);
+        strcat(key_str, remote_rank_str);
+        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_INFOKEY_QP_QPN, &my_qpnum, sizeof(uint32_t));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+        dprintf("dcfa_init,scratch pad,qpn=%08x\n", my_qpnum);
+
+        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_qpnum, sizeof(uint32_t));
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+        dprintf("dcfa_init,scratch pad,kvs put done\n");
+
+        strcpy(key_str, "sp/rmem");
+        sprintf(remote_rank_str, "/%x", i);
+        strcat(key_str, remote_rank_str);
+
+        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_SCRATCH_PAD_TO, IBCOM_INFOKEY_MR_ADDR, &my_rmem, sizeof(void*));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
+
+        dprintf("dcfa_init,scratch_pad,rmem=%p\n", my_rmem);
+        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_rmem, sizeof(void*));
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+
+        strcpy(key_str, "sp/rkey");
+        sprintf(remote_rank_str, "/%x", i);
+        strcat(key_str, remote_rank_str);
+
+        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_scratch_pad_fds[i], IBCOM_SCRATCH_PAD_TO, IBCOM_INFOKEY_MR_RKEY, &my_rkey, sizeof(int));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
+        dprintf("dcfa_init,scratch_pad,rkey=%08x\n", my_rkey);
+
+        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_rkey, sizeof(uint32_t));
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+    }
+    
+    /* wait until key-value propagates among all ranks */
+    pmi_errno = PMI_Barrier(); 
+    MPIU_ERR_CHKANDJUMP(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**PMI_Barrier");
+    dprintf("dcfa_init,put KVS;barrier;\n");
+
+    /* make me-to-you scratch-pad QP RTS */
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
+        if(i != MPID_nem_dcfa_myrank) { 
+
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, "sp/gid", (char*)&remote_gid, sizeof(union ibv_gid));
+            dprintf("dcfa_init,after kvs get\n");
+            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, "sp/lid", (char*)&remote_lid, sizeof(uint16_t));
+            dprintf("dcfa_init,after kvs get\n");
+            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            
+            strcpy(key_str, "sp/qpn");
+            strcat(key_str, ""); /* "" or "lmt-put" */
+            sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
+            strcat(key_str, remote_rank_str);
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_qpnum, sizeof(uint32_t));
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            dprintf("dcfa_init,get KVS,remote_qpnum=%08x\n", remote_qpnum);
+
+            ibcom_errno = ibcom_rts(MPID_nem_dcfa_scratch_pad_fds[i], remote_qpnum, remote_lid, &remote_gid);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
+
+            strcpy(key_str, "sp/rmem");
+            sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
+            strcat(key_str, remote_rank_str);
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_rmem, sizeof(void*));
+            dprintf("dcfa_init,after kvs get\n");
+            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            dprintf("dcfa_init,get KVS,remote_rmem=%p\n", remote_rmem);
+
+            strcpy(key_str, "sp/rkey");
+            sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
+            strcat(key_str, remote_rank_str);
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_rkey, sizeof(uint32_t));
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            dprintf("dcfa_init,get KVS,remote_rkey=%08x\n", remote_rkey);
+
+            ibcom_errno = ibcom_reg_mr_connect(MPID_nem_dcfa_scratch_pad_fds[i], remote_rmem, remote_rkey);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_reg_mr_connect");
+        }
+    }
+
+    /* wait until you-to-me scratch-pad QP becomes RTR */
+    pmi_errno = PMI_Barrier();
+    MPIU_ERR_CHKANDJUMP(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**PMI_Barrier");
+
+    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_conns, dcfaconn_t *, MPID_nem_dcfa_nranks * sizeof(dcfaconn_t), mpi_errno, "connection table");
+    memset(MPID_nem_dcfa_conns, 0, MPID_nem_dcfa_nranks * sizeof(dcfaconn_t));
+
+    MPIU_CHKPMEM_MALLOC(MPID_nem_dcfa_pollingset, MPIDI_VC_t**, MPID_NEM_DCFA_MAX_POLLINGSET * sizeof(MPIDI_VC_t*), mpi_errno, "connection table");
+    memset(MPID_nem_dcfa_pollingset, 0, MPID_NEM_DCFA_MAX_POLLINGSET * sizeof(MPIDI_VC_t*));
+
+    /* prepare eager-send QP */
+    for(i = 0; i < MPID_nem_dcfa_nranks; i++) {
+        ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_RC, &MPID_nem_dcfa_conns[i].fd);
+        dprintf("init,fd=%d\n", MPID_nem_dcfa_conns[i].fd);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
+    }
+
+#if 0    
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
+        ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_RC_LMT_PUT, &MPID_nem_dcfa_conns[i].fd_lmt_put);
+        dprintf("init,fd_lmt_put=%d\n", MPID_nem_dcfa_conns[i].fd_lmt_put);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
+    }   
+#endif
+
+    /* put bc/me/{gid,lid}, put bc/me/{qpn,rmem,rkey}/you */
+    mpi_errno = MPID_nem_dcfa_announce_network_addr(pg_rank, bc_val_p, val_max_sz_p);
+    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+    /* wait until key-value propagates among all ranks */
+    pmi_errno = PMI_Barrier(); 
+    MPIU_ERR_CHKANDJUMP(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**PMI_Barrier");
+
+    /* make me-to-you eager-send QP RTS */
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
+        if(i != MPID_nem_dcfa_myrank) { 
+
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, MPIDI_CH3I_LID_KEY, (char*)&remote_lid, sizeof(uint16_t));
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, MPIDI_CH3I_GID_KEY, (char*)&remote_gid, sizeof(union ibv_gid));
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            
+            strcpy(key_str, MPIDI_CH3I_RMEM_KEY);
+            sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
+            strcat(key_str, remote_rank_str);
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_rmem, sizeof(void*));
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            
+            strcpy(key_str, MPIDI_CH3I_RKEY_KEY);
+            sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
+            strcat(key_str, remote_rank_str);
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_rkey, sizeof(uint32_t));
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            
+            strcpy(key_str, MPIDI_CH3I_QPN_KEY);
+            strcat(key_str, ""); /* "" or "lmt-put" */
+            sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
+            strcat(key_str, remote_rank_str);
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_qpnum, sizeof(uint32_t));
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            dprintf("remote_qpnum obtained=%08x\n", remote_qpnum);
+            
+            ibcom_errno = ibcom_rts(MPID_nem_dcfa_conns[i].fd, remote_qpnum, remote_lid, &remote_gid);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
+            
+            /* report that the me-to-you eager-send QP has become RTR */
+            IbCom* ibcom_scratch_pad;
+            ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[i], &ibcom_scratch_pad); 
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
+
+            ibcom_qp_state_t state = {.state = IBCOM_QP_STATE_RTR};
+            ibcom_errno = ibcom_put_scratch_pad(MPID_nem_dcfa_scratch_pad_fds[i], (uint64_t)ibcom_scratch_pad, sizeof(ibcom_qp_state_t) * MPID_nem_dcfa_myrank, sizeof(ibcom_qp_state_t), (void*)&state);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_put_scratch_pad");
+            MPID_nem_dcfa_ncqe_scratch_pad += 1;
+
+            ibcom_errno = ibcom_reg_mr_connect(MPID_nem_dcfa_conns[i].fd, remote_rmem, remote_rkey);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_reg_mr_connect");
+            dprintf("dcfa_init,after mr_connect for me-to-you eager-send QP\n");
+            
+#if 0
+            /* CQ, SQ, SCQ for lmt-put */
+            strcpy(key_str, MPIDI_CH3I_QPN_KEY);
+            strcat(key_str, "lmt-put"); /* "" or "lmt-put" */
+            sprintf(remote_rank_str, "/%x", MPID_nem_dcfa_myrank);
+            strcat(key_str, remote_rank_str);
+            mpi_errno = MPID_nem_dcfa_kvs_get_binary(i, key_str, (char*)&remote_qpnum, sizeof(uint32_t));
+            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+            
+            ibcom_errno = ibcom_rts(MPID_nem_dcfa_conns[i].fd_lmt_put, remote_qpnum, remote_lid, &remote_gid);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
+#endif
+        }
+    }
+
+#if 0 /* debug */
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
+        dprintf("init,fd[%d]=%d\n", i, MPID_nem_dcfa_conns[i].fd);
+    }
+#endif
+
+
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_INIT);
+    return mpi_errno;
+  fn_fail:
+    MPIU_CHKPMEM_REAP();
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_get_business_card
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int str_errno = MPIU_STR_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_GET_BUSINESS_CARD);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_GET_BUSINESS_CARD);
+    dprintf("MPID_nem_dcfa_get_business_card,enter\n");
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_GET_BUSINESS_CARD);
+    return mpi_errno;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_announce_network_addr
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_announce_network_addr(int my_rank, char **bc_val_p, int *val_max_sz_p)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int str_errno = MPIU_STR_SUCCESS;
+    int ibcom_errno;
+    int i, nranks;
+
+    uint32_t my_qpnum;
+    uint16_t my_lid;
+    union ibv_gid	my_gid;
+    void* my_rmem;
+    int my_rkey;
+    char *remote_rank_str; /* perl -e '$key_str .= $remote_rank;' */
+    char *key_str; 
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_ANNOUNCE_NETWORK_ADDR);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_ANNOUNCE_NETWORK_ADDR);
+    MPIU_CHKLMEM_DECL(2); /* argument is the number of MPIU_CHKLMEM_MALLOC calls below */
+
+    /* count maximum length of the string representation of remote_rank */
+    for(i = 0, nranks = MPID_nem_dcfa_nranks; nranks > 0; nranks /= 10, i++) { }
+    MPIU_CHKLMEM_MALLOC(remote_rank_str, char *, i + 1, mpi_errno, "key_str"); /* alloca */
+    MPIU_CHKLMEM_MALLOC(key_str, char *, strlen(MPIDI_CH3I_QPN_KEY) + i + 1, mpi_errno, "key_str"); /* alloca */
+
+    /* Each rank-pair has its own local and remote QP, so a rank performs
+       remote_qpn = kvs_get($remote_rank . "qpnum/" . $local_rank).
+       A memory area that is read and written to the HCA, and a memory area
+       that is read from the HCA and written to DRAM, are also associated
+       with each connection, so a rank performs
+       rkey = kvs_get($remote_rank . "rkey/" . $local_rank) and
+       raddr = kvs_get($remote_rank . "raddr/" . $local_rank). */
+    for (i = 0; i < MPID_nem_dcfa_nranks; i++) {
+
+        /* lid and gid are common for all remote-ranks */
+        if(i == 0) {
+            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd, IBCOM_INFOKEY_PORT_LID, &my_lid, sizeof(uint16_t));
+            dprintf("get_business_card,lid=%04x\n", my_lid);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+            mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, MPIDI_CH3I_LID_KEY, (uint8_t*)&my_lid, sizeof(uint16_t));
+            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+            
+            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd, IBCOM_INFOKEY_PORT_GID, &my_gid, sizeof(union ibv_gid));
+            {
+                dprintf("get_business_card,val_max_sz=%d\n", *val_max_sz_p);
+                dprintf("get_business_card,sz=%ld,my_gid=", sizeof(union ibv_gid));
+                int i;
+                for(i = 0; i < 16; i++) { dprintf("%02x", (int)my_gid.raw[i]); }
+                dprintf("\n");
+            }
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+            mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, MPIDI_CH3I_GID_KEY, (uint8_t*)&my_gid, sizeof(union ibv_gid));
+            MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+            dprintf("get_business_card,val_max_sz=%d\n", *val_max_sz_p);
+        }
+
+        /* We use a different RDMA-write-to buffer for each sender,
+           so announce one entry per sender, e.g.:
+           <"0/qpn/0", 0xa0000>
+           <"0/qpn/1", 0xb0000>
+           <"0/qpn/2", 0xc0000>
+           <"0/qpn/3", 0xd0000>
+         */
+        strcpy(key_str, MPIDI_CH3I_QPN_KEY);
+        sprintf(remote_rank_str, "/%x", i);
+        strcat(key_str, remote_rank_str);
+        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd, IBCOM_INFOKEY_QP_QPN, &my_qpnum, sizeof(uint32_t));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_qpnum, sizeof(uint32_t));
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+
+#if 0
+        /* lmt-put */
+        strcpy(key_str, MPIDI_CH3I_QPN_KEY);
+        strcat(key_str, "lmt-put");
+        sprintf(remote_rank_str, "/%x", i);
+        strcat(key_str, remote_rank_str);
+        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[i].fd_lmt_put, IBCOM_INFOKEY_QP_QPN, &my_qpnum, sizeof(uint32_t));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_qpnum, sizeof(uint32_t));
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+#endif
+
+        strcpy(key_str, MPIDI_CH3I_RMEM_KEY);
+        sprintf(remote_rank_str, "/%x", i);
+        strcat(key_str, remote_rank_str);
+
+        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[i].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_ADDR, &my_rmem, sizeof(void*));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
+
+        dprintf("rmem=%p\n", my_rmem);
+        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_rmem, sizeof(void*));
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+
+        strcpy(key_str, MPIDI_CH3I_RKEY_KEY);
+        sprintf(remote_rank_str, "/%x", i);
+        strcat(key_str, remote_rank_str);
+
+        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[i].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_RKEY, &my_rkey, sizeof(int));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
+
+        mpi_errno = MPID_nem_dcfa_kvs_put_binary(MPID_nem_dcfa_myrank, key_str, (uint8_t*)&my_rkey, sizeof(uint32_t));
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_kvs_put_binary");
+    }
+
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_ANNOUNCE_NETWORK_ADDR);
+    return mpi_errno;
+  fn_fail:
+    MPIU_CHKLMEM_FREEALL();
+    goto fn_exit;
+}
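+/* The KVS entries produced above are, per remote rank i (rank in hex):
+   "bc/<myrank>/" MPIDI_CH3I_QPN_KEY  "/<i>" -> QP number of the QP towards i
+   "bc/<myrank>/" MPIDI_CH3I_RMEM_KEY "/<i>" -> address of the RDMA-write-to buffer for i
+   "bc/<myrank>/" MPIDI_CH3I_RKEY_KEY "/<i>" -> rkey of that buffer
+   plus the rank-wide MPIDI_CH3I_LID_KEY and MPIDI_CH3I_GID_KEY entries;
+   MPID_nem_dcfa_init reads the matching entries of each peer. */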
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_connect_to_root
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc) {
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_CONNECT_TO_ROOT);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_CONNECT_TO_ROOT);
+
+    dprintf("toroot,%d->%d", MPID_nem_dcfa_myrank, new_vc->pg_rank);
+    /* not implemented */
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CONNECT_TO_ROOT);
+    return MPI_SUCCESS;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_vc_init
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_vc_init(MPIDI_VC_t * vc)
+{
+    MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
+    int mpi_errno = MPI_SUCCESS;
+
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    int ibcom_errno;
+    size_t s;
+    dcfaconn_t *sc;
+    off_t offset;
+
+    int remote_qpnum;
+    uint16_t remote_lid;
+    union ibv_gid remote_gid;
+    void* remote_rmem;
+    int remote_rkey;
+
+    char key_str[256], remote_rank_str[256];
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_VC_INIT);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_VC_INIT);
+
+    vc_dcfa->sc = &MPID_nem_dcfa_conns[vc->pg_rank];
+
+    /* store pointer to ibcom */
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_conns[vc->pg_rank].fd, &vc_dcfa->ibcom); 
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
+
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_conns[vc->pg_rank].fd_lmt_put, &vc_dcfa->ibcom_lmt_put); 
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
+
+    //dprintf("vc_init,open,fd=%d,ptr=%p,rsr_seq_num_poll=%d\n", MPID_nem_dcfa_conns[vc->pg_rank].fd, vc_dcfa->ibcom, vc_dcfa->ibcom->rsr_seq_num_poll);
+
+    /* initialize sendq */
+    vc_dcfa->sendq.head = NULL;
+    vc_dcfa->sendq.tail = NULL;
+    vc_dcfa->sendq_lmt_put.head = NULL;
+    vc_dcfa->sendq_lmt_put.tail = NULL;
+
+    /* rank is sent as wr_id and used to obtain vc in poll */
+    MPID_nem_dcfa_conns[vc->pg_rank].vc = vc;
+    MPIU_ERR_CHKANDJUMP(MPID_nem_dcfa_npollingset+1 > MPID_NEM_DCFA_MAX_POLLINGSET, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_npollingset");
+    MPID_nem_dcfa_pollingset[MPID_nem_dcfa_npollingset++] = vc;
+    //printf("vc_init,%d->%d,vc=%p,npollingset=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, vc, MPID_nem_dcfa_npollingset);
+
+    /* wait until you-to-me eager-send QP becomes RTR */
+    IbCom* ibcom_scratch_pad;
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[vc->pg_rank], &ibcom_scratch_pad); 
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
+
+    int ntrial = 0;
+    volatile ibcom_qp_state_t* rstate = (ibcom_qp_state_t*)(ibcom_scratch_pad->icom_mem[IBCOM_SCRATCH_PAD_TO] + vc->pg_rank * sizeof(ibcom_qp_state_t));
+    dprintf("dcfa_init,rstate=%p,*rstate=%08x\n", rstate, *((uint32_t*)rstate));
+    while(rstate->state != IBCOM_QP_STATE_RTR) {
+        __asm__ __volatile__ ("pause;" : : : "memory"); 
+        if(++ntrial > 1024) {
+            /* detect RDMA-write failure */
+            ibcom_errno = MPID_nem_dcfa_drain_scq_scratch_pad();
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_scratch_pad");
+        }
+    }
+    dprintf("dcfa_init,you-to-me eager-send QP is RTR\n");
+
+    /* post IBCOM_MAX_RQ_CAPACITY recv commands beforehand; replenish them as they are retired in dcfa_poll */
+    int i;
+    for(i = 0; i < IBCOM_MAX_RQ_CAPACITY; i++) {
+        //dprintf("irecv,%d->%d\n", MPID_nem_dcfa_myrank, vc->pg_rank);
+        ibcom_errno = ibcom_irecv(MPID_nem_dcfa_conns[vc->pg_rank].fd, (uint64_t)vc->pg_rank);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_irecv");
+    }
+
+    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
+
+
+    uint32_t max_msg_sz;
+    ibcom_get_info_conn(MPID_nem_dcfa_conns[vc->pg_rank].fd, IBCOM_INFOKEY_PATTR_MAX_MSG_SZ, &max_msg_sz, sizeof(max_msg_sz));
+    VC_FIELD(vc,pending_sends) = 0;
+#ifdef DCFA_ONDEMAND
+    VC_FIELD(vc,is_connected) = 0;
+#endif
+
+    MPIU_Assert(sizeof(sz_hdrmagic_t) == 8); /* assumption in dcfa_ibcom.h */
+    MPIU_Assert(sizeof(tailmagic_t) == 1); /* in dcfa_ibcom.h */
+
+    uint32_t sz;
+#if 0
+    /* assumption in released(), must be power of two  */
+    sz = IBCOM_RDMABUF_NSEG;
+    while((sz & 1) == 0) { sz>>=1; }
+    sz >>= 1;
+    if(sz) { MPIU_Assert(0); }
+#endif
+
+    /* dcfa_poll.c assumes IBCOM_RDMABUF_SZSEG is a power of two */
+    for(sz = IBCOM_RDMABUF_SZSEG; sz > 0; sz >>= 1) {
+        if(sz != 1 && (sz & 1)) { MPIU_Assert(0); }
+    }
+
+    char* val;
+    val = getenv("MP2_IBA_EAGER_THRESHOLD");
+    vc->eager_max_msg_sz = val ? atoi(val) : MPID_NEM_DCFA_EAGER_MAX_MSG_SZ;
+    vc->ready_eager_max_msg_sz = val ? atoi(val) : MPID_NEM_DCFA_EAGER_MAX_MSG_SZ;
+    dprintf("dcfa_vc_init,vc->eager_max_msg_sz=%d\n", vc->eager_max_msg_sz);
+
+    /* vc->rndvSend_fn is set in MPID_nem_vc_init (in src/mpid/ch3/channels/nemesis/src/mpid_nem_init.c) */;
+    vc->sendNoncontig_fn = MPID_nem_dcfa_SendNoncontig;
+
+    vc->comm_ops         = &comm_ops;
+
+
+    /* register packet handler */
+    vc_ch->pkt_handler = MPID_nem_dcfa_pkt_handler;
+    vc_ch->num_pkt_handlers = MPIDI_NEM_DCFA_NUM_PKT_HANDLERS;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_PKT_EAGER_SEND] = MPID_nem_dcfa_PktHandler_EagerSend;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_PKT_PUT] = MPID_nem_dcfa_PktHandler_Put;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_PKT_GET] = MPID_nem_dcfa_PktHandler_Get;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_PKT_GET_RESP] = MPID_nem_dcfa_PktHandler_GetResp;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_PKT_ACCUMULATE] = MPID_nem_dcfa_PktHandler_Accumulate;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_PKT_LMT_GET_DONE] = MPID_nem_dcfa_pkt_GET_DONE_handler;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_REQ_SEQ_NUM] = MPID_nem_dcfa_PktHandler_req_seq_num;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_REPLY_SEQ_NUM] = MPID_nem_dcfa_PktHandler_reply_seq_num;
+    MPID_nem_dcfa_pkt_handler[MPIDI_NEM_DCFA_CHG_RDMABUF_OCC_NOTIFY_STATE] = MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state;
+
+    /* register CH3 send/recv functions */
+    vc_ch->iStartContigMsg = MPID_nem_dcfa_iStartContigMsg;
+    vc_ch->iSendContig = MPID_nem_dcfa_iSendContig;
+
+    /* register CH3--lmt send/recv functions */
+    vc_ch->lmt_initiate_lmt = MPID_nem_dcfa_lmt_initiate_lmt;
+    vc_ch->lmt_start_recv = MPID_nem_dcfa_lmt_start_recv;
+    vc_ch->lmt_handle_cookie = MPID_nem_dcfa_lmt_handle_cookie;
+    vc_ch->lmt_done_send = MPID_nem_dcfa_lmt_done_send;
+    vc_ch->lmt_done_recv = MPID_nem_dcfa_lmt_done_recv;
+    vc_ch->lmt_vc_terminated = MPID_nem_dcfa_lmt_vc_terminated;
+    vc_ch->next = NULL;
+    vc_ch->prev = NULL;
+
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_VC_INIT);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_vc_destroy
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_vc_destroy(MPIDI_VC_t * vc)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_VC_DESTROY);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_VC_DESTROY);
+    /* currently do nothing */
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_VC_DESTROY);
+    return mpi_errno;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_vc_terminate
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_vc_terminate(MPIDI_VC_t * vc)
+{
+    dprintf("dcfa_vc_terminate,enter\n");
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    int req_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_VC_TERMINATE);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_VC_TERMINATE);
+
+    /* Check that it is OK to terminate the connection without waiting for
+       every send to complete.  It is safe to check only the command queue
+       because data transactions always proceed after the send is confirmed
+       by MPI_Wait, and control transactions always proceed after receiving
+       a reply. */
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+
+    dprintf("vc_terminate,before,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc, pending_sends));
+
+    /* update remote RDMA-write-to buffer occupancy */
+#if 0 /* we can't send it when the other party has closed QP */
+    while(MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent) > 0) {
+        MPID_nem_dcfa_send_reply_seq_num(vc); 
+    }
+#endif
+
+    /* update local RDMA-write-to buffer occupancy */
+#if 0
+    while(MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) > 0) {
+        MPID_nem_dcfa_poll_eager(vc);
+    }
+#endif
+
+    /* drain sendq */
+    while(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+        MPID_nem_dcfa_send_progress(vc_dcfa);
+    }
+
+    dprintf("vc_terminate,middle,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc, pending_sends));
+
+    if(MPID_nem_dcfa_ncqe > 0 || VC_FIELD(vc, pending_sends) > 0) {
+        usleep(1000);
+        MPID_nem_dcfa_drain_scq(0);
+    }
+    dprintf("vc_terminate,middle2,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc, pending_sends));
+
+    if(MPID_nem_dcfa_ncqe > 0 || VC_FIELD(vc, pending_sends) > 0) {
+        usleep(1000);
+        MPID_nem_dcfa_drain_scq(0);
+    }
+#if 0
+    /* drain scq */
+    while(MPID_nem_dcfa_ncqe > 0 || VC_FIELD(vc, pending_sends) > 0) {
+        usleep(1000);
+        MPID_nem_dcfa_drain_scq(0);
+        //printf("%d\n", VC_FIELD(vc, pending_sends));
+        //printf("%d\n", MPID_nem_dcfa_ncqe);
+    }
+#endif
+
+    dprintf("vc_terminate,after ,%d->%d,r rdmaocc=%d,l rdmaocc=%d,sendq=%d,ncqe=%d,pending_sends=%d\n",  MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), MPID_nem_dcfa_ncqe, VC_FIELD(vc, pending_sends));
+
+    /* drain scratch-pad scq */
+    ibcom_errno = MPID_nem_dcfa_drain_scq_scratch_pad();
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_scratch_pad");
+
+    mpi_errno = MPIDI_CH3U_Handle_connection(vc, MPIDI_VC_EVENT_TERMINATED);
+    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_VC_TERMINATE);
+    return mpi_errno;
+  fn_fail:
+    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
+    goto fn_exit;
+
+}
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c
new file mode 100644
index 0000000..1a83117
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c
@@ -0,0 +1,492 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2013 NEC Corporation
+ *      Author: Masamichi Takagi
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "dcfa_impl.h"
+
+//#define DEBUG_DCFA_LMT
+#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#undef dprintf
+#endif
+#ifdef DEBUG_DCFA_LMT
+#define dprintf printf
+#else
+#define dprintf(...)
+#endif
+
+/* Get mode: sender sends RTS */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_lmt_initiate_lmt
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts_pkt, struct MPID_Request *req)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int dt_contig;
+    MPIDI_msg_sz_t data_sz; 
+    MPID_Datatype * dt_ptr;
+    MPI_Aint dt_true_lb;
+    MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_INITIATE_LMT);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_INITIATE_LMT);
+
+    dprintf("lmt_initiate_lmt,enter,%d->%d,req=%p\n", MPID_nem_dcfa_myrank, vc->pg_rank, req);
+
+    /* obtain dt_true_lb */
+    /* see MPIDI_Datatype_get_info(in, in, out, out, out, out) (in src/mpid/ch3/include/mpidimpl.h) */
+    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+
+    /* malloc a memory area for the cookie; an automatic variable cannot be used because isend does not copy the payload */
+    MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = (MPID_nem_dcfa_lmt_cookie_t*)MPIU_Malloc(sizeof(MPID_nem_dcfa_lmt_cookie_t));
+
+    /* remember address to "free" when receiving DONE from receiver */
+    req->ch.s_cookie = s_cookie_buf;
+
+    /* see MPIDI_CH3_PktHandler_RndvClrToSend (in src/mpid/ch3/src/ch3u_rndv.c) */
+    //assert(dt_true_lb == 0);
+    void* write_from_buf; 
+    if(dt_contig) {
+        write_from_buf = req->dev.user_buf;
+    } else {
+        /* see MPIDI_CH3_EagerNoncontigSend (in ch3u_eager.c) */
+        req->dev.segment_ptr = MPID_Segment_alloc( );
+        MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
+        
+        MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, req->dev.segment_ptr, 0);
+        req->dev.segment_first = 0;
+        req->dev.segment_size = data_sz;
+
+        MPIDI_msg_sz_t last;
+        last = req->dev.segment_size; /* segment_size is byte offset */
+        MPIU_Assert(last > 0);
+        REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc((size_t)req->dev.segment_size);
+        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
+        MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last, (char *)(REQ_FIELD(req, lmt_pack_buf)));
+        MPIU_Assert(last == req->dev.segment_size);
+        write_from_buf = REQ_FIELD(req, lmt_pack_buf);
+    }
+    dprintf("lmt_initate_lmt,dt_contig=%d,write_from_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n", dt_contig, write_from_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf));
+
+#ifdef DCFA
+#else
+    s_cookie_buf->addr = write_from_buf; 
+#endif
+    /* put sz, see MPID_nem_lmt_RndvSend (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) */
+    /* TODO: remove the sz field; pkt_RTS_handler
+       (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) does
+         rreq->ch.lmt_data_sz = rts_pkt->data_sz; */
+    //s_cookie_buf->sz = (uint32_t)((MPID_nem_pkt_lmt_rts_t*)rts_pkt)->data_sz;
+
+    /* Preserve the last byte of the payload and put it in the cookie: the tail
+       magic is written over the tail of the payload so that we do not need an
+       extra SGE or RDMA command for it. */
+    MPIU_Assert(((MPID_nem_pkt_lmt_rts_t*)rts_pkt)->data_sz == data_sz);
+    s_cookie_buf->tail = *((uint8_t*)(write_from_buf + data_sz - sizeof(uint8_t)));
+    /* prepare magic */
+    //*((uint32_t*)(write_from_buf + data_sz - sizeof(tailmagic_t))) = IBCOM_MAGIC;
+
+#if 1 /* embed RDMA-write-to buffer occupancy information */
+    dprintf("lmt_initiate_lmt,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+    /* embed RDMA-write-to buffer occupancy information */
+    s_cookie_buf->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
+
+    /* remember the last one sent */
+    vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail;
+#endif
+
+    /* put IB rkey */
+    struct ibv_mr *mr = ibcom_reg_mr_fetch(write_from_buf, data_sz);
+    MPIU_ERR_CHKANDJUMP(!mr, mpi_errno, MPI_ERR_OTHER, "**ibcom_reg_mr_fetch");
+#ifdef DCFA
+    s_cookie_buf->addr = (void*)mr->host_addr; 
+    dprintf("lmt_initiate_lmt,s_cookie_buf->addr=%p\n", s_cookie_buf->addr);
+#endif
+    s_cookie_buf->rkey = mr->rkey;
+    dprintf("lmt_initiate_lmt,tail=%02x,mem-tail=%p,%02x,sz=%ld,raddr=%p,rkey=%08x\n", s_cookie_buf->tail, write_from_buf + data_sz - sizeof(uint8_t), *((uint8_t*)(write_from_buf + data_sz - sizeof(uint8_t))), data_sz, s_cookie_buf->addr, s_cookie_buf->rkey);
+    /* send cookie. rts_pkt as the MPI-header, s_cookie_buf as the payload */
+    MPID_nem_lmt_send_RTS(vc, (MPID_nem_pkt_lmt_rts_t*)rts_pkt, s_cookie_buf, sizeof(MPID_nem_dcfa_lmt_cookie_t));
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_INITIATE_LMT);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
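+/* As used above, the sender-side cookie carries everything the receiver needs
+   for the RDMA-read: the source address (host_addr under DCFA, the virtual
+   address otherwise) and rkey from ibcom_reg_mr_fetch, the preserved last
+   payload byte (tail), and the piggybacked rsr_seq_num_tail for flow control.
+   The exact layout of MPID_nem_dcfa_lmt_cookie_t is defined elsewhere in the
+   netmod headers. */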
+
+/* the essential part of lrecv, extracted so that it can also be dequeued and issued from the sendq */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_lmt_start_recv_core
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_lmt_start_recv_core(struct MPID_Request *req, void* raddr, uint32_t rkey, void* write_to_buf) 
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    struct MPIDI_VC *vc = req->ch.vc;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV_CORE);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV_CORE);
+
+    ibcom_errno = ibcom_lrecv(vc_dcfa->sc->fd, (uint64_t)req, raddr, req->ch.lmt_data_sz, rkey, write_to_buf);
+    MPID_nem_dcfa_ncqe += 1;
+    //dprintf("start_recv,ncqe=%d\n", MPID_nem_dcfa_ncqe);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_lrecv");    
+    dprintf("lmt_start_recv_core,MPID_nem_dcfa_ncqe=%d\n", MPID_nem_dcfa_ncqe);
+    dprintf("lmt_start_recv_core,req=%p,sz=%ld,write_to_buf=%p,lmt_pack_buf=%p,user_buf=%p,raddr=%p,rkey=%08x,tail=%p=%02x\n", req, req->ch.lmt_data_sz, write_to_buf, REQ_FIELD(req, lmt_pack_buf), req->dev.user_buf, raddr, rkey, write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t), *((uint8_t*)(write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));
+    
+#ifdef LMT_GET_CQE
+    MPID_nem_dcfa_ncqe_to_drain += 1; /* use CQE instead of polling */
+#else
+    /* drain_scq and dcfa_poll is not ordered, so both can decrement ref_count */
+    MPIR_Request_add_ref(req);
+    
+    /* register to poll list in dcfa_poll() */
+    /* don't use req->dev.next because it causes an unknown problem */
+    MPID_nem_dcfa_lmtq_enqueue(&MPID_nem_dcfa_lmtq, req);
+    dprintf("lmt_start_recv_core,lmtq enqueue\n");
+    //volatile uint8_t* tailmagic = (uint8_t*)((void*)req->dev.user_buf + req->ch.lmt_data_sz - sizeof(uint8_t));
+    //dprintf("start_recv_core,cur_tail=%02x,lmt_receiver_tail=%02x\n", *tailmagic, REQ_FIELD(req, lmt_receiver_tail));
+#endif
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV_CORE);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
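+/* In the non-LMT_GET_CQE path above, completion of the RDMA-read is detected later by
+   MPID_nem_dcfa_poll, which compares the last byte of the destination buffer against the
+   tail value stashed in REQ_FIELD(req, lmt_tail). */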
+
+/* Get protocol: (1) the sender sends an RTS to the receiver, (2) the receiver RDMA-reads (here),
+   (3) the receiver polls on the end-flag, (4) the receiver sends DONE to the sender.
+   caller: (in mpid_nem_lmt.c)
+*/
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_lmt_start_recv
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_lmt_start_recv(struct MPIDI_VC *vc, struct MPID_Request *req, MPID_IOV s_cookie) 
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    int dt_contig;
+    MPIDI_msg_sz_t data_sz; 
+    MPID_Datatype * dt_ptr;
+    MPI_Aint dt_true_lb;
+    MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV);
+
+    dprintf("lmt_start_recv,enter,%d<-%d,req=%p\n", MPID_nem_dcfa_myrank, vc->pg_rank, req);
+
+    /* obtain dt_true_lb */
+    /* see MPIDI_Datatype_get_info(in, in, out, out, out, out) (in src/mpid/ch3/include/mpidimpl.h) */
+    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+
+    MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = s_cookie.iov_base;
+
+    /* stash vc for dcfa_poll */
+    req->ch.vc = vc;
+
+    void* write_to_buf;
+    if(dt_contig) {
+        write_to_buf = (void*)((char *)req->dev.user_buf /*+ REQ_FIELD(req, lmt_dt_true_lb)*/); 
+    } else {
+        //REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc((size_t)req->ch.lmt_data_sz);
+        REQ_FIELD(req, lmt_pack_buf) = MPID_nem_dcfa_stmalloc((size_t)req->ch.lmt_data_sz);
+        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
+        write_to_buf = REQ_FIELD(req, lmt_pack_buf);
+    }
+
+#ifdef LMT_GET_CQE
+#else
+    /* unmark magic: pre-fill the last byte with the inverted tail so the poll loop can detect arrival of the real tail */
+    *((uint8_t*)(write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))) = ~s_cookie_buf->tail; /* size in cookie was not set */
+#endif
+    dprintf("lmt_start_recv,dt_contig=%d,write_to_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p,marked-tail=%02x,unmarked-tail=%02x\n", dt_contig, write_to_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf), s_cookie_buf->tail, *((uint8_t*)(write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));
+
+    /* stash the tail for poll, because do_cts in mpid_nem_lmt.c frees s_cookie_buf just after this function */
+    REQ_FIELD(req, lmt_tail) = s_cookie_buf->tail;
+    dprintf("lmt_start_recv,mem-tail=%p,%02x\n", write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t), *((uint32_t*)(write_to_buf + req->ch.lmt_data_sz - sizeof(uint8_t))));
+
+    //dprintf("lmt_start_recv,sendq_empty=%d,ncom=%d,ncqe=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY);
+        
+    /* try to issue RDMA-read command */
+    int slack = 1; /* slack for control packet bringing sequence number */
+        if(MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack && MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack) {
+            mpi_errno = MPID_nem_dcfa_lmt_start_recv_core(req, s_cookie_buf->addr, s_cookie_buf->rkey, write_to_buf); /* fast path not storing raddr and rkey */
+            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        } else {
+            /* enqueue command into send_queue */
+            dprintf("lmt_start_recv, enqueuing,sendq_empty=%d,ncom=%d,ncqe=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY);
+            
+            /* make raddr and rkey survive the enqueue, cookie free, and dequeue (sz is in rreq->ch.lmt_data_sz, user_buf is in req->dev.user_buf) */
+            REQ_FIELD(req, lmt_raddr) = s_cookie_buf->addr;
+            REQ_FIELD(req, lmt_rkey) = s_cookie_buf->rkey;
+            REQ_FIELD(req, lmt_write_to_buf) = write_to_buf;
+            
+            MPID_nem_dcfa_sendq_enqueue(&vc_dcfa->sendq, req);
+        }
+
+    /* extract embedded RDMA-write-to buffer occupancy information */
+        dprintf("lmt_start_recv,old lsr_seq_num=%d,s_cookie_buf->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, s_cookie_buf->seq_num_tail);
+        vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, s_cookie_buf->seq_num_tail);
+        //dprintf("lmt_start_recv,new lsr_seq_num=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+        /* change remote notification policy of RDMA-write-to buf */
+        //dprintf("lmt_start_recv,reply_seq_num,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+        MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa,& vc_dcfa->ibcom->lsr_seq_num_tail);
+        //dprintf("lmt_start_recv,reply_seq_num,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+#endif
+        //dprintf("lmt_start_recv,reply_seq_num,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+        /* try to send from sendq because at least one RDMA-write-to buffer has been released */
+        //dprintf("lmt_start_recv,reply_seq_num,send_progress\n");
+        if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+            dprintf("lmt_start_recv,ncom=%d,ncqe=%d,diff=%d\n", vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG);
+        }
+        if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa)) {
+            dprintf("lmt_start_recv,send_progress\n");fflush(stdout);
+            MPID_nem_dcfa_send_progress(vc_dcfa); 
+        }
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_START_RECV);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
+/* fall back to lmt-get if the end-flag of the send buffer has the same value as the end-flag of the recv buffer */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_lmt_switch_send
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_lmt_switch_send(struct MPIDI_VC *vc, struct MPID_Request *req) {
+    int mpi_errno = MPI_SUCCESS;
+    int dt_contig;
+    MPIDI_msg_sz_t data_sz; 
+    MPID_Datatype * dt_ptr;
+    MPI_Aint dt_true_lb;
+    MPID_IOV r_cookie = req->ch.lmt_tmp_cookie;
+    MPID_nem_dcfa_lmt_cookie_t* r_cookie_buf = r_cookie.iov_base;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_SWITCH_SEND);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_SWITCH_SEND);
+
+    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+    
+    void* write_from_buf; 
+    if(dt_contig) {
+        write_from_buf = req->dev.user_buf;
+    } else {
+        /* see MPIDI_CH3_EagerNoncontigSend (in ch3u_eager.c) */
+        req->dev.segment_ptr = MPID_Segment_alloc( );
+        MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
+        
+        MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, req->dev.segment_ptr, 0);
+        req->dev.segment_first = 0;
+        req->dev.segment_size = data_sz;
+
+        MPIDI_msg_sz_t last;
+        last = req->dev.segment_size; /* segment_size is byte offset */
+        MPIU_Assert(last > 0);
+
+        REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc(data_sz);
+        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
+
+        MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last, (char *)(REQ_FIELD(req, lmt_pack_buf)));
+        MPIU_Assert(last == req->dev.segment_size);
+
+        write_from_buf = REQ_FIELD(req, lmt_pack_buf);
+    }
+
+    //assert(dt_true_lb == 0);
+    uint8_t* tailp = (uint8_t*)(write_from_buf /*+ dt_true_lb*/ + data_sz - sizeof(uint8_t));
+#if 0
+    *is_end_flag_same = (r_cookie_buf->tail == *tailp) ? 1 : 0;
+#else
+    REQ_FIELD(req, lmt_receiver_tail) = r_cookie_buf->tail;
+    REQ_FIELD(req, lmt_sender_tail) = *tailp;
+    dprintf("lmt_switch_send,tail on sender=%02x,tail onreceiver=%02x,req=%p\n", *tailp, r_cookie_buf->tail, req);
+    uint8_t* tail_wordp = (uint8_t*)(write_from_buf + data_sz - sizeof(uint32_t) * 2);
+    dprintf("lmt_switch_send,tail on sender=%d\n", *tail_wordp);fflush(stdout);
+#endif
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_SWITCH_SEND);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
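+/* The tails recorded above let the caller detect the case where the send buffer's last byte
+   equals the receiver's end-flag; the tail byte could then not distinguish old from new data,
+   so the code falls back to lmt-get (see the comment above this function). */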
+
+/* when cookie is received in the middle of the lmt */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_lmt_handle_cookie
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_lmt_handle_cookie(struct MPIDI_VC *vc, struct MPID_Request *req, MPID_IOV cookie)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_HANDLE_COOKIE);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_HANDLE_COOKIE);
+
+    dprintf("lmt_handle_cookie,enter\n");
+
+    /* Nothing to do */
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_HANDLE_COOKIE);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
+/* when sender receives DONE from receiver */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_lmt_done_send
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_lmt_done_send(struct MPIDI_VC *vc, struct MPID_Request *req)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_SEND);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_SEND);
+
+    dprintf("lmt_done_send,enter,%d<-%d,req=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n", MPID_nem_dcfa_myrank, vc->pg_rank, req, REQ_FIELD(req, lmt_pack_buf));
+
+
+    /* free memory area for cookie */
+    if(!req->ch.s_cookie) {
+        dprintf("lmt_done_send,enter,req->ch.s_cookie is zero");
+    }
+    MPIU_Free(req->ch.s_cookie);
+    //dprintf("lmt_done_send,free cookie,%p\n", req->ch.s_cookie);
+    
+#ifdef LMT_PRED
+        if(MPID_rndv_pred_nlearn == 1 || MPID_rndv_pred_npractice > 1) {
+            if(req->dev.rndv_pred_decision == MPIDI_CH3_RNDV_SEND_RTS) {
+                MPID_rndv_pred_hit++;
+                MPID_rndv_pred_count++;
+            } 
+        }
+        if(MPID_rndv_pred_nlearn > 1) {
+            MPID_hist = (MPID_hist << 1) | 1;
+            pht_update((uint64_t)buf, MPID_hist, 1);
+            MPID_rndv_pred_nlearn++;
+            if(MPID_rndv_pred_nlearn > MPID_RNDV_PRED_MAXLEARN) {
+                MPID_rndv_pred_nlearn = 0;
+                MPID_rndv_pred_npractice = 1;
+            }
+        } 
+        if(MPID_rndv_pred_nlearn == 1) { MPID_rndv_pred_nlearn++; }
+#endif
+
+
+    /* free the temporary buffer used for eager-send of non-contiguous data.
+       MPIDI_CH3U_Recvq_FDU_or_AEP (in mpid_isend.c) sets req->dev.datatype */
+    int is_contig;
+    MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
+    if(!is_contig && REQ_FIELD(req, lmt_pack_buf)) {
+        dprintf("lmt_done_send,lmt-get,non-contiguous,free lmt_pack_buf\n");
+#if 1 /* debug, enable again later */
+        MPIU_Free(REQ_FIELD(req, lmt_pack_buf));       
+#endif
+    }
+
+    /* mark completion on sreq */
+    MPIU_ERR_CHKANDJUMP(req->dev.OnDataAvail, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_lmt_done_send");
+    dprintf("lmt_done_send,1,req=%p,pcc=%d\n", req, MPIDI_CH3I_progress_completion_count.v);
+    MPIDI_CH3U_Request_complete(req);
+    dprintf("lmt_done_send,2,req=%p,pcc=%d\n", req, MPIDI_CH3I_progress_completion_count.v);
+    //dprintf("lmt_done_send, mark completion on sreq\n");
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_SEND);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
+/* lmt-put: (1) the sender sends DONE when it finds the CQE of the put, (2) the packet handler for DONE runs on the receiver, (3) we end up here */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_lmt_done_recv
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_lmt_done_recv(struct MPIDI_VC *vc, struct MPID_Request *rreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_RECV);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_RECV);
+
+    dprintf("lmt_done_recv,enter,rreq=%p,head=%p\n", rreq, MPID_nem_dcfa_lmtq.head);
+
+
+    int is_contig;
+    MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);
+    if(!is_contig) {
+        dprintf("lmt_done_recv,copying noncontiguous data to user buffer\n");
+        
+        /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
+        /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
+        MPIDI_msg_sz_t unpack_sz = rreq->ch.lmt_data_sz;
+        MPID_Segment seg;
+        MPI_Aint last;
+        
+        MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &seg, 0);
+        last = unpack_sz;
+        MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(rreq, lmt_pack_buf));
+        if (last != unpack_sz) {
+            /* --BEGIN ERROR HANDLING-- */
+            /* received data was not entirely consumed by unpack() 
+               because too few bytes remained to fill the next basic
+               datatype */
+            rreq->status.count = (int)last;
+            rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_dcfa_lmt_done_recv", 0);
+            /* --END ERROR HANDLING-- */
+        }
+        
+        //MPIU_Free(REQ_FIELD(rreq, lmt_pack_buf));
+        MPID_nem_dcfa_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t)rreq->ch.lmt_data_sz);
+    }
+
+    dprintf("lmt_done_recv,1,req=%p,pcc=%d\n", rreq, MPIDI_CH3I_progress_completion_count.v);
+    MPIDI_CH3U_Request_complete(rreq);
+    dprintf("lmt_done_recv,2,pcc=%d\n", MPIDI_CH3I_progress_completion_count.v);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_DONE_RECV);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_lmt_vc_terminated
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_lmt_vc_terminated(struct MPIDI_VC *vc)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_LMT_VC_TERMINATED);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_LMT_VC_TERMINATED);
+
+    dprintf("lmt_vc_terminated,enter\n");
+
+    /* Nothing to do */
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_LMT_VC_TERMINATED);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
new file mode 100644
index 0000000..8db10f7
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
@@ -0,0 +1,1813 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2012 NEC Corporation
+ *      Author: Masamichi Takagi
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "dcfa_impl.h"
+
+//#define DEBUG_DCFA_POLL
+#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#undef dprintf
+#endif
+#ifdef DEBUG_DCFA_POLL
+#define dprintf printf
+#else
+#define dprintf(...)
+#endif
+
+static int entered_drain_scq = 0;
+
+#if 0
+#define MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET MPID_nem_dcfa_send_progress(vc_dcfa);
+#else
+#define MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET {     \
+   int i; \
+   for(i = 0; i < MPID_nem_dcfa_npollingset; i++) { \
+       MPIDI_VC_t *vc = MPID_nem_dcfa_pollingset[i]; \
+       MPID_nem_dcfa_vc_area *_vc_dcfa = VC_DCFA(vc); \
+       /*MPID_nem_dcfa_debug_current_vc_dcfa = vc_dcfa;*/   \
+       MPID_nem_dcfa_send_progress(_vc_dcfa); \
+   } \
+}
+#endif
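+/* MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET walks every VC in the polling set and kicks
+   MPID_nem_dcfa_send_progress on it, so that commands queued for one VC are not starved
+   when a different VC's CQ/command-queue resources are released (see the comment in drain_scq). */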
+#if 1
+#define MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS \
+    if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa)) { \
+    MPID_nem_dcfa_send_progress(vc_dcfa); \
+}
+#else
+#define MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET
+#endif
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_drain_scq
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_drain_scq(int dont_call_progress) {
+
+    int mpi_errno = MPI_SUCCESS;
+    int result;
+    int i;
+    struct ibv_wc cqe[IBCOM_MAX_CQ_HEIGHT_DRAIN];
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ);
+
+    /* prevent a call path drain_scq -> send_progress -> drain_scq */
+    if(entered_drain_scq) { printf("drain_scq,re-enter\n"); goto fn_exit; }
+    entered_drain_scq = 1;
+
+#if 0 /*def DCFA*/
+    result = ibv_poll_cq(rc_shared_scq, 1, &cqe[0]);
+#else
+    result = ibv_poll_cq(rc_shared_scq, /*3*/IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
+#endif
+
+    MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
+    
+    if(result > 0) {
+        dprintf("poll,scq,result=%d\n", result); 
+    }
+    for(i = 0; i < result; i++)  {
+        dprintf("drain_scq,i=%d\n", i);
+        
+        MPID_Request *req;
+        MPID_Request_kind_t kind;
+        int req_type, msg_type;
+        
+        /* Obtain sreq */
+        req = (MPID_Request*)cqe[i].wr_id;
+
+        kind = req->kind;
+        req_type = MPIDI_Request_get_type(req);
+        msg_type = MPIDI_Request_get_msg_type(req);
+
+        dprintf("drain_scq,req=%p,req->ref_count=%d,cc_ptr=%d\n", req, req->ref_count, *req->cc_ptr);
+        if(req->ref_count <= 0) {
+            MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
+            printf("%d\n", *(int*)0); 
+        }
+
+#ifdef DCFA
+        if(cqe[i].status != IBV_WC_SUCCESS) { 
+            dprintf("drain_scq,kind=%d,req_type=%d,msg_type=%d,cqe.status=%08x\n", kind, req_type, msg_type, cqe[i].status); 
+        }
+#else
+        if(cqe[i].status != IBV_WC_SUCCESS) { 
+            dprintf("drain_scq,kind=%d,req_type=%d,msg_type=%d,comm=%p,cqe.status=%08x,%s\n", kind, req_type, msg_type, req->comm, cqe[i].status, ibv_wc_status_str(cqe[i].status)); 
+        }
+#endif
+        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
+
+        /*
+           packets generated by MPIDI_CH3_iStartMsgv have a req_type of RECV:
+           lmt_initiate_lmt, lmt_put_cts_to_sender, lmt_put_rts_to_receiver, lmt_send_put_done
+        */
+        if(
+           //req_type == MPIDI_REQUEST_TYPE_SEND 
+           (req_type == MPIDI_REQUEST_TYPE_SEND ||
+            req_type == MPIDI_REQUEST_TYPE_RECV ||
+            req_type == MPIDI_REQUEST_TYPE_SSEND)
+           && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+            dprintf("drain_scq,send/recv,eager,req_type=%d,,comm=%p,opcode=%d\n", req_type, req->comm, cqe[i].opcode);
+            
+            MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
+            dprintf("drain_scq,MPIDI_REQUEST_EAGER_MSG,%d->%d,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, req->ch.vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail)); /* moved before MPID_Request_release because this references req->ch.vc */
+
+            /* free the temporary buffer used for eager-send of non-contiguous data.
+               MPIDI_Request_create_sreq (in mpid_isend.c) sets req->dev.datatype.
+               A control message has a req_type of MPIDI_REQUEST_TYPE_RECV and a
+               msg_type of MPIDI_REQUEST_EAGER_MSG, because a control-message send follows
+               MPIDI_CH3_iStartMsg/v-->MPID_nem_dcfa_iStartContigMsg-->MPID_nem_dcfa_iSendContig
+               and MPID_nem_dcfa_iSendContig sets req->dev.state to zero;
+               see MPID_Request_create (in src/mpid/ch3/src/ch3u_request.c).
+               An eager-short message has a req->comm of zero.
+            */
+            if(req_type == MPIDI_REQUEST_TYPE_SEND && req->comm) {
+                /* exclude control messages by requiring MPIDI_REQUEST_TYPE_SEND
+                   exclude eager-short by requiring req->comm != 0 */
+                int is_contig;
+                MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
+                if(!is_contig && REQ_FIELD(req, lmt_pack_buf)) {
+                    dprintf("drain_scq,eager-send,non-contiguous,free lmt_pack_buf=%p\n", REQ_FIELD(req, lmt_pack_buf));
+                    MPIU_Free(REQ_FIELD(req, lmt_pack_buf));       
+                }
+            }
+
+            /* decrement the number of entries in IB command queue */
+            vc_dcfa->ibcom->ncom -= 1;
+            MPID_nem_dcfa_ncqe -= 1;
+            dprintf("drain_scq,eager-send,ncqe=%d\n", MPID_nem_dcfa_ncqe);
+            MPIU_Assert(req->ref_count == 1 || req->ref_count == 2);
+
+            /* ref_count is decremented in drain_scq and wait */
+            if (*req->cc_ptr > 0) {
+                dprintf("drain_scq,MPID_nem_dcfa_ncqe_nces=%d,cc_ptr=%d,pending_sends=%d\n", MPID_nem_dcfa_ncqe_nces, *req->cc_ptr, VC_FIELD(req->ch.vc, pending_sends));
+                MPID_nem_dcfa_ncqe_nces -= 1;
+
+            int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
+            
+            (VC_FIELD(req->ch.vc, pending_sends)) -= 1;
+            
+            /* as in the template */
+            reqFn = req->dev.OnDataAvail;
+            if (!reqFn){
+                MPIDI_CH3U_Request_complete(req);
+                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+                dprintf("drain_scq,complete,req=%p,pcc incremented to %d\n", req, MPIDI_CH3I_progress_completion_count.v);
+            } else {
+                dprintf("drain_scq,reqFn isn't zero\n");
+                MPIDI_VC_t *vc = req->ch.vc;
+                int complete = 0;
+                mpi_errno = reqFn(vc, req, &complete);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                /* not-completed case is not implemented */
+                MPIU_Assert(complete == TRUE);
+            }
+            } else {
+            MPID_Request_release(req); 
+            }
+            /* try to send from sendq */
+            //dprintf("dcfa_poll,SCQ,!lmt,send_progress\n");
+            if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+                dprintf("drain_scq,eager-send,ncom=%d,ncqe=%d,diff=%d\n", vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG);
+                
+                MPID_Request *sreq = MPID_nem_dcfa_sendq_head(vc_dcfa->sendq);
+                int msg_type = MPIDI_Request_get_msg_type(sreq);
+                
+                if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+                    dprintf("drain_scq,eager-send,head is eager-send\n");
+                } else if(sreq->kind == MPID_REQUEST_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+                    dprintf("drain_scq,eager-send,head is lmt RDMA-read\n");
+                } else if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+                    dprintf("drain_scq,eager-send,head is lmt RDMA-write\n");
+                }
+            }
+            /* call MPID_nem_dcfa_send_progress for all VCs in the polling-set,
+               not only for the VC that released CQ and command-queue entries,
+               because commands for VC-A can be blocked behind a command
+               for VC-B waiting in the sendq
+            */
+                dprintf("drain_scq,eager-send,send_progress\n");
+                //MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET;
+
+            dprintf("drain_scq,eager-send,next\n");
+
+        } else if(req_type == MPIDI_REQUEST_TYPE_GET_RESP && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+            dprintf("drain_scq,GET_RESP,eager,req_type=%d,,comm=%p,opcode=%d\n", req_type, req->comm, cqe[i].opcode);
+            
+            MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
+            dprintf("drain_scq,MPIDI_REQUEST_EAGER_MSG,%d->%d,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, req->ch.vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail)); /* moved before MPID_Request_release because this references req->ch.vc */
+
+            /* decrement the number of entries in IB command queue */
+            vc_dcfa->ibcom->ncom -= 1;
+            MPID_nem_dcfa_ncqe -= 1;
+            dprintf("drain_scq,GET_RESP,ncqe=%d\n", MPID_nem_dcfa_ncqe);
+            MPIU_Assert(req->ref_count == 1 || req->ref_count == 2);
+
+            /* ref_count is decremented in drain_scq and wait */
+            dprintf("drain_scq,MPID_nem_dcfa_ncqe_nces=%d,cc_ptr=%d,pending_sends=%d\n", MPID_nem_dcfa_ncqe_nces, *req->cc_ptr, VC_FIELD(req->ch.vc, pending_sends));
+            MPID_nem_dcfa_ncqe_nces -= 1;
+
+            int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
+            
+            (VC_FIELD(req->ch.vc, pending_sends)) -= 1;
+            
+            /* as in the template */
+            reqFn = req->dev.OnDataAvail;
+            if (!reqFn){
+                MPIDI_CH3U_Request_complete(req);
+                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+                dprintf("drain_scq,complete,req=%p,pcc incremented to %d\n", req, MPIDI_CH3I_progress_completion_count.v);
+            } else {
+                dprintf("drain_scq,reqFn isn't zero\n");
+                dprintf("drain_scq,GET_RESP,before dev.OnDataAvail,ref_count=%d\n", req->ref_count);
+                MPIDI_VC_t *vc = req->ch.vc;
+                int complete = 0;
+                mpi_errno = reqFn(vc, req, &complete);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                /* not-completed case is not implemented */
+                MPIU_Assert(complete == TRUE);
+            }
+                
+            //MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET;
+
+            dprintf("drain_scq,GET_RESP,next\n");
+
+        } else if(req_type == MPIDI_REQUEST_TYPE_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG && cqe[i].opcode == IBV_WC_RDMA_READ) {
+            /* lmt get */
+            /* the case of lmt-put-done or lmt-put, where
+               (1) the sender finds the end-flag won't change, (2) the sender sends RTS to the receiver,
+               (3) the receiver GETs, (4) we end up here,
+               is distinguished by cqe[i].opcode
+            */
+            dprintf("drain_scq,recv,rndv,rdma-read,kind=%d,opcode=%d\n", kind, cqe[i].opcode);
+
+
+           MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
+#if defined(LMT_GET_CQE)
+
+           /* unpack non-contiguous dt */
+           int is_contig;
+           MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
+           if(!is_contig) {
+               dprintf("drain_scq,lmt,GET_CQE,unpack noncontiguous data to user buffer\n");
+
+               /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
+               /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
+               MPIDI_msg_sz_t unpack_sz = req->ch.lmt_data_sz;
+               MPID_Segment seg;
+               MPI_Aint last;
+
+               MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, &seg, 0);
+               last = unpack_sz;
+               MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(req, lmt_pack_buf));
+               if (last != unpack_sz) {
+                   /* --BEGIN ERROR HANDLING-- */
+                   /* received data was not entirely consumed by unpack() 
+                      because too few bytes remained to fill the next basic
+                      datatype */
+                   req->status.count = (int)last;
+                   req->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_dcfa_poll", 0);
+                   /* --END ERROR HANDLING-- */
+               }
+               dprintf("drain_scq,lmt,GET_CQE,ref_count=%d,lmt_pack_buf=%p\n", req->ref_count, REQ_FIELD(req, lmt_pack_buf));
+               MPID_nem_dcfa_stfree(REQ_FIELD(req, lmt_pack_buf), (size_t)req->ch.lmt_data_sz);
+           }
+           dprintf("drain_scq,lmt,GET_CQE,lmt_send_GET_DONE,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+
+           /* send done to sender. vc is stashed in MPID_nem_dcfa_lmt_start_recv (in dcfa_lmt.c) */
+           MPID_nem_dcfa_lmt_send_GET_DONE(req->ch.vc, req);
+#endif
+            /* unmark "lmt is going on" */
+            
+            //dprintf("dcfa_poll,SCQ,lmt,%d->%d,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, req->ch.vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail)); /* moved before MPID_Request_release because this references req->ch.vc */
+
+            /* decrement the number of entries in IB command queue */
+            vc_dcfa->ibcom->ncom -= 1;
+            MPID_nem_dcfa_ncqe -= 1;
+            dprintf("drain_scq,rdma-read,ncqe=%d\n", MPID_nem_dcfa_ncqe);
+
+#ifdef LMT_GET_CQE
+            dprintf("drain_scq,GET_CQE,Request_complete\n");
+            /* mark completion on rreq */
+            MPIDI_CH3U_Request_complete(req);
+#else /* GET, and !GET_CQE */
+
+            int is_contig;
+            MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
+            if(!is_contig) {
+                //if(req->ref_count == 1) {
+                dprintf("drain_scq,GET&&!GET_CQE,ref_count=%d,lmt_pack_buf=%p\n", req->ref_count, REQ_FIELD(req, lmt_pack_buf));
+                    /* debug, polling waits forever when freeing here. */
+                    //free(REQ_FIELD(req, lmt_pack_buf));
+                    //MPID_nem_dcfa_stfree(REQ_FIELD(req, lmt_pack_buf), (size_t)req->ch.lmt_data_sz);
+                    //dprintf("drain_scq,lmt,insert to free-list=%p\n", MPID_nem_dcfa_fl);
+                    //} else {
+                    //dprintf("drain_scq,GET&&!GET_CQE,ref_count=%d,lmt_pack_buf=%p\n", req->ref_count, REQ_FIELD(req, lmt_pack_buf));
+                    //}
+            }
+
+           /* lmt_start_recv increments ref_count;
+              drain_scq and dcfa_poll are not ordered, so either can decrement ref_count */
+            MPID_Request_release(req);
+#endif
+            /* try to send from sendq */
+            if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+                dprintf("drain_scq,GET,ncom=%d,ncqe=%d,diff=%d\n", vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG);
+                MPID_Request *sreq = MPID_nem_dcfa_sendq_head(vc_dcfa->sendq);
+                int msg_type = MPIDI_Request_get_msg_type(sreq);
+                
+                if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+                    dprintf("drain_scq,eager-send,head is eager-send\n");
+                } else if(sreq->kind == MPID_REQUEST_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+                    dprintf("drain_scq,eager-send,head is lmt\n");
+                }
+            }
+            //if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) && MPID_nem_dcfa_sendq_ready_to_send_head(vc_dcfa)) {
+                dprintf("drain_scq,GET,send_progress\n");fflush(stdout);
+                //MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET 
+                    //}
+        } else {
+            printf("drain_scq,unknown kind=%d,req_type=%d,msg_type=%d\n", kind, req_type, msg_type);
+            assert(0);
+#if 1 // lazy consulting of completion queue
+            MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
+#else
+            //printf("kind=%d\n", kind);
+#endif
+        }
+    }
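+    /* unless the caller asked us not to (dont_call_progress), give every VC in the polling set
+       a chance to issue queued commands now that send-CQ entries have been drained */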
+    if(!dont_call_progress) {
+        MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET;
+    }
+ fn_exit:
+    entered_drain_scq = 0;
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_drain_scq_lmt_put
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_drain_scq_lmt_put() {
+
+    int mpi_errno = MPI_SUCCESS;
+    int result;
+    int i;
+    struct ibv_wc cqe[IBCOM_MAX_CQ_HEIGHT_DRAIN];
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_LMT_PUT);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_LMT_PUT);
+
+#if 0 /*def DCFA*/
+    result = ibv_poll_cq(rc_shared_scq_lmt_put, 1, &cqe[0]);
+#else
+    result = ibv_poll_cq(rc_shared_scq_lmt_put, IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
+#endif
+    MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
+    
+    if(result > 0) {
+        dprintf("drain_scq_lmt_put,found,result=%d\n", result); 
+    }
+    for(i = 0; i < result; i++)  {
+        
+        MPID_Request *req;
+        MPID_Request_kind_t kind;
+        int req_type, msg_type;
+        
+#ifdef DCFA
+        if(cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("drain_scq_lmt_put,status=%08x\n", cqe[i].status); 
+        }
+#else
+        if(cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("drain_scq_lmt_put,status=%08x,%s\n", cqe[i].status, ibv_wc_status_str(cqe[i].status)); 
+        }
+#endif
+        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_lmt_put");
+        
+        /* Obtain sreq */
+        req = (MPID_Request*)cqe[i].wr_id;
+        dprintf("drain_scq_lmt_put,req=%p,req->ref_count=%d\n", req, req->ref_count);
+        MPIU_Assert(req->ref_count > 0);
+
+        kind = req->kind;
+        req_type = MPIDI_Request_get_type(req);
+        msg_type = MPIDI_Request_get_msg_type(req);
+        
+
+        if(req_type == MPIDI_REQUEST_TYPE_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+            /* lmt-put */
+            /* MPIDI_Request_set_type is not performed when
+               MPID_Isend --> FDU_or_AEP --> recv_posted --> dcfa_poll --> PUTCTS packet-handler */
+            
+            dprintf("drain_scq_lmt_put,lmt-put found\n");
+            
+#if 0 /* moving to just after put */ /*implementing back-to-back put and done */
+#endif            
+            
+            /* decrement the number of entries in IB command queue */
+            MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(req->ch.vc);
+            vc_dcfa->ibcom->ncom_lmt_put -= 1;
+            MPID_nem_dcfa_ncqe_lmt_put -= 1;
+            dprintf("drain_scq_lmt_put,rndv,ncqe=%d\n", MPID_nem_dcfa_ncqe_lmt_put);/*suspicious*/
+            int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
+            
+            (VC_FIELD(req->ch.vc, pending_sends)) -= 1;
+            
+            /* as in the template */
+            reqFn = req->dev.OnDataAvail;
+            if (!reqFn){
+                MPIDI_CH3U_Request_complete(req); /* decrement cc, signal_completion, decrement ref_count, free */
+                dprintf("drain_scq,lmt-put,req=%p,cc incremented to %d\n", req, MPIDI_CH3I_progress_completion_count.v);
+                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+            } else {
+                MPIDI_VC_t *vc = req->ch.vc;
+                int complete = 0;
+                mpi_errno = reqFn(vc, req, &complete);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                /* not-completed case is not implemented */
+                MPIU_Assert(complete == TRUE);
+                MPIU_Assert(0); /* decrementing ref_count and freeing the sreq causes a problem */
+            }
+        } else {
+            dprintf("drain_scq_lmt_put,unknown kind=%d,req_type=%d,msg_type=%d\n", kind, req_type, msg_type);
+#if 1 // lazy consulting of completion queue
+            MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_lmt_put");
+#else
+            //printf("kind=%d\n", kind);
+#endif
+        }
+    }
+    
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_LMT_PUT);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_drain_scq_scratch_pad
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_drain_scq_scratch_pad() {
+
+    int mpi_errno = MPI_SUCCESS;
+    int result;
+    int i;
+    struct ibv_wc cqe[IBCOM_MAX_CQ_HEIGHT_DRAIN];
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_SCRATCH_PAD);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_SCRATCH_PAD);
+
+#if 0 /*def DCFA*/
+    result = ibv_poll_cq(rc_shared_scq_scratch_pad, 1, &cqe[0]);
+#else
+    result = ibv_poll_cq(rc_shared_scq_scratch_pad, IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
+#endif
+    MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
+    
+    if(result > 0) {
+        dprintf("drain_scq_scratch_pad,found,result=%d\n", result); 
+    }
+    for(i = 0; i < result; i++)  {
+        
+#ifdef DCFA
+        if(cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("drain_scq_scratch_pad,status=%08x\n", cqe[i].status);
+        }
+#else
+        if(cqe[i].status != IBV_WC_SUCCESS) {
+            dprintf("drain_scq_scratch_pad,status=%08x,%s\n", cqe[i].status, ibv_wc_status_str(cqe[i].status)); 
+        }
+#endif
+        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_scratch_pad");
+        
+        IbCom* ibcom_scratch_pad = (IbCom*)cqe[i].wr_id;
+        dprintf("drain_scq_scratch_pad,ibcom_scratch_pad=%p\n", ibcom_scratch_pad);
+        ibcom_scratch_pad->ncom_scratch_pad -= 1;
+        MPID_nem_dcfa_ncqe_scratch_pad -= 1;
+    }
+    
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_DRAIN_SCQ_SCRATCH_PAD);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_poll_eager
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_poll_eager(MPIDI_VC_t *vc) {
+
+   int mpi_errno = MPI_SUCCESS;   
+   int ibcom_errno;
+   int result;
+   struct ibv_wc cqe[IBCOM_MAX_CQ_HEIGHT_DRAIN];
+   uint64_t tscs, tsce;
+
+   MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_POLL_EAGER);
+   MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_POLL_EAGER);
+
+   //MPID_nem_dcfa_tsc_poll = MPID_nem_dcfa_rdtsc();
+
+       MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+       //dprintf("dcfa_poll,ld,rsr_seq_num_poll=%d\n", vc_dcfa->ibcom->rsr_seq_num_poll);
+       volatile void* buf = vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * ((uint32_t)vc_dcfa->ibcom->rsr_seq_num_poll % IBCOM_RDMABUF_NSEG);
+       volatile sz_hdrmagic_t* sz_hdrmagic = (sz_hdrmagic_t*)buf;
+       if(sz_hdrmagic->magic != IBCOM_MAGIC) { goto fn_exit; }
+       //dprintf("dcfa_poll_eager,buf=%p,sz=%d\n", buf, sz_hdrmagic->sz);
+
+       /* unmark magic */
+       sz_hdrmagic->magic = 0/*0xdead*/; 
+#if 0
+       ibcom_errno = ibcom_poll_cq(IBCOM_RC_SHARED_RCQ, &cqe, &result);
+       MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_poll_cq");
+#endif
+       dprintf("dcfa_poll_eager,eager-send,found\n");
+       
+       //MPIU_ERR_CHKANDJUMP1(cqe.status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ibcom_poll_cq", "**ibcom_poll_cq %s", ibcom_strerror(ibcom_errno));
+       
+       int sz_data_pow2;
+       DCFA_NEM_SZ_DATA_POW2(sz_hdrmagic->sz);
+       volatile tailmagic_t* tailmagic = (tailmagic_t*)(buf + sz_data_pow2);
+       dprintf("poll,sz_data_pow2=%d,tailmagic=%p,sz=%d\n", sz_data_pow2, tailmagic, sz_hdrmagic->sz);
+       int k = 0;
+       //tsce = MPID_nem_dcfa_rdtsc(); printf("9,%ld\n", tsce - tscs); // 55 for 512-byte
+       //tscs = MPID_nem_dcfa_rdtsc();
+       //#define TLB_PREF_AMT_AHEAD 20
+#ifdef TLB_PREF_AMT_AHEAD
+       int tlb_pref_ahd = (uint64_t)tailmagic + 4096 * TLB_PREF_AMT_AHEAD - (uint64_t)buf;
+#endif
+       while(tailmagic->magic != IBCOM_MAGIC) {
+           //k++; 
+#if 0 /* pre-fetch next RDMA-write-buf slot to cover TLB miss latency */
+           __asm__ __volatile__
+               ("movq %0, %%rsi;"
+                "movq 0(%%rsi), %%rsi;"
+                : : "r"(vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZSEG * ((vc_dcfa->ibcom->rsr_seq_num_poll + 1) % IBCOM_RDMABUF_NSEG)) : "%rsi");
+#endif
+#ifdef TLB_PREF_AMT_AHEAD
+           __asm__ __volatile__
+               ("movq %0, %%rsi;"
+                "movq 0(%%rsi), %%rax;"
+                : : "r"(buf + tlb_pref_ahd) : "%rsi", "%rax");
+           tlb_pref_ahd = (tlb_pref_ahd + 4096 * 20) % IBCOM_RDMABUF_SZ;
+#endif
+       }
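+       /* the loop above spins until the tail magic written by the sender's RDMA-write becomes
+          visible; the optional TLB_PREF_AMT_AHEAD prefetch hides TLB-miss latency while spinning */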
+       //tsce = MPID_nem_dcfa_rdtsc(); printf("0,%ld\n", tsce - tscs); // 20-60 for 512-byte
+       //tscs = MPID_nem_dcfa_rdtsc();
+       //dprintf("magic wait=%d\n", k);
+       
+
+       /* this prefetch reduces the cost of the memcpy in MPIDI_CH3U_Receive_data_found */
+       /* MPIDI_CH3_PktHandler_EagerSend (in ch3u_eager.c)
+            MPIDI_CH3U_Receive_data_found (in ch3u_handle_recv_pkt.c)
+              MPIU_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
+               600 cycles for 512B!!! --> 284 cycles with prefetch
+       */
+
+#if 1
+       void* rsi;
+       for(rsi = (void*)buf; rsi < buf + sz_hdrmagic->sz; rsi += 64*4) {
+#ifdef __MIC__
+           __asm__ __volatile__
+               (
+                "movq %0, %%rsi;"
+                "vprefetch0 0x00(%%rsi);"
+                "vprefetch0 0x40(%%rsi);"
+                "vprefetch0 0x80(%%rsi);"
+                "vprefetch0 0xc0(%%rsi);"
+                : 
+                : "r"(rsi)
+                : "%rsi"); 
+#else
+           __asm__ __volatile__
+               (
+                "movq %0, %%rsi;"
+                "prefetchnta 0x00(%%rsi);"
+                "prefetchnta 0x40(%%rsi);"
+                "prefetchnta 0x80(%%rsi);"
+                "prefetchnta 0xc0(%%rsi);"
+                : 
+                : "r"(rsi)
+                : "%rsi"); 
+#endif
+       }
+#endif
+
+       MPIDI_CH3_Pkt_eager_send_t *pkt = (MPIDI_CH3_Pkt_eager_send_t*)(buf + sizeof(sz_hdrmagic_t));
+       MPIU_Assert(sz_hdrmagic->sz >= sizeof(sz_hdrmagic_t) + sizeof(MPIDI_CH3_Pkt_t) + sizeof(tailmagic_t));
+       MPIDI_CH3_Pkt_eager_send_t *pkt2 = (MPIDI_CH3_Pkt_eager_send_t*)(buf + sizeof(sz_hdrmagic_t)+sizeof(MPID_nem_dcfa_pkt_prefix_t));
+       dprintf("handle_pkt,before,%d<-%d,id=%d,pkt->type=%d,pcc=%d,MPIDI_NEM_PKT_END=%d,pkt=%p,subtype=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, vc_dcfa->ibcom->rsr_seq_num_poll, pkt->type, MPIDI_CH3I_progress_completion_count.v, MPIDI_NEM_PKT_END, pkt, ((MPID_nem_pkt_netmod_t*)pkt)->subtype);
+       /* see MPIDI_CH3_PktHandler_EagerSend (in src/mpid/ch3/src/ch3u_eager.c) */
+       mpi_errno = MPID_nem_handle_pkt(vc, (char *)(buf + sizeof(sz_hdrmagic_t)), (MPIDI_msg_sz_t)(sz_hdrmagic->sz - sizeof(sz_hdrmagic_t) - sizeof(tailmagic_t)));
+       if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+       //tsce = MPID_nem_dcfa_rdtsc(); printf("0,%ld\n", tsce - tscs); // 512-byte, 900 cyc (1100 w/o prefetch)
+       
+       /* Update occupation status of remote SR (send request) queue */
+       /* this includes local RDMA-wr-to buf occupation,
+          because MPID_nem_handle_pkt releases the RDMA-wr-to buf by copying the data out */
+       /* the responder releases the resource and then embeds the largest sequence number into an MPI message bound for the initiator */
+       //dprintf("after handle_pkt,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+#if 1
+       dprintf("handle_pkt,after,%d<-%d,id=%d,pkt->type=%d,eagershort=%d,close=%d,rts=%d,piggy-backed-eagersend=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, vc_dcfa->ibcom->rsr_seq_num_poll, pkt->type, MPIDI_CH3_PKT_EAGERSHORT_SEND, MPIDI_CH3_PKT_CLOSE, MPIDI_NEM_PKT_LMT_RTS, MPIDI_NEM_DCFA_PKT_EAGER_SEND);
+
+       int notify_rate;
+       ibcom_errno = ibcom_rdmabuf_occupancy_notify_rate_get(MPID_nem_dcfa_conns[vc->pg_rank].fd, &notify_rate);
+       dprintf("poll_eager,sendq=%d,ncom=%d,ncqe=%d,ldiff=%d(%d-%d),rdiff=%d(%d-%d),rate=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail), vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent, notify_rate);
+
+       //dprintf("dcfa_poll,current pcc=%d\n", MPIDI_CH3I_progress_completion_count.v);
+
+       /* Don't forget to put lmt-cookie types here!! */
+       if(
+          1
+          ) 
+           { 
+           /* lmt cookie messages or control messages other than eager-short */
+
+           /* an eager-send with zero-length data is released here,
+              because the RDMA-write-to buffer address cannot be traced once
+              rreq->dev.tmpbuf is set to zero in ch3_eager.c
+           */
+           dprintf("poll_eager,released,type=%d,MPIDI_NEM_DCFA_REPLY_SEQ_NUM=%d\n", pkt->type, MPIDI_NEM_DCFA_REPLY_SEQ_NUM);
+           MPID_nem_dcfa_recv_buf_released(vc, (void*)buf + sizeof(sz_hdrmagic_t) + sizeof(MPIDI_CH3_Pkt_t));
+       } else {
+           if(sz_hdrmagic->sz == sizeof(sz_hdrmagic_t) + sizeof(MPIDI_CH3_Pkt_t) + sizeof(tailmagic_t)) { 
+               if(pkt->type == MPIDI_CH3_PKT_EAGERSHORT_SEND 
+                  //||                  pkt->type == MPIDI_CH3_PKT_GET
+) {
+               } else {
+                   printf("dcfa_poll,unknown pkt->type=%d\n", pkt->type);
+                   assert(0);
+                   MPIU_ERR_INTERNALANDJUMP(mpi_errno, "MPI header only but not released");
+               }
+           }
+       }
+#endif
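+       /* note: with the constant condition above, the else branch (MPI-header-only / eager-short
+          handling) is currently unreachable; every received slot is released through
+          MPID_nem_dcfa_recv_buf_released */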
+
+       vc_dcfa->ibcom->rsr_seq_num_poll += 1;
+       dprintf("dcfa_poll,inc,rsr_seq_num_poll=%d\n", vc_dcfa->ibcom->rsr_seq_num_poll);
+
+ out:
+ fn_exit:
+   MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_POLL_EAGER);
+   return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_poll
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_poll(int in_blocking_poll) {
+
+   int mpi_errno = MPI_SUCCESS;   
+   int ibcom_errno;
+   uint32_t i;
+
+   MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_POLL);
+   MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_POLL);
+
+#if 1
+   unsigned int progress_completion_count_old = MPIDI_CH3I_progress_completion_count.v;
+#endif
+
+   /* poll lmt */
+   /* when the receiver side sends CTS to the sender side,
+      the sender receives the CTS, gives up sending an RTS,
+      initiates the RDMA-write, and then sends the RTS of the next epoch;
+      to detect the end of the RDMA-write first and DP the entry for the CTS,
+      perform lmt-poll first and eager-poll next
+    */
+   MPID_Request *rreq, *prev_rreq;
+   rreq = MPID_nem_dcfa_lmtq_head(MPID_nem_dcfa_lmtq);
+   if(rreq) {
+#if defined (TIMER_WAIT_DCFA_POLL)
+   if(in_blocking_poll) { tsc[0] = MPI_rdtsc(); }
+#endif
+   // dprintf("dcfa_poll,poll lmtq\n");
+       prev_rreq = NULL;
+       do {
+           /* Obtain the cookie. pkt_RTS_handler memcpys it (in mpid_nem_lmt.c) */
+           /* MPID_IOV_BUF is a macro that expands to iov_base (in src/include/mpiiov.h) */
+           /* do not use s_cookie_buf because do_cts frees it */
+           //MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = (MPID_nem_dcfa_lmt_cookie_t*)rreq->ch.lmt_tmp_cookie.iov_base;
+           
+           /* Wait for completion of DMA */
+           /* do not use s_cookie_buf->sz because do_cts frees it */
+           volatile void* write_to_buf;
+           int is_contig;
+           MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);
+           if(is_contig) {
+               write_to_buf = (void*)((char *)rreq->dev.user_buf /*+ REQ_FIELD(req, lmt_dt_true_lb)*/); 
+           } else {
+               write_to_buf = REQ_FIELD(rreq, lmt_pack_buf);
+           }
+
+           //assert(REQ_FIELD(rreq, lmt_dt_true_lb) == 0);
+           volatile uint8_t* tailmagic = (uint8_t*)(write_to_buf /*+ REQ_FIELD(rreq, lmt_dt_true_lb)*/ + rreq->ch.lmt_data_sz - sizeof(uint8_t));
+           
+           uint8_t lmt_tail = REQ_FIELD(rreq, lmt_tail);
+           if(*tailmagic != REQ_FIELD(rreq, lmt_tail)) { goto next; }
+           dprintf("dcfa_poll,sz=%ld,old tail=%02x,new tail=%02x\n", rreq->ch.lmt_data_sz, REQ_FIELD(rreq, lmt_tail), *tailmagic);
+           
+           dprintf("dcfa_poll,lmt found,%d<-%d,req=%p,ref_count=%d,is_contig=%d,write_to_buf=%p,lmt_pack_buf=%p,user_buf=%p,tail=%p\n", MPID_nem_dcfa_myrank, rreq->ch.vc->pg_rank, rreq, rreq->ref_count, is_contig, write_to_buf, REQ_FIELD(rreq, lmt_pack_buf), rreq->dev.user_buf, tailmagic);
+
+           /* unpack non-contiguous dt */
+           if(!is_contig) {
+               dprintf("dcfa_poll,copying noncontiguous data to user buffer\n");
+
+               /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
+               /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
+               MPIDI_msg_sz_t unpack_sz = rreq->ch.lmt_data_sz;
+               MPID_Segment seg;
+               MPI_Aint last;
+
+               MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &seg, 0);
+               last = unpack_sz;
+               MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(rreq, lmt_pack_buf));
+               if (last != unpack_sz) {
+                   /* --BEGIN ERROR HANDLING-- */
+                   /* received data was not entirely consumed by unpack() 
+                      because too few bytes remained to fill the next basic
+                      datatype */
+                   rreq->status.count = (int)last;
+                   rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_dcfa_poll", 0);
+                   /* --END ERROR HANDLING-- */
+               }
+#if 1 /* debug, enable again later, polling waits forever when freeing it here. */
+               //if(rreq->ref_count == 1) {
+               dprintf("dcfa_poll,lmt,ref_count=%d,lmt_pack_buf=%p\n", rreq->ref_count, REQ_FIELD(rreq, lmt_pack_buf));
+                   //MPIU_Free(REQ_FIELD(rreq, lmt_pack_buf));
+                   MPID_nem_dcfa_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t)rreq->ch.lmt_data_sz);    
+                   //} else {
+                   // dprintf("dcfa_poll,lmt,ref_count=%d,lmt_pack_buf=%p\n", rreq->ref_count, REQ_FIELD(rreq, lmt_pack_buf));
+                   //}
+#endif
+           }
+
+           /* send done to sender. vc is stashed in MPID_nem_dcfa_lmt_start_recv (in dcfa_lmt.c) */
+           MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(rreq->ch.vc);
+           dprintf("dcfa_poll,GET,lmt_send_GET_DONE,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+           MPID_nem_dcfa_lmt_send_GET_DONE(rreq->ch.vc, rreq);
+           dprintf("dcfa_poll,prev_rreq=%p,rreq->lmt_next=%p\n", prev_rreq, MPID_nem_dcfa_lmtq_next(rreq));
+
+           /* unlink rreq */
+           if(prev_rreq != NULL) {
+               MPID_nem_dcfa_lmtq_next(prev_rreq) = MPID_nem_dcfa_lmtq_next(rreq);
+           } else {
+               MPID_nem_dcfa_lmtq_head(MPID_nem_dcfa_lmtq) = MPID_nem_dcfa_lmtq_next(rreq);
+           }
+           if(MPID_nem_dcfa_lmtq_next(rreq) == NULL) { MPID_nem_dcfa_lmtq.tail = prev_rreq; }
+
+           /* save rreq->dev.next (and rreq) because decrementing reference-counter might free rreq */
+           MPID_Request *tmp_rreq = rreq;
+           rreq = MPID_nem_dcfa_lmtq_next(rreq);
+
+           /* decrement completion-counter */
+           dprintf("dcfa_poll,%d<-%d,", MPID_nem_dcfa_myrank, tmp_rreq->ch.vc->pg_rank);
+           int incomplete;
+           MPIDI_CH3U_Request_decrement_cc(tmp_rreq, &incomplete);
+           dprintf("lmt,complete,tmp_rreq=%p,rreq->ref_count=%d,comm=%p\n", tmp_rreq, tmp_rreq->ref_count, tmp_rreq->comm);
+
+           if(!incomplete) { MPIDI_CH3_Progress_signal_completion(); }
+
+           /* lmt_start_recv increments ref_count;
+              drain_scq and dcfa_poll are not ordered, so either can decrement ref_count */
+           /* ref_count is decremented by:
+              get-lmt: dcfa_poll, drain_scq, wait
+              put-lmt: dcfa_poll, wait */
+           MPID_Request_release(tmp_rreq); 
+           dprintf("dcfa_poll,lmt,after release,tmp_rreq=%p,rreq->ref_count=%d,comm=%p\n", tmp_rreq, tmp_rreq->ref_count, tmp_rreq->comm);
+
+    
+           goto next_unlinked;
+       next:
+           prev_rreq = rreq;
+           rreq = MPID_nem_dcfa_lmtq_next(rreq);
+       next_unlinked:;
+       } while(rreq);
+#if defined (TIMER_WAIT_DCFA_POLL)
+   if(in_blocking_poll) { stsc[0] += MPI_rdtsc() - tsc[0]; }
+#endif
+   }
+
+#if defined (TIMER_WAIT_DCFA_POLL)
+   if(in_blocking_poll) { tsc[1] = MPI_rdtsc(); }
+#endif
+   int ncom_almost_full = 0;
+   for(i = 0; i < MPID_nem_dcfa_npollingset; i++) {
+       //tscs = MPID_nem_dcfa_rdtsc();
+       MPIDI_VC_t *vc = MPID_nem_dcfa_pollingset[i];
+       mpi_errno = MPID_nem_dcfa_poll_eager(vc);
+       if(mpi_errno) { MPIU_ERR_POP (mpi_errno); }
+
+       MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+
+       /* without this, a command in the sendq doesn't get a chance
+          to be issued by send_progress when send and progress_send
+          call drain_scq while asking it not to perform send_progress,
+          leaving the CQ empty */
+       MPID_nem_dcfa_send_progress(vc_dcfa);
+
+       ncom_almost_full |= (vc_dcfa->ibcom->ncom >= IBCOM_MAX_SQ_HEIGHT_DRAIN);
+       
+#if 0
+       /* aggressively perform drain_scq */
+       ncom_almost_full |= !(MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq));
+#endif
+   }
+#if defined (TIMER_WAIT_DCFA_POLL)
+   if(in_blocking_poll) { stsc[1] += MPI_rdtsc() - tsc[1]; }
+#endif
+
+   // lazy fetching of completion queue entries because it causes cache misses
+#if !defined (LMT_PUT_DONE) && defined (LMT_GET_CQE)
+   if(MPID_nem_dcfa_ncqe_to_drain > 0 || MPID_nem_dcfa_ncqe_nces > 0 || MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full) {
+#endif
+#if !defined (LMT_PUT_DONE) && !defined (LMT_GET_CQE)
+   if(/*(in_blocking_poll && result == 0) ||*/ MPID_nem_dcfa_ncqe_nces > 0 || MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_HEIGHT_DRAIN || ncom_almost_full) {
+
+#endif
+#if defined (TIMER_WAIT_DCFA_POLL)
+   if(in_blocking_poll) { tsc[0] = MPI_rdtsc(); }
+#endif
+   //dprintf("dcfa_poll,calling drain_scq\n");
+       ibcom_errno = MPID_nem_dcfa_drain_scq(0);
+       MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
+#if defined (TIMER_WAIT_DCFA_POLL)
+   if(in_blocking_poll) { stsc[0] += MPI_rdtsc() - tsc[0]; }
+#endif
+   }
+#if 1
+   /* aggressively perform drain_scq */
+   ibcom_errno = MPID_nem_dcfa_drain_scq(0);
+   MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
+#endif
+   /* detect completion of lmt-put when MPI_Wait kicks dcfa_poll */
+   if(MPID_nem_dcfa_ncqe_lmt_put > 0) {
+       ibcom_errno = MPID_nem_dcfa_drain_scq_lmt_put();
+       MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_lmt_put");
+   }       
+
+#ifdef DCFA_ONDEMAND
+   /* process incoming connection request */
+   MPID_nem_dcfa_cm_accept();
+
+   /* process outgoing connection request */
+   if(MPID_nem_dcfa_ncqe_connect >= IBCOM_MAX_CQ_HEIGHT_DRAIN) {
+       ibcom_errno = MPID_nem_dcfa_cm_drain_scq(0);
+       MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_drain_scq");
+   }       
+#endif
+
+#if 1
+   /* if polling on eager-send and lmt would repeat frequently, issue "pause" to yield instruction-issue bandwidth to the other logical core */
+   if(in_blocking_poll && progress_completion_count_old == MPIDI_CH3I_progress_completion_count.v) {
+           __asm__ __volatile__ ("pause;" : : : "memory"); 
+   }
+#endif
+   //if(in_blocking_poll) { goto prev; }
+
+ out:
+   fn_exit:
+   MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_POLL);
+   return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
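+
+/* A minimal sketch (hypothetical names and threshold) of the two policies the
+   tail of dcfa_poll applies above: drain the send CQ lazily, only when enough
+   completions have piled up or a send queue is nearly full, and issue "pause"
+   when a blocking poll made no progress, to yield instruction-issue bandwidth
+   to the sibling hardware thread. */
+static inline void sketch_poll_tail(int ncqe, int ncom_almost_full,
+                                    int in_blocking_poll, int made_progress)
+{
+    enum { SKETCH_CQ_DRAIN_THRESHOLD = 64 };   /* stand-in for IBCOM_MAX_CQ_HEIGHT_DRAIN */
+
+    if (ncqe >= SKETCH_CQ_DRAIN_THRESHOLD || ncom_almost_full) {
+        /* drain completions in a batch here (see MPID_nem_dcfa_drain_scq above) */
+    }
+    if (in_blocking_poll && !made_progress) {
+        __asm__ __volatile__("pause;" : : : "memory");
+    }
+}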
+
+   /* a new rreq is obtained in MPID_Irecv (mpid_irecv.c), so we associate it
+      with a receive request and ibv_post_recv it, so that ibv_poll_cq can hand
+      the rreq back to us */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_recv_posted
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req) {
+
+    int mpi_errno = MPI_SUCCESS;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_RECV_POSTED);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_RECV_POSTED);
+    dprintf("recv_posted,enter,%d->%d,req=%p\n", MPID_nem_dcfa_myrank, vc->pg_rank, req);
+
+#if 0
+    int ibcom_errno;
+    ibcom_errno = ibcom_irecv(vc_dcfa->sc->fd, (uint64_t)vc->pg_rank);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_irecv");
+#endif
+    
+#if 1 /*takagi*/
+    MPIDI_msg_sz_t data_sz;
+    int dt_contig;
+    MPI_Aint dt_true_lb;
+    MPID_Datatype * dt_ptr;
+    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+
+    /* poll when rreq is for lmt */
+    /* anticipating that the received message finds a matching request in the posted queue */
+    if(data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) > vc->eager_max_msg_sz) {
+        //if(MPID_nem_dcfa_tsc_poll - MPID_nem_dcfa_rdtsc() > MPID_NEM_DCFA_POLL_PERIOD_RECV_POSTED) {
+#if 1
+        mpi_errno = MPID_nem_dcfa_poll_eager(vc);
+#else
+            mpi_errno = MPID_nem_dcfa_poll(0);
+#endif
+            if(mpi_errno) { MPIU_ERR_POP (mpi_errno); }
+            //}
+    } else {
+#if 1
+    /* anticipating that the received message finds a matching request in the posted queue */
+    //if(MPID_nem_dcfa_tsc_poll - MPID_nem_dcfa_rdtsc() > MPID_NEM_DCFA_POLL_PERIOD_RECV_POSTED) {
+#if 1
+        mpi_errno = MPID_nem_dcfa_poll_eager(vc);
+#else
+        mpi_errno = MPID_nem_dcfa_poll(0);
+#endif
+        if(mpi_errno) { MPIU_ERR_POP (mpi_errno); }
+        //}
+#endif
+    }
+#endif
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_RECV_POSTED);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
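+
+/* A small sketch of the path decision recv_posted makes above (hypothetical
+   parameter names; eager_max stands in for vc->eager_max_msg_sz): a message
+   whose payload plus eager header does not fit under the eager limit is
+   expected to arrive via LMT (rendezvous) rather than the eager path, and the
+   receive side polls accordingly while the request is outstanding. */
+static inline int sketch_expects_lmt(unsigned long data_sz,
+                                     unsigned long eager_hdr_sz,
+                                     unsigned long eager_max)
+{
+    /* mirrors: data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) > vc->eager_max_msg_sz */
+    return data_sz + eager_hdr_sz > eager_max;
+}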
+
+/* (1) the packet handler memcpys data from the RDMA-write-to buffer to the MPI user buffer
+       when a matching request is found in the posted queue
+   (2) MPI_Irecv memcpys data from the RDMA-write-to buffer to the MPI user buffer
+       when a matching request is found in the unexpected queue
+   The latter case cannot be handled by calling this only after poll-found and the packet handler
+   (the packet handler copies the RDMA-write-to buffer into another buffer when no matching
+   request is found in the posted queue, so calling this after poll-found and the packet handler
+   suffices in the original MPICH implementation).
+*/
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_recv_buf_released
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_recv_buf_released(struct MPIDI_VC *vc, void* user_data) {
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_RECV_BUF_RELEASED);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_RECV_BUF_RELEASED);
+    dprintf("recv_buf_released,%d<-%d,user_data=%p\n", MPID_nem_dcfa_myrank, vc->pg_rank, user_data);
+#if 1 /* moving from dcfa_poll */
+       /* unmark the magic */
+       /* the magic is located at the IBCOM_INLINE_DATA boundary, and the variable
+          length entails multiple prospective locations for future use */
+
+    /* see MPIDI_CH3_PktHandler_EagerShortSend (in src/mpid/ch3/src/ch3u_eager.c) */
+    /* an eager-send with zero-length data is released in poll because there is
+       no way to trace the RDMA-write-to buffer address: rreq->dev.tmpbuf is set
+       to zero in ch3_eager.c */
+    if(user_data == NULL) { goto fn_exit; }
+
+    MPIU_Assert(vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] <= user_data && user_data < vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZ);
+    unsigned long mod = (unsigned long)(user_data - vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO]) & (IBCOM_RDMABUF_SZSEG-1);
+
+    void* buf = (void*)(user_data - mod);
+    //dprintf("recv_buf_released,clearing,buf=%p\n", buf);
+    sz_hdrmagic_t* sz_hdrmagic = (sz_hdrmagic_t*)buf;
+    
+    int sz_data_pow2;
+    DCFA_NEM_SZ_DATA_POW2(sz_hdrmagic->sz);
+    //dprintf("recv_buf_released,sz=%d,pow2=%d\n", sz_hdrmagic->sz, sz_data_pow2);
+#if 1
+    uint32_t offset;
+    for(offset = 0; ; offset = offset ? ( (((offset + 1) << 1) - 1) > DCFA_NEM_MAX_DATA_POW2 ? DCFA_NEM_MAX_DATA_POW2 : (((offset + 1) << 1) - 1) ) : 15) {
+        volatile tailmagic_t* ptr = (tailmagic_t*)(buf + offset);
+        MPIU_Assert(vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] <= ptr && ptr < vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO] + IBCOM_RDMABUF_SZ);
+        ptr->magic = 0/*0xde*/; 
+        if(offset == sz_data_pow2) { break; }
+    }
+#endif
+#endif
+
+#if 1 /* moving from dcfa_poll */
+    /* mark that one eager-send RDMA-write-to buffer has been released */
+    int index_slot = (unsigned long)(user_data - vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO]) / IBCOM_RDMABUF_SZSEG;
+    MPIU_Assert(0 <= index_slot && index_slot < IBCOM_RDMABUF_NSEG);
+    //dprintf("user_data=%p,mem=%p,sub=%08lx,index_slot=%d\n", user_data, vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO], (unsigned long)user_data - (unsigned long)vc_dcfa->ibcom->icom_mem[IBCOM_RDMAWR_TO], index_slot);
+    //dprintf("index_slot=%d,released=%016lx\n", index_slot, vc_dcfa->ibcom->rsr_seq_num_released[index_slot / 64]);
+    vc_dcfa->ibcom->rsr_seq_num_released[index_slot / 64] |= (1ULL << (index_slot & 63));
+    //dprintf("released[index_slot/64]=%016lx\n", vc_dcfa->ibcom->rsr_seq_num_released[index_slot / 64]);
+    //    int index_tail = (vc_dcfa->ibcom->rsr_seq_num_tail + 1) & (IBCOM_RDMABUF_NSEG-1);
+    int index_tail = (vc_dcfa->ibcom->rsr_seq_num_tail + 1) % IBCOM_RDMABUF_NSEG;
+    //dprintf("tail+1=%d,index_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail + 1, index_tail);
+    //dprintf("released=%016lx\n", vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64]);
+    if(1||(index_tail & 7) || MPID_nem_dcfa_diff32(index_slot, index_tail) >= IBCOM_RDMABUF_NSEG - 8) { /* avoid wrap-around */
+        while(1) {
+            if(((vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] >> (index_tail & 63)) & 1) == 1) {
+                vc_dcfa->ibcom->rsr_seq_num_tail += 1;
+                vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] &= ~(1ULL << (index_tail & 63));
+                dprintf("rsr_seq_num_tail,incremented to %d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+                /* keep advancing over the contiguous run of released slots */
+                index_tail = (vc_dcfa->ibcom->rsr_seq_num_tail + 1) % IBCOM_RDMABUF_NSEG;
+            } else {
+                break;
+            }
+        }
+    } else {
+        if(((vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] >> (index_tail & 63)) & 0xff) == 0xff) {
+            vc_dcfa->ibcom->rsr_seq_num_tail += 8;
+            vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64] &= ~(0xffULL << (index_tail & 63));
+            //dprintf("released[index_tail/64]=%016lx\n", vc_dcfa->ibcom->rsr_seq_num_released[index_tail / 64]);
+        }
+    }
+    
+    //dprintf("recv_buf_released,%d->%d,rsr_seq_num_tail=%d,rsr_seq_num_tail_last_sent=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent);
+
+    int notify_rate;
+    ibcom_errno = ibcom_rdmabuf_occupancy_notify_rate_get(MPID_nem_dcfa_conns[vc->pg_rank].fd, &notify_rate);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rdmabuf_occupancy_notify_rate_get");
+
+    /* if we missed the chance to piggy-back it on an eager-send message */
+    if(MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent) > IBCOM_RDMABUF_OCCUPANCY_NOTIFY_RATE_DELAY_MULTIPLIER(notify_rate) 
+       //|| MPID_nem_dcfa_diff32(lsr_seq_num_head, vc_dcfa->ibcom->lsr_seq_num_tail_last_sent) == IBCOM_RDMABUF_NSEG
+       ) {
+        MPID_Request *sreq;
+        sreq = MPID_nem_dcfa_sendq_head(vc_dcfa->sendq);
+        if(sreq) {
+            int msg_type = MPIDI_Request_get_msg_type(sreq);
+            MPIDI_CH3_Pkt_t* ch3_hdr = (MPIDI_CH3_Pkt_t *)sreq->dev.iov[0].MPID_IOV_BUF;
+            if(msg_type == MPIDI_REQUEST_EAGER_MSG && /* guard for the following pointer dereference */
+               ch3_hdr->type == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) {
+                goto skip;
+            }
+        }
+        //printf("recv_buf_released,sending reply_seq_num,diff=%d,rate=%d,id=%d\n", MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent), notify_rate + (notify_rate>>1), vc_dcfa->ibcom->sseq_num);
+        MPID_nem_dcfa_send_reply_seq_num(vc);
+    skip:;
+    }
+#endif
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_RECV_BUF_RELEASED);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
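+
+/* A minimal sketch of the out-of-order release bookkeeping used in
+   recv_buf_released above, with hypothetical names: released[] (one bit per
+   slot) and *tail stand in for rsr_seq_num_released[] and rsr_seq_num_tail,
+   and nseg for IBCOM_RDMABUF_NSEG.  A freed slot sets its bit, and the tail
+   then advances over the contiguous run of freed slots that follows it. */
+static inline void sketch_release_slot(unsigned long long released[], int *tail,
+                                       int nseg, int slot)
+{
+    released[slot / 64] |= (1ULL << (slot & 63));    /* mark the slot as freed */
+    int next = (*tail + 1) % nseg;
+    while ((released[next / 64] >> (next & 63)) & 1) {
+        released[next / 64] &= ~(1ULL << (next & 63));
+        *tail += 1;                                  /* advance over contiguous freed slots */
+        next = (*tail + 1) % nseg;
+    }
+}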
+
+#if 0
+/* packet handler for wrapper packet of MPIDI_NEM_PKT_LMT_DONE */
+/* see pkt_DONE_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_PktHandler_lmt_done
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_PktHandler_lmt_done(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_nem_dcfa_pkt_lmt_done_t * const done_pkt = (MPID_nem_dcfa_pkt_lmt_done_t *)pkt;
+    MPID_Request *req;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_LMT_DONE);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_LMT_DONE);
+
+    /* Check the assumption on sizeof(MPIDI_CH3_Pkt_t):
+       it is used in pkt_DONE_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c)
+       and must be at least sizeof(MPID_nem_dcfa_pkt_lmt_done_t) */
+    if(sizeof(MPID_nem_dcfa_pkt_lmt_done_t) > sizeof(MPIDI_CH3_Pkt_t)) {
+        MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_INTERN, "**sizeof(MPIDI_CH3_Pkt_t)");
+    }
+
+    /* fall back to the original handler */
+    /* we don't need to worry about the difference caused by embedding seq_num 
+       because the handler does not use it (e.g. applying sizeof operator to it) */
+    MPID_nem_pkt_lmt_done_t *pkt_parent_class = (MPID_nem_pkt_lmt_done_t *)pkt;
+    pkt_parent_class->type = MPIDI_NEM_PKT_LMT_DONE;
+#if 0
+    mpi_errno = MPID_nem_handle_pkt(vc, (char *)pkt_parent_class, *buflen);
+#else
+    MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**notimplemented");
+    /* you need to modify mpid_nem_lmt.c to make pkt_DONE_handler visible to me */
+    //mpi_errno = pkt_DONE_handler(vc, pkt, buflen, rreqp);
+#endif
+    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_LMT_DONE);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+#endif
+
+/* packet handler for wrapper packet of MPIDI_CH3_PKT_EAGER_SEND */
+/* see MPIDI_CH3_PktHandler_EagerSend (in src/mpid/ch3/src/ch3u_eager.c) */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_PktHandler_EagerSend
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_PktHandler_EagerSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
+    MPIDI_CH3_Pkt_eager_send_t *ch3_pkt = (MPIDI_CH3_Pkt_eager_send_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request * rreq;
+    int found;
+    int complete;
+    char *data_buf;
+    MPIDI_msg_sz_t data_len;
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_EAGERSEND);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_EAGERSEND);
+
+    printf("dcfa_pkthandler_eagersend,tag=%d\n", ch3_pkt->match.parts.tag);
+
+    /* Check the assumption on sizeof(MPIDI_CH3_Pkt_t):
+       it is used to locate the payload in MPIDI_CH3_PktHandler_EagerSend
+       (src/mpid/ch3/src/ch3u_eager.c) and must be at least sizeof(MPID_nem_dcfa_pkt_eager_send_t) */
+    //if(sizeof(MPID_nem_dcfa_pkt_eager_send_t) > sizeof(MPIDI_CH3_Pkt_t)) {
+    //MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_INTERN, "**sizeof(MPIDI_CH3_Pkt_t)");
+    //}
+
+    /* Update occupation status of local SR (send request) queue */
+    int *lsr_seq_num_tail;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    ibcom_errno = ibcom_lsr_seq_num_tail_get(vc_dcfa->sc->fd, &lsr_seq_num_tail);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_lsr_seq_num_tail_get");
+    dprintf("MPID_nem_dcfa_PktHandler_EagerSend,lsr_seq_num_tail=%d,netmod_pkt->seq_num_tail=%d\n", *lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    *lsr_seq_num_tail = DCFA_MAX(*lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_PktHandler_EagerSend,lsr_seq_num_tail updated to %d\n", *lsr_seq_num_tail);
+
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+    /* change remote notification policy of RDMA-write-to buf */
+    dprintf("pkthandler,eagersend,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, lsr_seq_num_tail);
+    dprintf("pkthandler,eagersend,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+#endif
+    
+    dprintf("pkthandler,eagersend,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    /* try to send from sendq because at least one RDMA-write-to buffer has been released */
+    /* calling drain_scq from progress_send deprives dcfa_poll of the chance to
+       drain the sendq using ncqe; however, it transfers events (not
+       reply_seq_num, because that is regulated by the rate) so that they fire
+       on dcfa_poll using nces (e.g. MPI_Put), so we need to perform
+       progress_send for all VCs using nces in dcfa_poll. */
+        dprintf("pkthandler,eagersend,send_progress\n");fflush(stdout);
+        MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
+
+    /* fall back to the original handler */
+    /* we don't need to worry about the difference caused by embedding seq_num
+       because the size of the MPI header of MPIDI_CH3_PKT_EAGER_SEND equals sizeof(MPIDI_CH3_Pkt_t);
+       see MPID_nem_dcfa_iSendContig
+     */
+    //ch3_pkt->type = MPIDI_CH3_PKT_EAGER_SEND;
+#if 0
+    mpi_errno = MPID_nem_handle_pkt(vc, (char *)pkt_parent_class, *buflen);
+#else
+    printf("dcfa_poll.c,before PktHandler_EagerSend,buflen=%ld\n", *buflen);
+    MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    mpi_errno = MPIDI_CH3_PktHandler_EagerSend(vc, (MPIDI_CH3_Pkt_t*)ch3_pkt, &ch3_buflen, rreqp);
+    printf("dcfa_poll.c,after PktHandler_EagerSend,buflen=%ld\n", ch3_buflen);
+    *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t); 
+    printf("dcfa_poll.c,after addition,buflen=%ld\n", *buflen);
+#endif
+    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ 
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_EAGERSEND);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
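+
+/* A small sketch of the wrapper-packet layout the handler above unwraps, with
+   hypothetical names: sketch_pkt_prefix stands in for MPID_nem_dcfa_pkt_prefix_t.
+   The netmod prefix carrying the piggy-backed seq_num_tail is prepended to an
+   unmodified CH3 packet, so the handler subtracts sizeof(prefix) from buflen
+   before calling the CH3 handler and adds it back to the consumed length. */
+struct sketch_pkt_prefix { int seq_num_tail; };
+
+static inline void *sketch_unwrap_ch3_pkt(void *wrapped, long *buflen /* in/out */)
+{
+    *buflen -= (long)sizeof(struct sketch_pkt_prefix);          /* length the CH3 handler sees */
+    return (char *)wrapped + sizeof(struct sketch_pkt_prefix);  /* CH3 packet follows the prefix */
+}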
+
+#if 1
+/* packet handler for wrapper packet of MPIDI_CH3_PKT_PUT */
+/* see MPIDI_CH3_PktHandler_EagerSend (in src/mpid/ch3/src/ch3u_rma_sync.c) */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_PktHandler_Put
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_PktHandler_Put( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
+    MPIDI_CH3_Pkt_put_t *ch3_pkt = (MPIDI_CH3_Pkt_put_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request * rreq;
+    int found;
+    int complete;
+    char *data_buf;
+    MPIDI_msg_sz_t data_len;
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_PUT);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_PUT);
+
+    /* Update occupation status of local SR (send request) queue */
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    dprintf("MPID_nem_dcfa_Pkthandler_Put,lsr_seq_num_tail=%d,put_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_Put,lsr_seq_num_tail updated to %d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+    /* change remote notification policy of RDMA-write-to buf */
+    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+#endif    
+    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    /* try to send from sendq because at least one RDMA-write-to buffer has been released */
+    dprintf("pkthandler,put,send_progress\n");fflush(stdout);
+    MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS;
+    
+    /* fall back to the original handler */
+    /* we don't need to worry about the difference caused by embedding seq_num
+       because the size of the MPI header of MPIDI_CH3_PKT_PUT equals sizeof(MPIDI_CH3_Pkt_t);
+       see MPID_nem_dcfa_iSendContig
+    */
+    MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    mpi_errno = MPIDI_CH3_PktHandler_Put(vc, (MPIDI_CH3_Pkt_t *)ch3_pkt, &ch3_buflen, rreqp);
+    *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ 
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_PUT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+#endif
+
+/* packet handler for wrapper packet of MPIDI_CH3_PKT_ACCUMULATE */
+/* see MPIDI_CH3_PktHandler_Accumulate (in src/mpid/ch3/src/ch3u_rma_sync.c) */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_PktHandler_Accumulate
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_PktHandler_Accumulate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
+    MPIDI_CH3_Pkt_accum_t *ch3_pkt = (MPIDI_CH3_Pkt_accum_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request * rreq;
+    int found;
+    int complete;
+    char *data_buf;
+    MPIDI_msg_sz_t data_len;
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_ACCUMULATE);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_ACCUMULATE);
+
+    /* Update occupation status of local SR (send request) queue */
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    dprintf("MPID_nem_dcfa_Pkthandler_Accumulate,lsr_seq_num_tail=%d,accum_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_Accumulate,lsr_seq_num_tail updated to %d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+    /* change remote notification policy of RDMA-write-to buf */
+    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+#endif    
+    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    /* try to send from sendq because at least one RDMA-write-to buffer has been released */
+        dprintf("pkthandler,put,send_progress\n");fflush(stdout);
+        MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
+
+
+    /* fall back to the original handler */
+    /* we don't need to worry about the difference caused by embedding seq_num
+       because the size of the MPI header of MPIDI_CH3_PKT_ACCUMULATE equals sizeof(MPIDI_CH3_Pkt_t);
+       see MPID_nem_dcfa_iSendContig
+     */
+    MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    mpi_errno = MPIDI_CH3_PktHandler_Accumulate(vc, (MPIDI_CH3_Pkt_t *)ch3_pkt, &ch3_buflen, rreqp);
+    *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t); 
+    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ 
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_ACCUMULATE);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* packet handler for wrapper packet of MPIDI_CH3_PKT_GET */
+/* see MPIDI_CH3_PktHandler_Get (in src/mpid/ch3/src/ch3u_rma_sync.c) */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_PktHandler_Get
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_PktHandler_Get( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
+    MPIDI_CH3_Pkt_get_t *ch3_pkt = (MPIDI_CH3_Pkt_get_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request * rreq;
+    int found;
+    int complete;
+    char *data_buf;
+    MPIDI_msg_sz_t data_len;
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_GET);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_GET);
+
+    /* Update occupation status of local SR (send request) queue */
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    dprintf("MPID_nem_dcfa_Pkthandler_Get,lsr_seq_num_tail=%d,get_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_Get,lsr_seq_num_tail updated to %d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+    /* change remote notification policy of RDMA-write-to buf */
+    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+#endif    
+    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    /* try to send from sendq because at least one RDMA-write-to buffer has been released */
+        dprintf("pkthandler,get,send_progress\n");fflush(stdout);
+        MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET 
+
+    /* fall back to the original handler */
+    /* we don't need to worry about the difference caused by embedding seq_num
+       because the size of the MPI header of MPIDI_CH3_PKT_GET equals sizeof(MPIDI_CH3_Pkt_t);
+       see MPID_nem_dcfa_iSendContig
+     */
+    MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    mpi_errno = MPIDI_CH3_PktHandler_Get(vc, (MPIDI_CH3_Pkt_t *)ch3_pkt, &ch3_buflen, rreqp);
+    *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ 
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_GET);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* packet handler for wrapper packet of MPIDI_CH3_PKT_GET_RESP */
+/* see MPIDI_CH3_PktHandler_GetResp (in src/mpid/ch3/src/ch3u_rma_sync.c) */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_PktHandler_GetResp
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_PktHandler_GetResp( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen /* out */, MPID_Request **rreqp /* out */) {
+    MPID_nem_dcfa_pkt_prefix_t *netmod_pkt = (MPID_nem_dcfa_pkt_prefix_t*)pkt;
+    MPIDI_CH3_Pkt_get_t *ch3_pkt = (MPIDI_CH3_Pkt_get_t*)((void*)pkt + sizeof(MPID_nem_dcfa_pkt_prefix_t));
+    MPID_Request * rreq;
+    int found;
+    int complete;
+    char *data_buf;
+    MPIDI_msg_sz_t data_len;
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_GETRESP);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_GETRESP);
+
+    /* Update occupation status of local SR (send request) queue */
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    dprintf("MPID_nem_dcfa_Pkthandler_GetResp,lsr_seq_num_tail=%d,get_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, netmod_pkt->seq_num_tail);
+    dprintf("MPID_nem_dcfa_Pkthandler_GetResp,lsr_seq_num_tail updated to %d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+    /* change remote notification policy of RDMA-write-to buf */
+    dprintf("pkthandler,put,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
+    dprintf("pkthandler,put,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+#endif    
+    dprintf("pkthandler,put,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    /* try to send from sendq because at least one RDMA-write-to buffer has been released */
+        dprintf("pkthandler,get,send_progress\n");fflush(stdout);
+        MPID_NEM_DCFA_SEND_PROGRESS_POLLINGSET 
+
+    /* fall back to the original handler */
+    /* we don't need to worry about the difference caused by embedding seq_num
+       because the size of the MPI header of MPIDI_CH3_PKT_GET_RESP equals sizeof(MPIDI_CH3_Pkt_t);
+       see MPID_nem_dcfa_iSendContig
+     */
+    MPIDI_msg_sz_t ch3_buflen = *buflen - sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    mpi_errno = MPIDI_CH3_PktHandler_GetResp(vc, (MPIDI_CH3_Pkt_t *)ch3_pkt, &ch3_buflen, rreqp);
+    *buflen = ch3_buflen + sizeof(MPID_nem_dcfa_pkt_prefix_t);
+    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+ 
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_GETRESP);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* MPI_Isend set req-type to MPIDI_REQUEST_TYPE_RECV */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_pkt_GET_DONE_handler
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_pkt_GET_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_nem_dcfa_pkt_lmt_get_done_t * const done_pkt = (MPID_nem_dcfa_pkt_lmt_get_done_t *)pkt;
+    MPID_Request *req;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKT_GET_DONE_HANDLER);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKT_GET_DONE_HANDLER);
+
+    dprintf("get_done_handler,enter\n");
+
+    *buflen = sizeof(MPIDI_CH3_Pkt_t);
+    MPID_Request_get_ptr(done_pkt->req_id, req);
+
+    MPIU_THREAD_CS_ENTER(LMT,);
+
+    switch (MPIDI_Request_get_type(req))
+    {
+    /* MPIDI_Request_set_type is not performed when
+       MPID_Isend --> FDU_or_AEP --> recv_posted --> dcfa_poll --> PUTCTS packet-handler */
+    case MPIDI_REQUEST_TYPE_RECV:
+        MPIU_ERR_INTERNALANDJUMP(mpi_errno, "unexpected request type");
+        break;
+    case MPIDI_REQUEST_TYPE_SEND:
+    case MPIDI_REQUEST_TYPE_RSEND:
+    case MPIDI_REQUEST_TYPE_SSEND:
+    case MPIDI_REQUEST_TYPE_BSEND:
+        /* extract embedded RDMA-write-to buffer occupancy information */
+        dprintf("get_done_handler,old lsr_seq_num_tail=%d,done_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, done_pkt->seq_num_tail);
+        vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, done_pkt->seq_num_tail);
+        //dprintf("lmt_start_recv,new lsr_seq_num=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+        
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+        /* change remote notification policy of RDMA-write-to buf */
+        //dprintf("lmt_start_recv,reply_seq_num,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+        MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, &vc_dcfa->ibcom->lsr_seq_num_tail);
+        //dprintf("lmt_start_recv,reply_seq_num,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+#endif        
+        //dprintf("lmt_start_recv,reply_seq_num,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+        /* try to send from sendq because at least one RDMA-write-to buffer has been released */
+        //dprintf("lmt_start_recv,reply_seq_num,send_progress\n");
+        if(!MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) {
+            dprintf("get_done_handler,ncom=%d,ncqe=%d,diff=%d(%d-%d)\n", vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY, MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG,
+                   vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail);
+        }
+            dprintf("get_done_handler,send_progress\n");fflush(stdout);
+            MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
+
+        mpi_errno = vc->ch.lmt_done_send(vc, req);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        break;
+    default:
+        MPIU_ERR_INTERNALANDJUMP(mpi_errno, "unexpected request type");
+        break;
+    }
+
+    *rreqp = NULL;
+
+ fn_exit:
+    MPIU_THREAD_CS_EXIT(LMT,);
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKT_GET_DONE_HANDLER);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_PktHandler_req_seq_num
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_PktHandler_req_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_nem_dcfa_pkt_req_seq_num_t * const req_pkt = (MPID_nem_dcfa_pkt_req_seq_num_t *)pkt;
+    MPID_Request *req;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REQ_SEQ_NUM);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REQ_SEQ_NUM);
+
+    /* mark that the whole message has been read */
+    *buflen = sizeof(MPIDI_CH3_Pkt_t);
+
+    /* mark that no continuation read request is needed */
+    *rreqp = NULL;
+
+    /* update occupancy info of SR */
+    /* the request piggy-backs a seq_num even though it is asking for the responder's seq_num */
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    vc_dcfa->ibcom->lsr_seq_num_tail = DCFA_MAX(vc_dcfa->ibcom->lsr_seq_num_tail, req_pkt->seq_num_tail);
+
+    dprintf("PktHandler_req_seq_num,sendq=%d,ncom=%d,ncqe=%d,diff=%d(%d-%d)\n",
+           MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq),
+           vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY,
+           MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY,
+           MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG,
+           vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail
+           );
+
+    /* send reply */
+    dprintf("PktHandler_req_seq_num,sending reply_seq_num,id=%d\n", vc_dcfa->ibcom->sseq_num);
+    MPID_nem_dcfa_send_reply_seq_num(vc);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REQ_SEQ_NUM);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_PktHandler_reply_seq_num
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_PktHandler_reply_seq_num(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_nem_dcfa_pkt_reply_seq_num_t * const reply_pkt = (MPID_nem_dcfa_pkt_reply_seq_num_t *)pkt;
+    MPID_Request *req;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REPLY_SEQ_NUM);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REPLY_SEQ_NUM);
+
+
+    /* mark that the whole message has been consumed */
+    *buflen = sizeof(MPIDI_CH3_Pkt_t);
+
+    /* mark that no continuation read request is needed */
+    *rreqp = NULL;
+
+    /* update occupancy info of RDMA-write-buf */
+    int *lsr_seq_num_tail;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    dprintf("pkthandler,reply_seq_num,old lsr_seq_num=%d,reply_pkt->seq_num_tail=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail, reply_pkt->seq_num_tail);
+    ibcom_errno = ibcom_lsr_seq_num_tail_get(vc_dcfa->sc->fd, &lsr_seq_num_tail);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_lsr_seq_num_tail_get");
+    *lsr_seq_num_tail = DCFA_MAX(*lsr_seq_num_tail, reply_pkt->seq_num_tail);
+    //dprintf("pkthandler,reply_seq_num,new lsr_seq_num=%d\n", vc_dcfa->ibcom->lsr_seq_num_tail);
+
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+    /* change remote notification policy of RDMA-write-to buf */
+    //dprintf("pkthandler,reply_seq_num,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+    MPID_nem_dcfa_change_rdmabuf_occupancy_notify_policy_lw(vc_dcfa, lsr_seq_num_tail);
+    //dprintf("pkthandler,reply_seq_num,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);   
+#endif
+
+    //dprintf("pkthandler,reply_seq_num,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    /* try to send from sendq because at least one RDMA-write-to buffer has been released */
+    //dprintf("pkthandler,reply_seq_num,send_progress\n");
+        dprintf("pkthandler,reply_seq_num,send_progress\n");
+        MPID_NEM_DCFA_CHECK_AND_SEND_PROGRESS
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_REPLY_SEQ_NUM);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
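+
+/* A compact sketch of the credit update the two handlers above perform, with
+   hypothetical names: the receiver reports how far it has released its
+   RDMA-write-to buffer, and the sender keeps only the largest tail seen so
+   far, since reports may be stale or arrive out of order. */
+static inline void sketch_update_credit(int *local_tail, int reported_tail)
+{
+    /* mirrors: *lsr_seq_num_tail = DCFA_MAX(*lsr_seq_num_tail, reply_pkt->seq_num_tail) */
+    if (reported_tail > *local_tail) {
+        *local_tail = reported_tail;
+    }
+}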
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_PktHandler_change_rdmabuf_occupancy_notify_state(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t * const reply_pkt = (MPID_nem_dcfa_pkt_change_rdmabuf_occupancy_notify_state_t *)pkt;
+    MPID_Request *req;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_CHANGE_RDMABUF_OCCUPANCY_NOTIFY_STATE);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_CHANGE_RDMABUF_OCCUPANCY_NOTIFY_STATE);
+
+    /* mark that the whole message has been read */
+    *buflen = sizeof(MPIDI_CH3_Pkt_t);
+
+    /* mark that no continuation read request is needed */
+    *rreqp = NULL;
+
+
+    /* update occupancy info of SR */
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    dprintf("pkthandler,change notify state,old lstate=%d,pkt->state=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_lstate, reply_pkt->state);
+    int *rdmabuf_occupancy_notify_lstate;
+    ibcom_errno = ibcom_rdmabuf_occupancy_notify_lstate_get(vc_dcfa->sc->fd, &rdmabuf_occupancy_notify_lstate);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rdmabuf_occupancy_notify_lstate_get"); 
+    *rdmabuf_occupancy_notify_lstate = reply_pkt->state;
+    dprintf("pkthandler,change notify state,new lstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_lstate);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_PKTHANDLER_CHANGE_RDMABUF_OCCUPANCY_NOTIFY_STATE);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#ifdef DCFA_ONDEMAND
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_cm_drain_scq
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_cm_drain_scq() {
+
+    int mpi_errno = MPI_SUCCESS;
+    int result;
+    int i;
+    struct ibv_wc cqe[IBCOM_MAX_CQ_HEIGHT_DRAIN];
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_CM_DRAIN_SCQ);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_CM_DRAIN_SCQ);
+
+    result = ibv_poll_cq(rc_shared_scq_lmt_put, IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
+    MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
+    
+    if(result > 0) { dprintf("cm_drain_scq,found,result=%d\n", result); }
+    for(i = 0; i < result; i++)  {
+        
+#ifdef DCFA
+        if(cqe[i].status != IBV_WC_SUCCESS) { dprintf("cm_drain_scq,status=%08x\n", cqe[i].status); }
+#else
+        if(cqe[i].status != IBV_WC_SUCCESS) { dprintf("cm_drain_scq,status=%08x,%s\n", cqe[i].status, ibv_wc_status_str(cqe[i].status)); }
+#endif
+        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_drain_scq");
+        /* TODO: retry a connection request when it times out */
+
+        if(cqe[i].wr_id == MPID_NEM_DCFA_SYN ||
+           cqe[i].wr_id == MPID_NEM_DCFA_SYNACK) {
+
+            MPID_nem_dcfa_conn_ud_ibcom->ncom_lmt_put -= 1;
+            MPID_nem_dcfa_ncqe_connect -= 1;
+            
+            /* Try to send from sendq_connect */
+            if(!MPID_nem_dcfa_sendq_empty(sendq_connect) &&
+               MPID_nem_dcfa_ncom_lmt_put < IBCOM_MAX_SQ_CAPACITY &&
+               MPID_nem_dcfa_ncqe_lmt_put < IBCOM_MAX_CQ_CAPACITY) {
+                MPID_nem_dcfa_send_progress_connect(); 
+            }
+        } else {
+            printf("unknown command=%d\n", cqe.wr_id);
+            MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_drain_scq");
+        }
+    }
+    
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CM_DRAIN_SCQ);
+    return mpi_errno;
+ fn_fail:
+   goto fn_exit;   
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_cm_poll
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_cm_poll()
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_CM_POLL);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_CM_POLL);
+
+    dprintf("cm_poll,enter\n");
+
+
+    volatile uint32_t* owner = (uint32_t*)(ibcom_scratch_pad->icom_mem[IBCOM_SCRATCH_PAD_TO]);
+    if(*owner == (uint32_t)-1) { goto fn_exit; } /* not acquired */
+
+    IbCom* ibcom_scratch_pad;
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[*owner], &ibcom_scratch_pad); 
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
+
+    MPID_nem_dcfa_cm_cmd_t* received = (MPID_nem_dcfa_cm_cmd_t*)(ibcom_scratch_pad->icom_mem[IBCOM_SCRATCH_PAD_TO] + sizeof(uint32_t));
+    MPID_nem_dcfa_cm_cmd_t cmd;
+    MPID_nem_dcfa_vc_area *vc_dcfa;
+    switch(received->type) {
+    case MPID_NEM_DCFA_CM_SYN:
+        ibcom_errno = ibcomOpen(ib_port, IBCOM_OPEN_RC, &MPID_nem_dcfa_conns[*owner].fd);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcomOpen");
+        cmd.type = MPID_NEM_DCFA_CM_SYNACK;
+        goto common_tail;
+        break;
+    case MPID_NEM_DCFA_CM_BUSINESSCARD: {
+        ibcom_errno = ibcom_rts(MPID_nem_dcfa_conns[*owner].fd, received->qpnum, received->lid, &(received->gid));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rts");
+        ibcom_errno = ibcom_reg_mr_connect(MPID_nem_dcfa_conns[*owner].fd, received->rmem, received->rkey);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_reg_mr_connect");
+        VC_FIELD(MPID_nem_dcfa_conns[*owner].vc,is_connected) = 1;
+
+        cmd.type = MPID_NEM_DCFA_CM_ACK;
+        common_tail:        
+        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[*owner].fd, IBCOM_INFOKEY_PORT_LID, &(cmd.lid), sizeof(uint16_t));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[*owner].fd, IBCOM_INFOKEY_PORT_GID, &(cmd.gid), sizeof(union ibv_gid));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+        ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[*owner].fd, IBCOM_INFOKEY_QP_QPN, &(cmd.qpnum), sizeof(uint32_t));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[*owner].fd, IBCOM_SCRATCH_PAD_TO, IBCOM_INFOKEY_MR_ADDR, &(cmd.rmem), sizeof(void*));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
+
+        ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[*owner].fd, IBCOM_SCRATCH_PAD_TO, IBCOM_INFOKEY_MR_RKEY, &(cmd.rkey), sizeof(int));
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
+
+        *owner = (uint32_t)-1; /* release */
+
+        mpi_errno = MPID_nem_dcfa_cm_send_core(*owner, &cmd);
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "MPID_nem_dcfa_cm_send_core");
+        break;
+    default:
+        printf("unknown connection command\n");
+        MPIU_ERR_CHKANDJUMP(1, mpi_errno, MPI_ERR_OTHER, "MPID_nem_dcfa_cm_poll");
+    }
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CM_POLL);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_cm_accept
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_cm_accept() {
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    int result;
+    int i;
+    struct ibv_wc cqe[IBCOM_MAX_CQ_HEIGHT_DRAIN];
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_CM_ACCEPT);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_CM_ACCEPT);
+
+    result = ibv_poll_cq(ud_shared_rcq, IBCOM_MAX_CQ_HEIGHT_DRAIN, &cqe[0]);
+    MPIU_ERR_CHKANDJUMP(result < 0, mpi_errno, MPI_ERR_OTHER, "**netmod,dcfa,ibv_poll_cq");
+
+    if(result > 0) {
+        dprintf("accept,result=%d\n", result); 
+    }
+    for(i = 0; i < result; i++)  {
+        dprintf("accept,i=%d\n", i);
+
+        MPIU_ERR_CHKANDJUMP(cqe[i].status != IBV_WC_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_accept");
+        
+        void* rbuf;
+        ibcom_errno = ibcom_mem_udwr_to(MPID_nem_dcfa_conn_ud_fd, &rbuf);
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_mem_udwr_to");
+        MPID_nem_dcfa_conn_pkt_t* rpkt = (MPID_nem_dcfa_conn_pkt_t*)(rbuf + 40);
+        if(rpkt->type == MPID_NEM_DCFA_SYN) {
+
+            dprintf("accept,%d<-%d,type=%08x\n", MPID_nem_dcfa_myrank, rpkt->remote_rank, rpkt->type);                            
+
+            void* sbuf;
+            ibcom_errno = ibcom_mem_udwr_from(MPID_nem_dcfa_conn_ud_fd, &sbuf);
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_mem_udwr_from");
+            MPID_nem_dcfa_conn_pkt_t* spkt = (MPID_nem_dcfa_conn_pkt_t*)(sbuf + 40);
+            spkt->remote_rank = MPID_nem_dcfa_myrank;
+            spkt->type = MPID_NEM_DCFA_SYNACK;
+
+            ibcom_errno = ibcom_get_info_conn(MPID_nem_dcfa_conns[rpkt->remote_rank].fd, IBCOM_INFOKEY_QP_QPN, &spkt->qpn, sizeof(uint32_t));
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_conn");
+
+            ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[rpkt->remote_rank].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_ADDR, &spkt->rmem, sizeof(void*));
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
+
+            ibcom_errno = ibcom_get_info_mr(MPID_nem_dcfa_conns[rpkt->remote_rank].fd, IBCOM_RDMAWR_TO, IBCOM_INFOKEY_MR_RKEY, &spkt->rkey, sizeof(int));
+            MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_get_info_mr");
+            
+            /* done up to here; add udsend_core(synack) */
+            /* the capacity limits here are assumed by analogy with the other ncom/ncqe checks in this file */
+            if(MPID_nem_dcfa_conn_ibcom->ncom < IBCOM_MAX_SQ_CAPACITY &&
+               MPID_nem_dcfa_ncqe_connect < IBCOM_MAX_CQ_CAPACITY) {
+                MPID_nem_dcfa_conn_send_core(rpkt->remote_rank);
+            } else {
+                MPID_nem_dcfa_sendq_conn_entry_t* entry = MPIU_Malloc(sizeof(MPID_nem_dcfa_sendq_conn_entry_t));
+                MPIU_ERR_CHKANDJUMP(!entry, mpi_errno, MPI_ERR_OTHER, "**outofmemory");
+                entry->pending_pkt = *spkt;
+                MPID_nem_dcfa_conn_sendq_enqueue(MPID_nem_dcfa_conn_sendq, entry);
+            }
+
+
+        } else {
+            dprintf("accept,unknown type=%08x\n", *((uint32_t*)(rbuf + 44)));
+        }                        
+    }
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CM_ACCEPT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+#endif
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c
new file mode 100644
index 0000000..160c49d
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c
@@ -0,0 +1,287 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2012 NEC Corporation
+ *      Author: Masamichi Takagi
+ *  (C) 2012 Oct 10 Min Si
+ *  (C) 2001-2009 Yutaka Ishikawa
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include "dcfa_ibcom.h"
+
+//#define DEBUG_REG_MR
+#ifdef DEBUG_REG_MR
+#define dprintf printf
+#else
+#define dprintf(...)
+#endif
+
+/* cache size of ibv_reg_mr */
+#define IBCOM_REG_MR_NLINE 4096
+#define IBCOM_REG_MR_NWAY  1024
+
+#define IBCOM_REG_MR_SZPAGE 4096
+#define IBCOM_REG_MR_LOGSZPAGE 12
+
+/* arena allocator */
+
+#define NIALLOCID 32
+typedef struct { char* next; } free_list_t;
+static char* free_list_front[NIALLOCID] = { 0 };
+static char* arena_flist[NIALLOCID] = { 0 };
+
+#define SZARENA 4096
+#define CLUSTER_SIZE (SZARENA/sz)
+#define ROUNDUP64(addr, align) ((addr + align - 1) & ~((unsigned long)align - 1))
+#define NCLUST_SLAB 1
+#define IBCOM_AALLOC_ID_MRCACHE 0
+
+static inline void* aalloc(size_t sz, int id) {
+#if 1 /* debug */
+    return malloc(sz);
+#else
+    char* p = free_list_front[id];
+    if((unsigned long)p & (SZARENA-1)) {
+        free_list_front[id] += sz; return p;
+    } else {
+        char* q, r;
+        if(arena_flist[id]) {
+            q = arena_flist[id];
+            arena_flist[id] = ((free_list_t*)arena_flist[id])->next;
+        } else {
+            q = mmap(NULL, ROUNDUP64(SZARENA*NCLUST_SLAB, 4096), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+#if NCLUST_SLAB > 1
+            arena_flist[id] = q + SZARENA;
+            for(p = arena_flist[id]; p < q + (NCLUST_SLAB-1) * SZARENA; p += SZARENA) {
+                ((free_list_t*)p)->next = p + SZARENA;
+            }
+            ((free_list_t*)p)->next = 0;
+#endif
+        }
+        *((int*)q) = CLUSTER_SIZE-1;
+        //	dprintf("q=%llx\n", q);
+        q += sz + (SZARENA % sz);
+        free_list_front[id] = q + sz;
+        return q;
+    }
+#endif
+}
+
+static inline void afree(const void* p, int id) {
+#if 1 /* debug */
+    return free((void*)p);
+#else
+    p = (void*)((unsigned long)p & ~(SZARENA-1));
+    if(!(--(*((int*)p)))) {
+        ((free_list_t*)p)->next = arena_flist[id];
+        arena_flist[id] = (char*)p;
+    }
+#endif
+}
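+
+/* A short usage sketch of the allocator above: each id names one size class,
+   and in the current (debug) configuration aalloc/afree simply forward to
+   malloc/free.  The 64-byte size is an arbitrary example. */
+static inline void sketch_aalloc_usage(void)
+{
+    char *p = aalloc(64, IBCOM_AALLOC_ID_MRCACHE);
+    if (p) {
+        p[0] = 0;                            /* use the object */
+        afree(p, IBCOM_AALLOC_ID_MRCACHE);   /* return it to its size class */
+    }
+}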
+
+struct ibcom_reg_mr_listnode_t {
+	struct ibcom_reg_mr_listnode_t *lru_next;
+	struct ibcom_reg_mr_listnode_t *lru_prev;
+};
+
+struct ibcom_reg_mr_cache_entry_t {
+    /* : public ibcom_reg_mr_listnode_t */
+	struct ibcom_reg_mr_listnode_t *lru_next;
+	struct ibcom_reg_mr_listnode_t *lru_prev;
+
+	struct ibv_mr *mr;
+	void* addr;
+	int len;
+	int refc;
+};
+
+static struct ibcom_reg_mr_listnode_t ibcom_reg_mr_cache[IBCOM_REG_MR_NLINE];
+
+__inline__ int ibcom_hash_func(char *addr) {
+	unsigned int v = (unsigned int) (unsigned long) addr;
+	//v = v >> IBCOM_REG_MR_LOGSZPAGE; /* assume it is page aligned */
+	v = v & (IBCOM_REG_MR_NLINE - 1);
+	return (int) v;
+}
+
+void ibcom_reg_mr_insert(struct ibcom_reg_mr_listnode_t *c, struct ibcom_reg_mr_listnode_t *e) {
+	struct ibcom_reg_mr_listnode_t *next;
+    struct ibcom_reg_mr_listnode_t *prev;
+	prev = c;
+	next = prev->lru_next;
+	e->lru_next = next;
+	e->lru_prev = prev;
+	next->lru_prev = e;
+    prev->lru_next = e;
+}
+
+void ibcom_reg_mr_unlink(struct ibcom_reg_mr_listnode_t *e) {
+	struct ibcom_reg_mr_listnode_t *next, *prev;
+	next = e->lru_next;
+	prev = e->lru_prev;
+	next->lru_prev = prev;
+	prev->lru_next = next;
+}
+
+static inline void __lru_queue_display() {
+	struct ibcom_reg_mr_cache_entry_t *p;
+	int i = 0;
+	for (i = 0; i < IBCOM_REG_MR_NLINE; i++) {
+		dprintf("---- hash %d\n", i);
+		for (p = (struct ibcom_reg_mr_cache_entry_t*)ibcom_reg_mr_cache[i].lru_next; p != (struct ibcom_reg_mr_cache_entry_t*)&ibcom_reg_mr_cache[i]; p = (struct ibcom_reg_mr_cache_entry_t*)p->lru_next) {
+			if (p && p->addr) {
+				dprintf("-------- p=%p,addr=%p,len=%d,refc=%d,lru_next=%p\n", p, p->addr, p->len, p->refc, p->lru_next);
+			} else {
+				dprintf("-------- p=%p,lru_next=%p\n", p, p->lru_next);
+			}
+		}
+	}
+}
+
+struct ibv_mr *ibcom_reg_mr_fetch(void *addr, int len) {
+#if 0 /* debug */
+    struct ibv_mr *mr;
+	int ibcom_errno = ibcom_reg_mr(addr, len, &mr);
+    printf("mrcache,ibcom_reg_mr,error,addr=%p,len=%d,lkey=%08x,rkey=%08x\n", addr, len, mr->lkey, mr->rkey);
+    if(ibcom_errno != 0) {
+        goto fn_fail;
+    }
+ fn_exit:
+    return mr;
+ fn_fail:
+    goto fn_exit;
+#else
+    int ibcom_errno;
+	int key;
+	struct ibcom_reg_mr_cache_entry_t *e;
+
+#if 1 /*def DCFA*/
+    /* we can't adjust addr here, because ibv_post_send assumes that mr->host_addr
+       (the output of this function) exactly mirrors addr (the input of this function) */
+    void* addr_aligned = addr;
+    int len_aligned = len;
+#else
+    void* addr_aligned = (void*)((unsigned long)addr & ~(IBCOM_REG_MR_SZPAGE-1));
+    int len_aligned = ((((unsigned long)addr + len) - (unsigned long)addr_aligned + IBCOM_REG_MR_SZPAGE-1) & ~(IBCOM_REG_MR_SZPAGE-1));
+#endif
+	key = ibcom_hash_func(addr);
+
+	dprintf("[MrCache] addr=%p, len=%d\n", addr, len);
+	dprintf("[MrCache] aligned addr=%p, len=%d\n", addr_aligned, len_aligned);
+
+	//__lru_queue_display();
+    int way = 0; 
+	for(e = (struct ibcom_reg_mr_cache_entry_t*)ibcom_reg_mr_cache[key].lru_next; e != (struct ibcom_reg_mr_cache_entry_t*)&ibcom_reg_mr_cache[key]; e = (struct ibcom_reg_mr_cache_entry_t*)e->lru_next, way++) {
+		//dprintf("e=%p, e->hash_next=%p\n", e, e->lru_next);
+
+		if(e->addr <= addr_aligned && addr_aligned + len_aligned <= e->addr + e->len) {
+			dprintf("ibcom_reg_mr_fetch,hit,entry addr=%p,len=%d,mr addr=%p,len=%ld,requested addr=%p,len=%d\n", e->addr, e->len, e->mr->addr, e->mr->length, addr, len);
+			goto hit;
+		}
+	}
+
+    // miss
+
+    // evict an entry and de-register its MR when the cache-set is full
+    if(way > IBCOM_REG_MR_NWAY) {
+        struct ibcom_reg_mr_cache_entry_t* victim = (struct ibcom_reg_mr_cache_entry_t*)e->lru_prev;
+        ibcom_reg_mr_unlink((struct ibcom_reg_mr_listnode_t*)victim);
+
+        dprintf("ibcom_reg_mr,evict,entry addr=%p,len=%d,mr addr=%p,len=%ld\n", e->addr, e->len, e->mr->addr, e->mr->length);
+        int ibcom_errno = ibcom_dereg_mr(victim->mr);
+        if(ibcom_errno) {
+            printf("mrcache,ibcom_dereg_mr\n");
+            goto fn_fail;
+        }
+        afree(victim, IBCOM_AALLOC_ID_MRCACHE);
+    }
+
+	e = aalloc(sizeof(struct ibcom_reg_mr_cache_entry_t), IBCOM_AALLOC_ID_MRCACHE);
+    /* reference counter is used when evicting entry */
+    e->refc = 1;
+
+	dprintf("ibcom_reg_mr_fetch,miss,addr=%p,len=%d\n", addr_aligned, len_aligned);
+	/* register memory */
+	ibcom_errno = ibcom_reg_mr(addr_aligned, len_aligned, &e->mr);
+    if(ibcom_errno != 0) {
+        fprintf(stderr, "mrcache,ibcom_reg_mr\n");
+        goto fn_fail;
+    }
+	e->addr = addr_aligned;
+	e->len = len_aligned;
+
+	dprintf("ibcom_reg_mr_fetch,fill,e=%p,key=%d,mr=%p,mr addr=%p,len=%ld,lkey=%08x,rkey=%08x\n", e, key, e->mr, e->mr->addr, e->mr->length, e->mr->lkey, e->mr->rkey);
+
+	/* register to cache */
+    ibcom_reg_mr_insert(&ibcom_reg_mr_cache[key], (struct ibcom_reg_mr_listnode_t*)e);
+
+	//__lru_queue_display();
+
+	goto fn_exit;
+
+ hit:
+
+    /* reference counter is used when evicting entry */
+    e->refc++;
+#if 0 /* disable for debug */
+    /* move to head of the list */
+	if(e != (struct ibcom_reg_mr_cache_entry_t*)ibcom_reg_mr_cache[key].lru_next) {
+        ibcom_reg_mr_unlink((struct ibcom_reg_mr_listnode_t*)e);
+        ibcom_reg_mr_insert(&ibcom_reg_mr_cache[key], (struct ibcom_reg_mr_listnode_t*)e);
+    }
+#endif
+	dprintf("[MrCache] reuse e=%p,key=%d,mr=%p,refc=%d,addr=%p,len=%ld,lkey=%08x,rkey=%08x\n", e, key, e->mr, e->refc, e->mr->addr, e->mr->length, e->mr->lkey, e->mr->rkey);
+
+	//__lru_queue_display();
+    
+ fn_exit:
+	return e->mr;
+ fn_fail:
+    goto fn_exit;
+#endif
+}
+
+void ibcom_reg_mr_dereg(struct ibv_mr *mr) {
+
+    struct ibcom_reg_mr_cache_entry_t *e;
+    struct ibcom_reg_mr_cache_entry_t *zero = 0;
+    /* offsetof idiom: recover the cache entry that embeds this mr pointer */
+    unsigned long offset = (unsigned long)&zero->mr;
+    e = (struct ibcom_reg_mr_cache_entry_t *) ((unsigned long)mr - offset);
+	e->refc--;
+
+	dprintf("ibcom_reg_mr_dereg,entry=%p,mr=%p,addr=%p,refc=%d,offset=%lx\n", e, mr, e->mr->addr, e->refc, offset);
+}
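+
+/* A hedged usage sketch of the MR cache above (buf/len are caller-supplied and
+   hypothetical; lkey is what an ibv_post_send work request would carry):
+   ibcom_reg_mr_fetch either reuses a cached registration covering the buffer
+   or registers a new one, and ibcom_reg_mr_dereg only drops the reference
+   count, leaving the registration cached for later transfers. */
+static inline unsigned int sketch_reg_mr_cache_usage(void *buf, int len)
+{
+    struct ibv_mr *mr = ibcom_reg_mr_fetch(buf, len);
+    unsigned int lkey = mr ? mr->lkey : 0;   /* key to use when posting the transfer */
+    if (mr) {
+        ibcom_reg_mr_dereg(mr);              /* refc--, entry stays in the cache */
+    }
+    return lkey;
+}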
+
+void ibcom_RegisterCacheInit() {
+	int i;
+
+    /* Using the address to the start node to express the end of the list
+       instead of using NULL */
+    for(i = 0; i < IBCOM_REG_MR_NLINE; i++) {
+        ibcom_reg_mr_cache[i].lru_next = (struct ibcom_reg_mr_listnode_t*)&ibcom_reg_mr_cache[i];
+        ibcom_reg_mr_cache[i].lru_prev = (struct ibcom_reg_mr_listnode_t*)&ibcom_reg_mr_cache[i];
+    }
+	
+	dprintf("[MrCache] cache initializes %d entries\n", IBCOM_REG_MR_NLINE);
+}
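+
+/* A small sketch of the sentinel convention initialized above: each cache line
+   heads a circular list and points to itself when empty, so emptiness is a
+   self-reference test and insert/unlink never need NULL checks. */
+static inline int sketch_reg_mr_line_is_empty(struct ibcom_reg_mr_listnode_t *head)
+{
+    return head->lru_next == head;   /* empty iff the head is its own successor */
+}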
+
+void ibcom_RegisterCacheDestroy() {
+	struct ibcom_reg_mr_cache_entry_t *p;
+	int i = 0, cnt = 0;
+
+	for(i = 0; i < IBCOM_REG_MR_NLINE; i++) {
+        for(p = (struct ibcom_reg_mr_cache_entry_t*)ibcom_reg_mr_cache[i].lru_next; p != (struct ibcom_reg_mr_cache_entry_t*)&ibcom_reg_mr_cache[i]; p = (struct ibcom_reg_mr_cache_entry_t*)p->lru_next) {
+            if (p && p->addr) {
+                ibcom_dereg_mr(p->mr);
+                afree(p, IBCOM_AALLOC_ID_MRCACHE);
+                cnt++;
+            }
+        }
+    }
+
+	//__lru_queue_display();
+
+	dprintf("[MrCache] cache destroyed %d entries\n", cnt);
+}
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c
new file mode 100644
index 0000000..2017cfb
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c
@@ -0,0 +1,984 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2012 NEC Corporation
+ *      Author: Masamichi Takagi
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "dcfa_impl.h"
+
+//#define DEBUG_DCFA_SEND
+#ifdef dprintf /* avoid redefinition with src/mpid/ch3/include/mpidimpl.h */
+#undef dprintf
+#endif
+#ifdef DEBUG_DCFA_SEND
+#define dprintf printf
+#else
+#define dprintf(...)
+#endif
+
+static int entered_send_progress = 0;
+
+#ifdef DCFA_ONDEMAND
+// tree format is
+// one or more <left_pointer(int), right_pointer(int), value(int), length(int), string(char[])>
+#define MPID_NEM_DCFA_LPTR(ptr) *(int*)((ptr) + sizeof(int)*0)
+#define MPID_NEM_DCFA_RPTR(ptr) *(int*)((ptr) + sizeof(int)*1)
+#define MPID_NEM_DCFA_VAL(ptr) *(int*)((ptr) + sizeof(int)*2)
+#define MPID_NEM_DCFA_LEN(ptr) *(int*)((ptr) + sizeof(int)*3)
+#define MPID_NEM_DCFA_PBODY(ptr) ((ptr) + sizeof(int)*4)
+
+#define ALLOCATE(map, key, key_length, initial) {                        \
+    /* grow (repeatedly if necessary) until the new node fits */ \
+    while(map->length + key_length + sizeof(int)*4 > map->max_length) { \
+        map->max_length = map->max_length ? map->max_length * 2 : 4096; \
+        map->data = realloc(map->data, map->max_length); \
+    } \
+    char* new_str = map->data + map->length; \
+    MPID_NEM_DCFA_LPTR(new_str) = 0; \
+    MPID_NEM_DCFA_RPTR(new_str) = 0; \
+    MPID_NEM_DCFA_VAL(new_str) = initial; \
+    MPID_NEM_DCFA_LEN(new_str) = key_length; \
+    memcpy(MPID_NEM_DCFA_PBODY(new_str), key, key_length); \
+    map->length += sizeof(int)*4 + key_length; \
+}
+
+void MPID_nem_dcfa_cm_map_set(MPID_nem_dcfa_cm_map_t* map, char* key, int key_length, int val) {
+    char* pTree = map->data;
+    dprintf("MPID_nem_dcfa_cm_map_set,val=%d\n", val);
+    
+	if(!pTree) {
+        ALLOCATE(map, key, key_length, val);
+        dprintf("pTree was empty\n");
+        return;
+    }
+    int s1_minus_s2;
+    while(1) {
+        int lmin = key_length < MPID_NEM_DCFA_LEN(pTree) ? key_length : MPID_NEM_DCFA_LEN(pTree);
+        int residual = key_length - MPID_NEM_DCFA_LEN(pTree);
+        s1_minus_s2 = memcmp(key, MPID_NEM_DCFA_PBODY(pTree), lmin);
+        
+        if(!s1_minus_s2 && !residual) {
+            MPID_NEM_DCFA_VAL(pTree) = val;
+            dprintf("found\n");
+            return; // same string, same length
+        } else if((s1_minus_s2 < 0) || (!s1_minus_s2 && residual < 0)) {
+            /* key is "smaller", OR same prefix but key is shorter */
+            if(MPID_NEM_DCFA_LPTR(pTree) == 0) {
+                MPID_NEM_DCFA_LPTR(pTree) = map->length; // pointer write
+                /* left child */
+                ALLOCATE(map, key, key_length, val);
+                dprintf("stored as left child\n");
+                return;
+            }
+            pTree = map->data + MPID_NEM_DCFA_LPTR(pTree); // go to left child
+        } else {
+            /* key is "larger", OR same prefix but key is longer */
+            if(MPID_NEM_DCFA_RPTR(pTree) == 0) {
+                MPID_NEM_DCFA_RPTR(pTree) = map->length; // pointer write
+                /* right child */
+                ALLOCATE(map, key, key_length, val); 
+                dprintf("stored as right child\n");
+                return; 
+            }
+            pTree = map->data + MPID_NEM_DCFA_RPTR(pTree); // go to right child
+        }
+    }
+}
+
+int MPID_nem_dcfa_cm_map_get(MPID_nem_dcfa_cm_map_t* map, char* key, int key_length, int *val) {
+    int llc_errno = LLC_SUCCESS;
+    char* pTree = map->data;
+
+    dprintf("MPID_nem_dcfa_cm_map_get,key=%s\n", key);
+
+	if(!pTree) {
+        llc_errno = -1;
+        dprintf("pTree is empty\n");
+        goto fn_fail;
+    }
+    int s1_minus_s2;
+    while(1) {
+        int lmin = key_length < MPID_NEM_DCFA_LEN(pTree) ? key_length : MPID_NEM_DCFA_LEN(pTree);
+        int residual = key_length - MPID_NEM_DCFA_LEN(pTree);
+        s1_minus_s2 = memcmp(key, MPID_NEM_DCFA_PBODY(pTree), lmin);
+
+        if(!s1_minus_s2 && !residual) {
+            *val = MPID_NEM_DCFA_VAL(pTree);
+            dprintf("value found=%d\n", *val);
+            goto fn_exit; // same string, same length
+        } else if((s1_minus_s2 < 0) || (!s1_minus_s2 && residual < 0)) {
+            /* key is "smaller", OR same prefix but key is shorter */
+            if(MPID_NEM_DCFA_LPTR(pTree) == 0) {
+                llc_errno = -1;
+                dprintf("left is null\n");
+                goto fn_fail;
+            }
+            pTree = map->data + MPID_NEM_DCFA_LPTR(pTree); // go to left child
+        } else {
+            /* key is "larger", OR same prefix but key is longer */
+            if(MPID_NEM_DCFA_RPTR(pTree) == 0) {
+                llc_errno = -1;
+                dprintf("right is null\n");
+                goto fn_fail;
+            }
+            pTree = map->data + MPID_NEM_DCFA_RPTR(pTree); // go to right child
+        }
+    }
+ fn_exit:
+    return llc_errno;
+ fn_fail:
+    goto fn_exit;
+}
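+
+/* Illustrative sketch (not part of the commit): how the flat-buffer tree map
+   above might be used.  It assumes an empty map is simply zero-initialized
+   (data=NULL, length=0, max_length=0), which is what the !pTree check and
+   ALLOCATE rely on. */
+#if 0
+static void cm_map_example(void)
+{
+    MPID_nem_dcfa_cm_map_t map;
+    char key_a[] = "host-a";
+    char key_b[] = "host-b";
+    int val = -1;
+
+    memset(&map, 0, sizeof(map));
+
+    /* keys are arbitrary byte strings; values are ints */
+    MPID_nem_dcfa_cm_map_set(&map, key_a, 6, 10);
+    MPID_nem_dcfa_cm_map_set(&map, key_b, 6, 20);
+
+    if (MPID_nem_dcfa_cm_map_get(&map, key_a, 6, &val) == LLC_SUCCESS) {
+        /* val is now 10 */
+    }
+    /* a miss returns -1 and leaves *val untouched */
+
+    free(map.data); /* the node buffer comes from realloc */
+}
+#endif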
+#endif
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_iSendContig_core
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+static int MPID_nem_dcfa_iSendContig_core(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
+                              MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    MPID_nem_dcfa_pkt_prefix_t pkt_netmod;
+    void* netmod_hdr;
+    int sz_netmod_hdr;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_ISENDCONTIG_CORE);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_ISENDCONTIG_CORE);
+
+    /* piggy-backing SR occupancy info may copy and modify the given header */
+
+    /* current tail of the remote SR sequence numbers (the value to piggy-back) */
+    int *rsr_seq_num_tail;
+    ibcom_errno = ibcom_rsr_seq_num_tail_get(vc_dcfa->sc->fd, &rsr_seq_num_tail);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rsr_seq_num_tail_get"); 
+
+    /* remote SR sequence number tail which was last sent */
+    int *rsr_seq_num_tail_last_sent;
+    ibcom_errno = ibcom_rsr_seq_num_tail_last_sent_get(vc_dcfa->sc->fd, &rsr_seq_num_tail_last_sent);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rsr_seq_num_tail_last_sent_get"); 
+
+    //dprintf("isendcontig,rsr_seq_num_tail=%d,rsr_seq_num_tail_last_sent=%d\n", *rsr_seq_num_tail, *rsr_seq_num_tail_last_sent);
+
+    int notify_rate;
+    ibcom_errno = ibcom_rdmabuf_occupancy_notify_rate_get(vc_dcfa->sc->fd, &notify_rate);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_sq_occupancy_notify_rate_get");
+
+    /* send RDMA-write-to buffer occupancy information */
+    /* embed SR occupancy information and remember the last one sent */
+    MPIDI_CH3_Pkt_t* ch3_hdr = (MPIDI_CH3_Pkt_t*)hdr;
+    if(
+       MPID_nem_dcfa_diff32(*rsr_seq_num_tail, *rsr_seq_num_tail_last_sent) > notify_rate
+       ) {
+#if 1 /* set to 0 to disable piggy-back for debugging */
+        switch(ch3_hdr->type) {
+        case MPIDI_CH3_PKT_EAGER_SEND:
+            pkt_netmod.subtype = MPIDI_NEM_DCFA_PKT_EAGER_SEND;
+            goto common_tail;
+        case MPIDI_CH3_PKT_PUT:
+            pkt_netmod.subtype = MPIDI_NEM_DCFA_PKT_PUT;
+            goto common_tail;
+        case MPIDI_CH3_PKT_ACCUMULATE:
+            pkt_netmod.subtype = MPIDI_NEM_DCFA_PKT_ACCUMULATE;
+            goto common_tail;
+        case MPIDI_CH3_PKT_GET:
+            pkt_netmod.subtype = MPIDI_NEM_DCFA_PKT_GET;
+            goto common_tail;
+        case MPIDI_CH3_PKT_GET_RESP:
+            pkt_netmod.subtype = MPIDI_NEM_DCFA_PKT_GET_RESP;
+        common_tail:
+            pkt_netmod.type = MPIDI_NEM_PKT_NETMOD;
+            pkt_netmod.seq_num_tail = *rsr_seq_num_tail;
+            *rsr_seq_num_tail_last_sent = *rsr_seq_num_tail;
+            netmod_hdr = (void*)&pkt_netmod;
+            sz_netmod_hdr = sizeof(MPID_nem_dcfa_pkt_prefix_t);
+            break;
+        default:
+            netmod_hdr = NULL;
+            sz_netmod_hdr = 0;
+            break;
+        }
+#else
+        netmod_hdr = NULL;
+        sz_netmod_hdr = 0;
+#endif
+    } else {
+        netmod_hdr = NULL;
+        sz_netmod_hdr = 0;
+    }
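+
+    /* Illustrative note: MPID_nem_dcfa_diff32() (defined elsewhere in this
+       netmod) is the wrap-around-safe distance between two 32-bit sequence
+       numbers, conceptually (uint32_t)(head - tail).  The test above therefore
+       reads "the receive-side tail has advanced by more than notify_rate slots
+       since it was last reported", which bounds the occupancy-report traffic. */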
+
+    /* packet handlers including MPIDI_CH3_PktHandler_EagerSend and MPID_nem_handle_pkt assume this */
+    hdr_sz = sizeof(MPIDI_CH3_Pkt_t);
+
+    /* send myrank as wr_id so that receiver can find vc using MPID_nem_dcfa_conns in poll */
+    /* packet handler of MPIDI_CH3_PKT_EAGER_SEND uses sizeof(MPIDI_CH3_Pkt_t), so ignoring hdr_sz */
+    
+    /* MPIDI_CH3_ReqHandler_GetSendRespComplete and drain_scq decrement the reference count */
+    if(((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_GET_RESP) {
+        //        MPIR_Request_add_ref(sreq);
+        //printf("isendcontig_core,MPIDI_CH3_PKT_GET_RESP,ref_count=%d\n", sreq->ref_count);
+    }
+
+    /* increment cc because PktHandler_EagerSyncAck (ssend.c) and drain_scq decrement it */
+    if(((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_EAGER_SYNC_SEND) {
+        MPIR_Request_add_ref(sreq);
+    }
+    if(((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_GET) {
+        //printf("isendcontig_core,MPIDI_CH3_PKT_GET,ref_count=%d\n", sreq->ref_count);
+    }
+    if(hdr&&((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_ACCUM_IMMED) {
+        dprintf("isendcontig_core,MPIDI_CH3_PKT_ACCUM_IMMED,ref_count=%d\n", sreq->ref_count);
+    }
+    if(hdr&&((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_ACCUMULATE) {
+        dprintf("isendcontig_core,MPIDI_CH3_PKT_ACCUMULATE,ref_count=%d\n", sreq->ref_count);
+    }
+
+    int msg_type = MPIDI_Request_get_msg_type(sreq);
+
+    dprintf("isendcontig_core,netmod_hdr=%p,sz_netmod_hdr=%d,hdr=%p,sz_hdr=%ld,data=%p,sz_data=%d\n", netmod_hdr, sz_netmod_hdr, hdr, hdr_sz, data, (int)data_sz);
+
+    if(sizeof(MPIDI_CH3_Pkt_t) != hdr_sz) {
+        printf("type=%d,subtype=%d\n", ((MPID_nem_pkt_netmod_t*)hdr)->type, ((MPID_nem_pkt_netmod_t*)hdr)->subtype);
+    }
+
+    int copied;
+    ibcom_errno = ibcom_isend(vc_dcfa->sc->fd, (uint64_t)sreq, netmod_hdr, sz_netmod_hdr, hdr, hdr_sz, data, (int)data_sz, &copied);
+    MPIU_ERR_CHKFATALANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_isend");
+    MPID_nem_dcfa_ncqe += 1;
+    //dprintf("isendcontig_core,ncqe=%d\n", MPID_nem_dcfa_ncqe);
+    dprintf("isendcontig_core,isend,kind=%d,msg_type=%d,copied=%d\n", sreq->kind, msg_type, copied);/*suspicious lines,(if1,on,on,off,if0) works*/
+#if 0
+#define TLBPREF_AHEAD 20//20
+    int tlb_pref_ahd = 4096 * TLBPREF_AHEAD;
+    __asm__ __volatile__
+        ("movq %0, %%rsi;"
+         "movq 0(%%rsi), %%rax;"
+         : : "r"((uint64_t)data + tlb_pref_ahd) : "%rsi", "%rax");
+#endif
+#if 1
+#ifdef __MIC__
+    __asm__ __volatile__
+        ("movq %0, %%rsi;"
+         "vprefetch0 0x00(%%rsi);"
+         "vprefetch0 0x40(%%rsi);"
+         "vprefetch0 0x80(%%rsi);"
+         "vprefetch0 0xc0(%%rsi);"
+         : : "r"((uint64_t)data + 4 * data_sz) : "%rsi");
+#else
+    __asm__ __volatile__
+        ("movq %0, %%rsi;"
+         "prefetchnta 0x00(%%rsi);"
+         "prefetchnta 0x40(%%rsi);"
+         "prefetchnta 0x80(%%rsi);"
+         "prefetchnta 0xc0(%%rsi);"
+         : : "r"((uint64_t)data + 4 * data_sz) : "%rsi");
+#endif
+#endif
+    
+    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "dcfa_send, fd=%d", vc_dcfa->sc->fd));
+    vc_dcfa->pending_sends += 1;
+    sreq->ch.vc = vc; /* used in poll */
+
+    /* Calling drain_scq from progress_send deprives dcfa_poll of the chance
+       to drain the sendq based on ncqe; instead the events are transferred so
+       that they fire in dcfa_poll via nces
+       (not via reply_seq_num, because that is regulated by the notify rate).
+       (Scenario: make the SCQ full once, then put one command in the sendq,
+       then send --> drain-scq to bring the CQE level under the threshold.)
+       So we need to perform progress_send for all VCs using nces in dcfa_poll.
+       (If we had drain-sendq in dcfa_poll, this wouldn't be needed.) */
+#if 0 /* debug,disabling fast-dec-cc when copied */
+    if(copied && !sreq->dev.OnDataAvail) { /* skip poll scq */
+           int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
+           
+           (VC_FIELD(sreq->ch.vc, pending_sends)) -= 1;
+           
+           /* as in the template */
+           reqFn = sreq->dev.OnDataAvail;
+           if (!reqFn){
+               /* MPID_Request_release is called in 
+                  MPI_Wait (in src/mpi/pt2pt/wait.c)
+                    MPIR_Wait_impl (in src/mpi/pt2pt/wait.c)
+                      MPIR_Request_complete (in /src/mpi/pt2pt/mpir_request.c) */
+               int incomplete;
+               MPIDI_CH3U_Request_decrement_cc(sreq, &incomplete);
+               if(!incomplete) { MPIDI_CH3_Progress_signal_completion(); }
+               //dprintf("isendcontig_core,cc_ptr=%d\n", *(sreq->cc_ptr));
+               dprintf("sendcontig_core,copied,complete,req=%p,cc incremented to %d,ref_count=%d\n", sreq, MPIDI_CH3I_progress_completion_count.v, sreq->ref_count);
+               MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+           } else {
+               MPIDI_VC_t *vc = sreq->ch.vc;
+               int complete = 0;
+               mpi_errno = reqFn(vc, sreq, &complete);
+               if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+               /* not-completed case is not implemented */
+               MPIU_Assert(complete == TRUE);
+               MPIU_Assert(0); /* decrement ref_count and free sreq causes problem */
+           }
+    } else {
+        MPID_nem_dcfa_ncqe_nces += 1; /* the name has a different meaning here: it counts non-copied eager-sends */
+    }
+#else
+    MPID_nem_dcfa_ncqe_nces += 1; /* the name has a different meaning here: it counts non-copied eager-sends */
+#endif
+
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+           //dprintf("isendcontig,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);  
+    int *notify_rstate;
+    ibcom_errno = ibcom_rdmabuf_occupancy_notify_rstate_get(vc_dcfa->sc->fd, &notify_rstate);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rdmabuf_occupancy_notify_rstate_get"); 
+
+    dprintf("isendcontig,head=%d,tail=%d,hw=%d\n", vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail, IBCOM_RDMABUF_HIGH_WATER_MARK);
+    /* if the number of occupied slots in the RDMA-write-to buffer has hit the high water mark */
+    if(*notify_rstate == IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW &&
+       MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) > IBCOM_RDMABUF_HIGH_WATER_MARK) {
+        dprintf("changing notify_rstate,id=%d\n", vc_dcfa->ibcom->sseq_num);
+        /* remember remote notifying policy so that local can know when to change remote policy back to LW */
+        *notify_rstate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW;
+        /* change remote notifying policy of RDMA-write-to buf occupancy info */
+        MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc, IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW);
+    }
+    //dprintf("isendcontig_core,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);  
+#endif
+
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_ISENDCONTIG_CORE);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
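+
+/* Illustrative sketch (not part of the commit): the LW->HW transition at the
+   end of iSendContig_core above is one half of a water-mark hysteresis.  The
+   names and the reverse (HW->LW) transition here are hypothetical; the real
+   constants and state live in the ibcom layer. */
+#if 0
+enum example_occ_state { EXAMPLE_LW, EXAMPLE_HW };
+
+static enum example_occ_state example_occ_update(enum example_occ_state st,
+                                                 int occupancy,
+                                                 int high_water, int low_water)
+{
+    if (st == EXAMPLE_LW && occupancy > high_water)
+        return EXAMPLE_HW;      /* tell the peer to switch its notify policy */
+    if (st == EXAMPLE_HW && occupancy < low_water)
+        return EXAMPLE_LW;      /* switch back once the buffer has drained */
+    return st;
+}
+#endif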
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_iSendContig
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
+                              MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_ISENDCONTIG);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_ISENDCONTIG);
+
+    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "dcfa_iSendContig");
+    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);
+
+#ifdef DCFA_ONDEMAND
+    if(!vc_dcfa->is_connected) { MPID_nem_dcfa_send_syn(vc); }
+#endif
+
+#if 0
+    /* aggressively perform drain_scq */
+    /* try to clear the road blocks, i.e. ncom, ncqe */
+    if(vc_dcfa->ibcom->ncom >= /*IBCOM_MAX_SQ_CAPACITY*/IBCOM_MAX_SQ_HEIGHT_DRAIN ||
+       MPID_nem_dcfa_ncqe >= /*IBCOM_MAX_CQ_CAPACITY*/IBCOM_MAX_CQ_HEIGHT_DRAIN) {
+        //printf("isendcontig,kick drain_scq\n");
+        ibcom_errno = MPID_nem_dcfa_drain_scq(1); /* the 1st arg of 1 asks it not to call send_progress, which would recursively call isendcontig_core */
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
+    }
+#endif
+    /* set it for drain_scq */
+	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_EAGER_MSG );
+
+#if 0
+    /* anticipating received message releases RDMA-write-to buffer or IB command-queue entry */
+    /* Unexpected state MPIDI_VC_STATE_CLOSED in vc 0xf1fed0 (expecting MPIDI_VC_STATE_ACTIVE)
+       Assertion failed in file src/mpid/ch3/src/ch3u_handle_connection.c at line 326: vc->state == MPIDI_VC_STATE_ACTIVE */
+    if(vc->state == MPIDI_VC_STATE_ACTIVE &&
+       MPID_nem_dcfa_tsc_poll - MPID_nem_dcfa_rdtsc() > MPID_NEM_DCFA_POLL_PERIOD_SEND) {
+        mpi_errno = MPID_nem_dcfa_poll(0);
+        if(mpi_errno) { MPIU_ERR_POP (mpi_errno); }
+    }
+#endif
+
+    int *lsr_seq_num_tail;
+    /* sequence number of (largest) completed send command */
+    ibcom_errno = ibcom_lsr_seq_num_tail_get(vc_dcfa->sc->fd, &lsr_seq_num_tail);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_lsr_seq_num_tail_get");
+    
+    int lsr_seq_num_head;
+    /* sequence number of (largest) in-flight send command */
+    ibcom_errno = ibcom_sseq_num_get(vc_dcfa->sc->fd, &lsr_seq_num_head);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_sseq_num_get");
+    
+    dprintf("isendcontig,%d->%d,type=%d,subtype=%d,data_sz=%ld,ldiff=%d(%d-%d),rdiff=%d(%d-%d)\n", MPID_nem_dcfa_myrank, vc->pg_rank, ((MPIDI_CH3_Pkt_t *)hdr)->type, ((MPID_nem_pkt_netmod_t*)hdr)->subtype, data_sz, 
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail),
+            vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail,
+            MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
+            vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent
+            );
+    dprintf("isendcontig,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+    /* if IB command overflow-queue is empty AND local IB command queue isn't full AND remote RDMA-write-to buf isn't getting overrun */
+    MPIDI_CH3_Pkt_t* ch3_hdr = (MPIDI_CH3_Pkt_t*)hdr;
+    MPID_nem_pkt_netmod_t* netmod_hdr = (MPID_nem_pkt_netmod_t *)hdr;
+    /* reserve one slot for a control packet carrying the sequence number,
+       to avoid deadlock */
+    int slack =  (
+                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_REQ_SEQ_NUM) &&
+                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_REPLY_SEQ_NUM) &&
+                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_PKT_LMT_GET_DONE) &&
+                  ch3_hdr->type != MPIDI_NEM_PKT_LMT_RTS && 
+                  ch3_hdr->type != MPIDI_NEM_PKT_LMT_CTS
+                  ) ? IBCOM_AMT_SLACK : 0;
+    /* let a control packet carrying the sequence number go ahead of
+       queued packets, to avoid deadlock */
+    int goahead = 
+        (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD && netmod_hdr->subtype == MPIDI_NEM_DCFA_REQ_SEQ_NUM) ||
+        (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD && netmod_hdr->subtype == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) ||
+        (ch3_hdr->type == MPIDI_NEM_PKT_NETMOD && netmod_hdr->subtype == MPIDI_NEM_DCFA_PKT_LMT_GET_DONE)
+        ? 1 : 0;
+    dprintf("isendcontig,slack=%d,goahead=%d\n", slack, goahead);
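+
+    /* Illustrative note: with slack = IBCOM_AMT_SLACK, the admission test below
+       lets an ordinary packet through only while the SQ, CQ and RDMA-write-to
+       ring stay below capacity-minus-slack, so that many slots always remain
+       for a REQ/REPLY_SEQ_NUM or LMT_GET_DONE packet (slack 0, goahead 1),
+       i.e. for the control traffic that eventually frees those resources. */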
+
+    if(
+#ifdef DCFA_ONDEMAND
+       vc_dcfa->is_connected &&
+#endif
+       (goahead || MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)) &&
+       vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack &&
+       MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack &&
+       MPID_nem_dcfa_diff32(lsr_seq_num_head, *lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG - slack) {
+
+        mpi_errno = MPID_nem_dcfa_iSendContig_core(vc, sreq, hdr, hdr_sz, data, data_sz);
+        if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+    } else {
+
+        /* enqueue command into send_queue */
+        dprintf("isendcontig,enqueuing,sendq=%d,ncom=%d,ncqe=%d,ldiff=%d(%d-%d),slack=%d\n",
+                (goahead || MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq)),
+                vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack,
+                MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack,
+                MPID_nem_dcfa_diff32(lsr_seq_num_head, *lsr_seq_num_tail),
+                vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail,
+                slack
+               );
+
+        /* store required info. see MPIDI_CH3_iSendv in src/mpid/ch3/channels/nemesis/src/ch3_isendv.c */
+        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
+        sreq->dev.iov[0].MPID_IOV_BUF = (char *)&sreq->dev.pending_pkt;
+        sreq->dev.iov[0].MPID_IOV_LEN = hdr_sz;
+        sreq->dev.iov[1].MPID_IOV_BUF = (char *)data;
+        sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
+        
+        sreq->dev.iov_count = 2;
+        sreq->dev.iov_offset = 0;
+        sreq->ch.noncontig = FALSE; /* used in send_progress */
+        sreq->ch.vc = vc;
+
+        if(data_sz > 0) {
+            dprintf("isendcontig,hdr=%p,hdr_sz=%ld,data=%p,data_sz=%ld,*(sreq->dev.iov[1].MPID_IOV_BUF)=%08x,sz=%ld,sz=%ld\n", hdr, hdr_sz, data, data_sz, *((uint32_t*)sreq->dev.iov[1].MPID_IOV_BUF), sizeof(sreq->dev.pending_pkt), sizeof(MPIDI_CH3_Pkt_t));
+        }
+
+        /* Enqueue the control message that tells the tail position of the
+           eager-send ring buffer at the HEAD of the software MPI command queue.
+           The reason: consider this case.
+           rank-0 performs 64 eager-sends and 48 of them are enqueued.
+           rank-1 consumes 2 of them and sends the control message.
+           rank-0 drains 2 commands from the command queue.
+           ...
+           rank-0 finds that the head of the ring buffer for receiving messages from rank-1
+              has advanced because of the control message from rank-1, and tries to send its
+              own control message, but that command is queued at the tail.
+           rank-1 stops sending the control message to rank-0 because the ring buffer is full.
+           rank-0 stops draining its command queue, and the two ranks deadlock. */
+        dprintf("isendcontig,enqueuing,type=%d,\n", ((MPIDI_CH3_Pkt_t *)hdr)->type);
+#if 0
+        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) {
+            printf("enqueuing REPLY_SEQ_NUM %d->%d,%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_ncqe);
+        }
+        //if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_CH3_PKT_ACCUMULATE) {
+        //printf("enqueuing ACCUMULATE\n");
+        //}
+        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_CH3_PKT_GET_RESP) {
+            printf("enqueuing GET_RESP\n");
+        }
+        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_CH3_PKT_GET) {
+            printf("enqueuing GET\n");
+        }
+        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_CH3_PKT_PUT) {
+            printf("enqueuing PUT\n");
+        }
+#endif
+        if(((MPIDI_CH3_Pkt_t *)hdr)->type == MPIDI_NEM_PKT_LMT_DONE) {
+            dprintf("isendcontig,enqueue,DONE\n");
+        }
+        if(ch3_hdr->type == MPIDI_NEM_PKT_NETMOD && netmod_hdr->subtype == MPIDI_NEM_DCFA_REPLY_SEQ_NUM) {
+            dprintf("isendcontig,REPLY_SEQ_NUM,enqueue_at_head\n");
+            MPID_nem_dcfa_sendq_enqueue_at_head(&vc_dcfa->sendq, sreq);
+        } else {
+            MPID_nem_dcfa_sendq_enqueue(&vc_dcfa->sendq, sreq);
+        }
+        /* we don't need to call send_progress() here because the events that
+           detect a release of the RDMA-write-to buffer or of an IB command-queue
+           entry happen only after dcfa_poll is called; this differs from the
+           case where write(2) is used */
+    }
+    
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_ISENDCONTIG);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_iStartContigMsg
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz,
+                                  void *data, MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr)
+{
+    MPID_Request *sreq = NULL;
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    int sseq_num;
+    //uint64_t tscs, tsce;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_ISTARTCONTIGMSG);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_ISTARTCONTIGMSG);
+    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "dcfa_iStartContigMsg");
+    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);
+
+    /* FIXME: avoid creating a request when not queued */
+
+    if(hdr&&((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_GET) {
+        //printf("istarctontig,MPIDI_CH3_PKT_GET,ref_count=%d\n", sreq->ref_count);
+        /* sreq here is released by drain_scq, caller
+           request in MPIDI_CH3I_Recv_rma_msg is
+           released by PKT_GET_RESP, MPIDI_CH3I_RMAListComplete*/
+    }
+
+    //tscs = MPID_nem_dcfa_rdtsc();
+    sreq = MPID_Request_create();
+    MPIU_Assert (sreq != NULL);
+    MPIU_Object_set_ref (sreq, 2);
+    sreq->kind = MPID_REQUEST_SEND;
+    sreq->dev.OnDataAvail = 0;
+    //tsce = MPID_nem_dcfa_rdtsc(); printf("rc,%ld\n", tsce - tscs); // 124.15 cycles
+
+#if 0
+    ibcom_errno = ibcom_sseq_num_get(vc_dcfa->sc->fd, &sseq_num);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_sseq_num_get");
+
+    if(hdr) {
+
+        MPIDI_CH3_Pkt_t *pkt = (MPIDI_CH3_Pkt_t*)hdr;
+        MPIDI_CH3_Pkt_close_t * close_pkt = &pkt->close;
+        dprintf("isend(istartcontig),%d->%d,seq_num=%d,type=%d,ack=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, sseq_num, close_pkt->type, close_pkt->ack);
+    } else {
+        dprintf("isend(istartcontig),%d->%d,seq_num=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, sseq_num);
+    }
+#endif
+
+    mpi_errno = MPID_nem_dcfa_iSendContig(vc, sreq, hdr, hdr_sz, data, data_sz);
+    if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+  fn_exit:
+    *sreq_ptr = sreq;
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_ISTARTCONTIGMSG);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_SendNoncontig_core
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_SendNoncontig_core(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz) {
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPIDI_msg_sz_t last;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    int sseq_num;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_SENDNONCONTIG_CORE);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_SENDNONCONTIG_CORE);
+
+    MPIU_Assert(sreq->dev.segment_first == 0);
+    last = sreq->dev.segment_size; /* segment_size is byte offset */
+    if (last > 0) {
+        REQ_FIELD(sreq, lmt_pack_buf) = MPIU_Malloc((size_t)sreq->dev.segment_size);
+        MPIU_ERR_CHKANDJUMP(!REQ_FIELD(sreq, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory");
+        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, (char *)REQ_FIELD(sreq, lmt_pack_buf));
+        MPIU_Assert(last == sreq->dev.segment_size);
+    }
+        
+    /* increment cc because PktHandler_EagerSyncAck (ssend.c) and drain_scq decrement it */
+    if(((MPIDI_CH3_Pkt_t*)hdr)->type == MPIDI_CH3_PKT_EAGER_SYNC_SEND) {
+        MPIR_Request_add_ref(sreq);
+    }
+
+    ibcom_errno = ibcom_sseq_num_get(vc_dcfa->sc->fd, &sseq_num);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno != 0, mpi_errno, MPI_ERR_OTHER, "**ibcom_sseq_num_get");
+        
+    int copied;
+    dprintf("sendnoncontig_core,isend,%d->%d,seq_num=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, sseq_num);
+    ibcom_errno = ibcom_isend(vc_dcfa->sc->fd, (uint64_t)sreq, NULL, 0, hdr, sizeof(MPIDI_CH3_Pkt_t), (void*)REQ_FIELD(sreq, lmt_pack_buf), (int)last, &copied);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno != 0, mpi_errno, MPI_ERR_OTHER, "**ibcom_isend");
+    MPID_nem_dcfa_ncqe += 1;
+    dprintf("sendnoncontig_core,ncqe=%d\n", MPID_nem_dcfa_ncqe);
+
+    vc_dcfa->pending_sends += 1;
+    sreq->ch.vc = vc; /* used in poll */
+
+#if 0 /* see contig */
+    if(copied) { /* skip poll scq */
+           int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
+           
+           (VC_FIELD(sreq->ch.vc, pending_sends)) -= 1;
+           
+           /* as in the template */
+           reqFn = sreq->dev.OnDataAvail;
+           if (!reqFn){
+               /* MPID_Request_release is called in 
+                  MPI_Wait (in src/mpi/pt2pt/wait.c)
+                    MPIR_Wait_impl (in src/mpi/pt2pt/wait.c)
+                      MPIR_Request_complete (in /src/mpi/pt2pt/mpir_request.c) */
+               int incomplete;
+               MPIDI_CH3U_Request_decrement_cc(sreq, &incomplete);
+               if(!incomplete) { MPIDI_CH3_Progress_signal_completion(); }
+               //dprintf("isendcontig_core,cc_ptr=%d\n", *(sreq->cc_ptr));
+               dprintf("sendcontig_core,complete,req=%p,cc incremented to %d\n", sreq, MPIDI_CH3I_progress_completion_count.v);
+               MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+           } else {
+               MPIDI_VC_t *vc = sreq->ch.vc;
+               int complete = 0;
+               mpi_errno = reqFn(vc, sreq, &complete);
+               if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+               /* not-completed case is not implemented */
+               MPIU_Assert(complete == TRUE);
+               MPIU_Assert(0); /* decrement ref_count and free sreq causes problem */
+           }
+    } else {
+        MPID_nem_dcfa_ncqe_nces += 1; /* the name has a different meaning here: it counts non-copied eager-shorts */
+    }
+#else
+    MPID_nem_dcfa_ncqe_nces += 1; /* the name has a different meaning here: it counts non-copied eager-shorts */
+#endif
+
+#ifndef DISABLE_VAR_OCC_NOTIFY_RATE
+#if 1
+           //dprintf("isendcontig,old rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);  
+    int *notify_rstate;
+    ibcom_errno = ibcom_rdmabuf_occupancy_notify_rstate_get(vc_dcfa->sc->fd, &notify_rstate);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_rdmabuf_occupancy_notify_rstate_get"); 
+
+    dprintf("isendcontig,head=%d,tail=%d,hw=%d\n", vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail, IBCOM_RDMABUF_HIGH_WATER_MARK);
+    /* if the number of occupied slots in the RDMA-write-to buffer has hit the high water mark */
+    if(*notify_rstate == IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_LW &&
+       MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) > IBCOM_RDMABUF_HIGH_WATER_MARK) {
+        dprintf("changing notify_rstate,id=%d\n", vc_dcfa->ibcom->sseq_num);
+        /* remember remote notifying policy so that local can know when to change remote policy back to LW */
+        *notify_rstate = IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW;
+        /* change remote notifying policy of RDMA-write-to buf occupancy info */
+        MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state(vc, IBCOM_RDMABUF_OCCUPANCY_NOTIFY_STATE_HW);
+    }
+    //dprintf("isendcontig_core,new rstate=%d\n", vc_dcfa->ibcom->rdmabuf_occupancy_notify_rstate);  
+#endif
+#endif
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_SENDNONCONTIG_CORE);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_SendNoncontig
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz) {
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPIDI_msg_sz_t last;
+    MPID_nem_dcfa_vc_area *vc_dcfa = VC_DCFA(vc);
+    int sseq_num;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_SENDNONCONTIG);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_SENDNONCONTIG);
+    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "dcfa_SendNoncontig");
+
+    dprintf("sendnoncontig,%d->%d,sendq_empty=%d,ncom=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_myrank, vc->pg_rank, MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+#if 0
+    /* aggressively perform drain_scq */
+    /* try to clear the road blocks, i.e. ncom, ncqe */
+    if(vc_dcfa->ibcom->ncom >= /*IBCOM_MAX_SQ_CAPACITY*/IBCOM_MAX_SQ_HEIGHT_DRAIN ||
+       MPID_nem_dcfa_ncqe >= /*IBCOM_MAX_CQ_CAPACITY*/IBCOM_MAX_CQ_HEIGHT_DRAIN) {
+        //printf("isendcontig,kick drain_scq\n");
+        ibcom_errno = MPID_nem_dcfa_drain_scq(1); /* the 1st arg of 1 asks it not to call send_progress, which would recursively call isendcontig_core */
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
+    }
+#endif
+    /* set it for drain_scq */
+	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_EAGER_MSG );
+
+    /* if IB command overflow-queue is empty AND local IB command queue isn't full AND remote RDMA-write-to buf isn't getting overrun */
+    /* set it for drain_scq */
+    int slack = IBCOM_AMT_SLACK; /* slack for control packet bringing sequence number */
+    if(MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq) &&
+       vc_dcfa->ibcom->ncom < IBCOM_MAX_SQ_CAPACITY - slack &&
+       MPID_nem_dcfa_ncqe < IBCOM_MAX_CQ_CAPACITY - slack &&
+       MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) < IBCOM_RDMABUF_NSEG - slack) {
+
+        mpi_errno = MPID_nem_dcfa_SendNoncontig_core(vc, sreq, hdr, hdr_sz);
+        if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+
+    } else {
+        /* enqueue command into send_queue */
+        dprintf("sendnoncontig,enqueuing\n");
+
+        /* store required info. see MPIDI_CH3_iSendv in src/mpid/ch3/channels/nemesis/src/ch3_isendv.c */
+        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
+        sreq->dev.iov[0].MPID_IOV_BUF = (char *)&sreq->dev.pending_pkt;
+        sreq->dev.iov[0].MPID_IOV_LEN = hdr_sz;
+        
+        sreq->dev.iov_count = 1;
+        sreq->dev.iov_offset = 0;
+        sreq->ch.noncontig = TRUE;
+        sreq->ch.vc = vc;
+
+        MPID_nem_dcfa_sendq_enqueue(&vc_dcfa->sendq, sreq);
+    }
+
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_SENDNONCONTIG);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+/* see MPIDI_CH3I_Shm_send_progress (in src/mpid/ch3/channels/nemesis/src/ch3_progress.c) */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_send_progress
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_send_progress(MPID_nem_dcfa_vc_area *vc_dcfa)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    MPID_IOV *iov;
+    int n_iov;
+    MPID_Request *sreq, *prev_sreq;
+    int again = 0;
+    int msg_type;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_SEND_PROGRESS);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_SEND_PROGRESS);
+
+    //dprintf("send_progress,enter\n");
+
+    /* prevent a call path send_progress -> drain_scq -> send_progress */
+    if(entered_send_progress) { goto fn_exit; }
+    entered_send_progress = 1;
+
+    sreq = MPID_nem_dcfa_sendq_head(vc_dcfa->sendq);
+    if(sreq) {
+       prev_sreq = NULL;
+       do {
+#if 0
+    /* aggressively perform drain_scq */
+    /* try to clear the road blocks, i.e. ncom, ncqe */
+    if(vc_dcfa->ibcom->ncom >= /*IBCOM_MAX_SQ_CAPACITY*/IBCOM_MAX_SQ_HEIGHT_DRAIN ||
+       MPID_nem_dcfa_ncqe >= /*IBCOM_MAX_CQ_CAPACITY*/IBCOM_MAX_CQ_HEIGHT_DRAIN) {
+        dprintf("send_progress,kick drain_scq\n");
+        ibcom_errno = MPID_nem_dcfa_drain_scq(1); /* the 1st arg of 1 asks it not to call send_progress, which would recursively call isendcontig_core */
+        MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq");
+    }
+#endif
+    msg_type = MPIDI_Request_get_msg_type(sreq);
+
+           MPIDI_CH3_Pkt_t* ch3_hdr = (MPIDI_CH3_Pkt_t *)sreq->dev.iov[0].MPID_IOV_BUF;
+           MPID_nem_pkt_netmod_t* netmod_hdr = (MPID_nem_pkt_netmod_t *)sreq->dev.iov[0].MPID_IOV_BUF;
+           int slack = (msg_type == MPIDI_REQUEST_EAGER_MSG) ? /* guard from RDMA-read or RDMA-write */
+               (
+                (
+                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_REQ_SEQ_NUM) &&
+                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_REPLY_SEQ_NUM) &&
+                 (ch3_hdr->type != MPIDI_NEM_PKT_NETMOD || netmod_hdr->subtype != MPIDI_NEM_DCFA_PKT_LMT_GET_DONE) &&
+                 ch3_hdr->type != MPIDI_NEM_PKT_LMT_RTS && 
+                 ch3_hdr->type != MPIDI_NEM_PKT_LMT_CTS
+                 ) ? IBCOM_AMT_SLACK : 0
+                ) : IBCOM_AMT_SLACK;
+           if(vc_dcfa->ibcom->ncom >= IBCOM_MAX_SQ_CAPACITY - slack ||
+              MPID_nem_dcfa_ncqe >= IBCOM_MAX_CQ_CAPACITY - slack ||
+              MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail) >= IBCOM_RDMABUF_NSEG - slack) {
+               break; 
+           }
+
+
+           if(vc_dcfa != MPID_nem_dcfa_debug_current_vc_dcfa) {
+               dprintf("send_progress,vc_dcfa != MPID_nem_dcfa_debug_current_vc_dcfa\n");
+           }
+           dprintf("send_progress,kind=%d,msg_type=%d\n", sreq->kind, msg_type);
+           if(msg_type == MPIDI_REQUEST_EAGER_MSG) {
+               dprintf("send_progress,type=%d\n", ch3_hdr->type);
+           }
+           dprintf("send_progress,%d->%d,rdiff=%d(%d-%d),ldiff=%d(%d-%d),slack=%d\n", MPID_nem_dcfa_myrank, sreq->ch.vc->pg_rank, 
+                   MPID_nem_dcfa_diff32(vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent),
+                   vc_dcfa->ibcom->rsr_seq_num_tail, vc_dcfa->ibcom->rsr_seq_num_tail_last_sent,
+                   MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail),
+                   vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail,
+                   slack
+                   );
+    if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_EAGER_MSG) {
+        if(!sreq->ch.noncontig) {
+            dprintf("send_progress,contig,type=%d,sseq_num=%d,MPIDI_NEM_PKT_LMT_RTS=%d,MPIDI_NEM_DCFA_PKT_LMT_GET_DONE=%d\n", ch3_hdr->type, vc_dcfa->ibcom->sseq_num, MPIDI_NEM_PKT_LMT_RTS, MPIDI_NEM_DCFA_PKT_LMT_GET_DONE);
+            if(sreq->dev.iov[1].MPID_IOV_LEN > 0) { dprintf("send_progress,send,contig,sreq->dev.iov[1].MPID_IOV_BUF)=%p,*(sreq->dev.iov[1].MPID_IOV_BUF)=%08x\n", sreq->dev.iov[1].MPID_IOV_BUF, *((uint32_t*)sreq->dev.iov[1].MPID_IOV_BUF)); }
+            MPIU_Assert(sreq->dev.iov_count > 0);
+            
+            switch(ch3_hdr->type) {
+            /* send the current rsr_seq_num_tail because messages from the target
+               to the initiator may have arrived while this request was queued */
+            case MPIDI_NEM_PKT_LMT_RTS: {
+                MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = (MPID_nem_dcfa_lmt_cookie_t*)sreq->dev.iov[1].MPID_IOV_BUF;
+                dprintf("send_progress,MPIDI_NEM_PKT_LMT_RTS,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+                /* embed RDMA-write-to buffer occupancy information */
+                s_cookie_buf->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
+                /* remember the last one sent */
+                vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail;
+                break; }
+                
+            case MPIDI_NEM_PKT_LMT_CTS: {
+                MPID_nem_dcfa_lmt_cookie_t* s_cookie_buf = (MPID_nem_dcfa_lmt_cookie_t*)sreq->dev.iov[1].MPID_IOV_BUF;
+                dprintf("send_progress,MPIDI_NEM_PKT_LMT_CTS,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+                /* embed RDMA-write-to buffer occupancy information */
+                s_cookie_buf->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
+                /* remember the last one sent */
+                vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail;
+                break; }
+
+            default:;
+            }
+
+            if(ch3_hdr->type == MPIDI_NEM_PKT_NETMOD) {
+                switch(netmod_hdr->subtype) {
+                /* send the current rsr_seq_num_tail because messages from the target
+                   to the initiator may have arrived while this request was queued */
+                case MPIDI_NEM_DCFA_PKT_LMT_GET_DONE: {
+                    MPID_nem_dcfa_pkt_lmt_get_done_t* _done_pkt = (MPID_nem_dcfa_pkt_lmt_get_done_t*)sreq->dev.iov[0].MPID_IOV_BUF;
+                    dprintf("send_progress,MPIDI_NEM_DCFA_PKT_LMT_GET_DONE,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+                    /* embed SR occupancy information */
+                    _done_pkt->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
+                    /* remember the last one sent */
+                    vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail;
+                    break; }
+                case MPIDI_NEM_DCFA_REPLY_SEQ_NUM: {
+                    MPID_nem_dcfa_pkt_reply_seq_num_t* _pkt = (MPID_nem_dcfa_pkt_reply_seq_num_t*)sreq->dev.iov[0].MPID_IOV_BUF;
+                    dprintf("send_progress,MPIDI_NEM_DCFA_REPLY_SEQ_NUM,rsr_seq_num_tail=%d\n", vc_dcfa->ibcom->rsr_seq_num_tail);
+                    /* embed SR occupancy information */
+                    _pkt->seq_num_tail = vc_dcfa->ibcom->rsr_seq_num_tail;
+                    /* remember the last one sent */
+                    vc_dcfa->ibcom->rsr_seq_num_tail_last_sent = vc_dcfa->ibcom->rsr_seq_num_tail;
+                    break; }
+                    
+                default:;
+                }
+            }
+
+
+            mpi_errno = MPID_nem_dcfa_iSendContig_core(sreq->ch.vc, sreq, sreq->dev.iov[0].MPID_IOV_BUF, sreq->dev.iov[0].MPID_IOV_LEN, sreq->dev.iov[1].MPID_IOV_BUF, sreq->dev.iov[1].MPID_IOV_LEN);
+            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        } else {
+            dprintf("send_progress,send,noncontig\n");
+            mpi_errno = MPID_nem_dcfa_SendNoncontig_core(sreq->ch.vc, sreq, sreq->dev.iov[0].MPID_IOV_BUF, sreq->dev.iov[0].MPID_IOV_LEN);
+            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+        }
+    } else if(sreq->kind == MPID_REQUEST_RECV && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+
+            dprintf("send_progress,kick lmt_start_recv_core,prev=%p,next=%p\n", prev_sreq, MPID_nem_dcfa_sendq_next(sreq));
+            mpi_errno = MPID_nem_dcfa_lmt_start_recv_core(sreq, REQ_FIELD(sreq, lmt_raddr), REQ_FIELD(sreq, lmt_rkey), REQ_FIELD(sreq, lmt_write_to_buf));
+            if(mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+    } else if(sreq->kind == MPID_REQUEST_SEND && msg_type == MPIDI_REQUEST_RNDV_MSG) {
+    } else {
+        dprintf("send_progress,unknown sreq->kind=%d,msg_type=%d\n", sreq->kind, msg_type);
+        assert(0);
+        MPIU_ERR_INTERNALANDJUMP(mpi_errno, "send_progress,unknown type");
+    }
+
+
+           /* unlink sreq */
+           if(prev_sreq != NULL) {
+               MPID_nem_dcfa_sendq_next(prev_sreq) = MPID_nem_dcfa_sendq_next(sreq);
+           } else {
+               MPID_nem_dcfa_sendq_head(vc_dcfa->sendq) = MPID_nem_dcfa_sendq_next(sreq);
+           }
+           if(MPID_nem_dcfa_sendq_next(sreq) == NULL) { vc_dcfa->sendq.tail = prev_sreq; }
+
+           /* save sreq->dev.next (and sreq) because decrementing reference-counter might free sreq */
+           MPID_Request *tmp_sreq = sreq;
+           sreq = MPID_nem_dcfa_sendq_next(sreq);
+           goto next_unlinked;
+       next:
+           prev_sreq = sreq;
+           sreq = MPID_nem_dcfa_sendq_next(sreq);
+       next_unlinked:;
+       } while(sreq);
+    }
+
+ out:
+
+    //dprintf("send_progress,exit,sendq_empty=%d,ncom=%d,ncqe=%d,rdmabuf_occ=%d\n", MPID_nem_dcfa_sendq_empty(vc_dcfa->sendq), vc_dcfa->ibcom->ncom, MPID_nem_dcfa_ncqe, MPID_nem_dcfa_diff32(vc_dcfa->ibcom->sseq_num, vc_dcfa->ibcom->lsr_seq_num_tail));
+
+ fn_exit:
+    entered_send_progress = 0;
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_SEND_PROGRESS);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#ifdef DCFA_ONDEMAND
+int MPID_nem_dcfa_cm_send_core(int rank, MPID_nem_dcfa_cm_cmd_t* cmd) {
+    int mpi_errno = MPI_SUCCESS;
+    int ibcom_errno;
+    IbCom* ibcom_scratch_pad;
+
+    ibcom_errno = ibcom_obtain_pointer(MPID_nem_dcfa_scratch_pad_fds[rank], &ibcom_scratch_pad);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_obtain_pointer");
+
+    if(MPID_nem_dcfa_ncqe_scratch_pad >= IBCOM_MAX_SQ_CAPACITY || ibcom_scratch_pad->ncom_scratch_pad >= IBCOM_MAX_CQ_CAPACITY) {
+        mpi_errno = MPID_nem_dcfa_drain_scq_scratch_pad();
+        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_drain_scq_scratch_pad");
+    }
+
+    ibcom_errno = ibcom_put_scratch_pad(MPID_nem_dcfa_scratch_pad_fds[rank], (uint64_t)ibcom_scratch_pad, sizeof(uint32_t), sizeof(MPID_nem_dcfa_cm_cmd_t), (void*)cmd);
+    MPIU_ERR_CHKANDJUMP(ibcom_errno, mpi_errno, MPI_ERR_OTHER, "**ibcom_put_scratch_pad");
+    MPID_nem_dcfa_ncqe_scratch_pad += 1;
+
+    /* atomic write to doorbell */
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_dcfa_cm_connect
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_dcfa_cm_connect(MPIDI_VC_t * vc) {
+    int mpi_errno = MPI_SUCCESS;
+    int val;
+    MPID_nem_dcfa_cm_cmd_t cmd;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_DCFA_CM_CONNECT);
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_DCFA_CM_CONNECT);
+
+    dprintf("connect,enter\n");
+
+    cmd.type = MPID_NEM_DCFA_CM_SYN;
+    mpi_errno = MPID_nem_dcfa_cm_send_core(vc->pg_rank, &cmd); /* pg_rank identifies the peer */
+    MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_dcfa_cm_put");
+    
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_DCFA_CM_CONNECT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+#endif
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/errnames.txt b/src/mpid/ch3/channels/nemesis/netmod/dcfa/errnames.txt
new file mode 100644
index 0000000..c6b16ef
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/errnames.txt
@@ -0,0 +1,53 @@
+
+**MPIDI_PG_GetConnKVSname:MPIDI_PG_GetConnKVSname failed
+**MPID_nem_dcfa_cm_accept:MPID_nem_dcfa_cm_accept failed
+**MPID_nem_dcfa_cm_drain_scq:MPID_nem_dcfa_cm_drain_scq failed
+**MPID_nem_dcfa_cm_poll:MPID_nem_dcfa_cm_poll failed
+**MPID_nem_dcfa_cm_put:MPID_nem_dcfa_cm_put failed
+**MPID_nem_dcfa_cm_send_core:MPID_nem_dcfa_cm_send_core failed
+**MPID_nem_dcfa_drain_scq:MPID_nem_dcfa_drain_scq failed
+**MPID_nem_dcfa_drain_scq_lmt_put:MPID_nem_dcfa_drain_scq_lmt_put failed
+**MPID_nem_dcfa_drain_scq_scratch_pad:MPID_nem_dcfa_drain_scq_scratch_pad failed
+**MPID_nem_dcfa_kvs_put_binary:MPID_nem_dcfa_kvs_put_binary failed
+**MPID_nem_dcfa_lmt_done_recv:MPID_nem_dcfa_lmt_done_recv failed
+**MPID_nem_dcfa_lmt_done_send:MPID_nem_dcfa_lmt_done_send failed
+**MPID_nem_dcfa_npollingset:MPID_nem_dcfa_npollingset failed
+**MPID_nem_dcfa_poll:MPID_nem_dcfa_poll failed
+**MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state:MPID_nem_dcfa_send_change_rdmabuf_occupancy_notify_state failed
+**MPID_nem_dcfa_send_reply_seq_num:MPID_nem_dcfa_send_reply_seq_num failed
+**MPID_nem_dcfa_send_req_seq_num:MPID_nem_dcfa_send_req_seq_num failed
+**PMI_Barrier:PMI_Barrier failed
+**PMI_KVS_Get:PMI_KVS_Get failed
+**PMI_KVS_Put:PMI_KVS_Put failed
+**PMS_KVS_Get:PMS_KVS_Get failed
+**donepkt:donepkt failed
+**ibcomOpen:ibcomOpen failed
+**ibcom_alloc:ibcom_alloc failed
+**ibcom_close:ibcom_close failed
+**ibcom_get_info_conn:ibcom_get_info_conn failed
+**ibcom_get_info_mr:ibcom_get_info_mr failed
+**ibcom_irecv:ibcom_irecv failed
+**ibcom_isend:ibcom_isend failed
+**ibcom_lrecv:ibcom_lrecv failed
+**ibcom_lsr_seq_num_tail_get:ibcom_lsr_seq_num_tail_get failed
+**ibcom_mem_udwr_from:ibcom_mem_udwr_from failed
+**ibcom_mem_udwr_to:ibcom_mem_udwr_to failed
+**ibcom_obtain_pointer:ibcom_obtain_pointer failed
+**ibcom_poll_cq %s:ibcom_poll_cq failed with error %s
+**ibcom_poll_cq:ibcom_poll_cq failed
+**ibcom_put_scratch_pad:ibcom_put_scratch_pad failed
+**ibcom_rdmabuf_occupancy_notify_lstate_get:ibcom_rdmabuf_occupancy_notify_lstate_get failed
+**ibcom_rdmabuf_occupancy_notify_rate_get:ibcom_rdmabuf_occupancy_notify_rate_get failed
+**ibcom_rdmabuf_occupancy_notify_rstate_get:ibcom_rdmabuf_occupancy_notify_rstate_get failed
+**ibcom_reg_mr_connect:ibcom_reg_mr_connect failed
+**ibcom_reg_mr_fetch:ibcom_reg_mr_fetch failed
+**ibcom_rsr_seq_num_tail_get:ibcom_rsr_seq_num_tail_get failed
+**ibcom_rsr_seq_num_tail_last_sent_get:ibcom_rsr_seq_num_tail_last_sent_get failed
+**ibcom_rts:ibcom_rts failed
+**ibcom_sq_occupancy_notify_rate_get:ibcom_sq_occupancy_notify_rate_get failed
+**ibcom_sseq_num_get:ibcom_sseq_num_get failed
+**ibcom_udrecv:ibcom_udrecv failed
+**netmod,dcfa,ibv_poll_cq:netmod,dcfa,ibv_poll_cq failed
+**notimplemented:notimplemented failed
+**outofmemory:outofmemory failed
+**sizeof(MPIDI_CH3_Pkt_t):sizeof(MPIDI_CH3_Pkt_t) failed
diff --git a/src/mpid/ch3/channels/nemesis/netmod/dcfa/subconfigure.m4 b/src/mpid/ch3/channels/nemesis/netmod/dcfa/subconfigure.m4
new file mode 100644
index 0000000..9274887
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/dcfa/subconfigure.m4
@@ -0,0 +1,36 @@
+[#] start of __file__
+dnl MPICH_SUBCFG_AFTER=src/mpid/ch3/channels/nemesis
+
+AC_DEFUN([PAC_SUBCFG_PREREQ_]PAC_SUBCFG_AUTO_SUFFIX,[
+    AM_COND_IF([BUILD_CH3_NEMESIS],[
+        for net in $nemesis_networks ; do
+            AS_CASE([$net],[dcfa],[build_nemesis_netmod_dcfa=yes])
+        done
+    ])
+    AM_CONDITIONAL([BUILD_NEMESIS_NETMOD_DCFA],[test "X$build_nemesis_netmod_dcfa" = "Xyes"])
+
+    # check if getpagesize is available
+    AC_CHECK_FUNCS(getpagesize)
+])dnl
+
+AC_DEFUN([PAC_SUBCFG_BODY_]PAC_SUBCFG_AUTO_SUFFIX,[
+AM_COND_IF([BUILD_NEMESIS_NETMOD_DCFA],[
+    AC_MSG_NOTICE([RUNNING CONFIGURE FOR ch3:nemesis:dcfa])
+
+    AC_ARG_ENABLE(dcfa, [--enable-dcfa - use DCFA library instead of IB Verbs library for MPICH/DCFA/McKernel/MIC],,enable_dcfa=no)
+    if test "$enable_dcfa" = "yes" ; then
+        AC_MSG_NOTICE([--enable-dcfa detected])
+        PAC_CHECK_HEADER_LIB_FATAL(dcfa, dcfa.h, dcfa, ibv_open_device)
+# see confdb/aclocal_libs.m4
+    else   
+        PAC_CHECK_HEADER_LIB_FATAL(ib, infiniband/verbs.h, ibverbs, ibv_open_device)
+    fi                 
+
+    AC_DEFINE([MPID_NEM_DCFA_VERSION], ["0.9.0"], [Version of netmod/DCFA])
+    AC_DEFINE([MPID_NEM_DCFA_RELEASE_DATE], ["2013-11-18"], [Release date of netmod/DCFA])
+    AC_DEFINE([ENABLE_COMM_OVERRIDES], 1, [define to add per-vc function pointers to override send and recv functions, registered in dcfa_init.c])
+#    AC_DEFINE([ENABLE_RNDV_WAIT_TIMER], 1, [make MPI_Wtime returns wait time. Wait time is elapsed time from MPIDI_CH3_Progress_start to MPIDI_CH3_Progress_end])
+])dnl end AM_COND_IF(BUILD_NEMESIS_NETMOD_DCFA,...)
+])dnl end _BODY
+
+[#] end of __file__

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/netmod/Makefile.mk   |    1 +
 .../ch3/channels/nemesis/netmod/dcfa/Makefile.mk   |   22 +
 .../channels/nemesis/netmod/dcfa/cross_values.txt  |   16 +
 .../channels/nemesis/netmod/dcfa/dcfa_finalize.c   |   47 +
 .../ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c  | 2216 ++++++++++++++++++++
 .../ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.h  |  413 ++++
 .../ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h   |  642 ++++++
 .../ch3/channels/nemesis/netmod/dcfa/dcfa_init.c   | 1113 ++++++++++
 .../ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c    |  552 +++++
 .../ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c   | 2085 ++++++++++++++++++
 .../ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c |  321 +++
 .../ch3/channels/nemesis/netmod/dcfa/dcfa_send.c   | 1116 ++++++++++
 .../ch3/channels/nemesis/netmod/dcfa/errnames.txt  |   53 +
 .../channels/nemesis/netmod/dcfa/subconfigure.m4   |   36 +
 14 files changed, 8633 insertions(+), 0 deletions(-)
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/Makefile.mk
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/cross_values.txt
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_finalize.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_ibcom.h
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_impl.h
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_init.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_lmt.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_poll.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_reg_mr.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/dcfa_send.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/errnames.txt
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/dcfa/subconfigure.m4


hooks/post-receive
-- 
MPICH primary repository

